Index: head/contrib/llvm/lib/Target/PowerPC/PPC.td
===================================================================
--- head/contrib/llvm/lib/Target/PowerPC/PPC.td	(revision 309148)
+++ head/contrib/llvm/lib/Target/PowerPC/PPC.td	(revision 309149)
@@ -1,453 +1,470 @@
//===-- PPC.td - Describe the PowerPC Target Machine -------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This is the top level entry point for the PowerPC target.
//
//===----------------------------------------------------------------------===//

// Get the target-independent interfaces which we are implementing.
//
include "llvm/Target/Target.td"

//===----------------------------------------------------------------------===//
// PowerPC Subtarget features.
//

//===----------------------------------------------------------------------===//
// CPU Directives
//
//===----------------------------------------------------------------------===//

def Directive440 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_440", "">;
def Directive601 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_601", "">;
def Directive602 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_602", "">;
def Directive603 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
def Directive604 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
def Directive620 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
def Directive7400: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_7400", "">;
def Directive750 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_750", "">;
def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">;
def Directive32  : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">;
def Directive64  : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">;
def DirectiveA2  : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">;
def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective",
                                       "PPC::DIR_E500mc", "">;
def DirectiveE5500  : SubtargetFeature<"", "DarwinDirective",
                                       "PPC::DIR_E5500", "">;
def DirectivePwr3: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR3", "">;
def DirectivePwr4: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR4", "">;
def DirectivePwr5: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5", "">;
def DirectivePwr5x
    : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "">;
def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">;
def DirectivePwr6x
    : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">;
def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">;
def DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">;
def DirectivePwr9: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR9", "">;

def Feature64Bit     : SubtargetFeature<"64bit","Has64BitSupport", "true",
                                        "Enable 64-bit instructions">;
-def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
-                                        "Use software emulation for floating point">;
+def FeatureHardFloat : SubtargetFeature<"hard-float", "HasHardFloat", "true",
+                                        "Enable floating-point instructions">;
def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true",
                              "Enable 64-bit registers usage for ppc32 [beta]">;
def FeatureCRBits    : SubtargetFeature<"crbits", "UseCRBits", "true",
                              "Use condition-register bits individually">;
def FeatureAltivec   : SubtargetFeature<"altivec","HasAltivec", "true",
-                                        "Enable Altivec instructions">;
+                                        "Enable Altivec instructions",
+                                        [FeatureHardFloat]>;
def FeatureSPE       : SubtargetFeature<"spe","HasSPE", "true",
-                                        "Enable SPE instructions">;
+                                        "Enable SPE instructions",
+                                        [FeatureHardFloat]>;
def FeatureMFOCRF    : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
                                        "Enable the MFOCRF instruction">;
def FeatureFSqrt     : SubtargetFeature<"fsqrt","HasFSQRT", "true",
-                                        "Enable the fsqrt instruction">;
+                                        "Enable the fsqrt instruction",
+                                        [FeatureHardFloat]>;
def FeatureFCPSGN    : SubtargetFeature<"fcpsgn", "HasFCPSGN", "true",
-                                        "Enable the fcpsgn instruction">;
+                                        "Enable the fcpsgn instruction",
+                                        [FeatureHardFloat]>;
def FeatureFRE       : SubtargetFeature<"fre", "HasFRE", "true",
-                                        "Enable the fre instruction">;
+                                        "Enable the fre instruction",
+                                        [FeatureHardFloat]>;
def FeatureFRES      : SubtargetFeature<"fres", "HasFRES", "true",
-                                        "Enable the fres instruction">;
+                                        "Enable the fres instruction",
+                                        [FeatureHardFloat]>;
def FeatureFRSQRTE   : SubtargetFeature<"frsqrte", "HasFRSQRTE", "true",
-                                        "Enable the frsqrte instruction">;
+                                        "Enable the frsqrte instruction",
+                                        [FeatureHardFloat]>;
def FeatureFRSQRTES  : SubtargetFeature<"frsqrtes", "HasFRSQRTES", "true",
-                                        "Enable the frsqrtes instruction">;
+                                        "Enable the frsqrtes instruction",
+                                        [FeatureHardFloat]>;
def FeatureRecipPrec : SubtargetFeature<"recipprec", "HasRecipPrec", "true",
                              "Assume higher precision reciprocal estimates">;
def FeatureSTFIWX    : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
-                                        "Enable the stfiwx instruction">;
+                                        "Enable the stfiwx instruction",
+                                        [FeatureHardFloat]>;
def FeatureLFIWAX    : SubtargetFeature<"lfiwax","HasLFIWAX", "true",
-                                        "Enable the lfiwax instruction">;
+                                        "Enable the lfiwax instruction",
+                                        [FeatureHardFloat]>;
def FeatureFPRND     : SubtargetFeature<"fprnd", "HasFPRND", "true",
-                                        "Enable the fri[mnpz] instructions">;
+                                        "Enable the fri[mnpz] instructions",
+                                        [FeatureHardFloat]>;
def FeatureFPCVT     : SubtargetFeature<"fpcvt", "HasFPCVT", "true",
-  "Enable fc[ft]* (unsigned and single-precision) and lfiwzx instructions">;
+  "Enable fc[ft]* (unsigned and single-precision) and lfiwzx instructions",
+  [FeatureHardFloat]>;
def FeatureISEL      : SubtargetFeature<"isel","HasISEL", "true",
                                        "Enable the isel instruction">;
def FeatureBPERMD    : SubtargetFeature<"bpermd", "HasBPERMD", "true",
                                        "Enable the bpermd instruction">;
def FeatureExtDiv    : SubtargetFeature<"extdiv", "HasExtDiv", "true",
                                        "Enable extended divide instructions">;
def FeatureLDBRX     : SubtargetFeature<"ldbrx","HasLDBRX", "true",
                                        "Enable the ldbrx instruction">;
def FeatureCMPB      : SubtargetFeature<"cmpb", "HasCMPB", "true",
                                        "Enable the cmpb instruction">;
def FeatureICBT      : SubtargetFeature<"icbt","HasICBT", "true",
                                        "Enable icbt instruction">;
def FeatureBookE     : SubtargetFeature<"booke", "IsBookE", "true",
                                        "Enable Book E instructions",
                                        [FeatureICBT]>;
def FeatureMSYNC     : SubtargetFeature<"msync", "HasOnlyMSYNC", "true",
                              "Has only the msync instruction instead of sync",
                              [FeatureBookE]>;
def FeatureE500      : SubtargetFeature<"e500", "IsE500", "true",
                                        "Enable E500/E500mc instructions">;
def FeaturePPC4xx    : SubtargetFeature<"ppc4xx", "IsPPC4xx", "true",
                                        "Enable PPC 4xx instructions">;
def FeaturePPC6xx    : SubtargetFeature<"ppc6xx", "IsPPC6xx", "true",
                                        "Enable PPC 6xx instructions">;
def FeatureQPX       : SubtargetFeature<"qpx","HasQPX", "true",
-                                        "Enable QPX instructions">;
+                                        "Enable QPX instructions",
+                                        [FeatureHardFloat]>;
def FeatureVSX       : SubtargetFeature<"vsx","HasVSX", "true",
                                        "Enable VSX instructions",
                                        [FeatureAltivec]>;
def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec",
                                        "true",
                                        "Enable POWER8 Altivec instructions",
                                        [FeatureAltivec]>;
def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true",
                                       "Enable POWER8 Crypto instructions",
                                       [FeatureP8Altivec]>;
def FeatureP8Vector  : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
                                        "Enable POWER8 vector instructions",
                                        [FeatureVSX, FeatureP8Altivec]>;
def FeatureDirectMove :
  SubtargetFeature<"direct-move", "HasDirectMove", "true",
                   "Enable Power8 direct move instructions",
                   [FeatureVSX]>;
def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics",
                                             "HasPartwordAtomics", "true",
                                             "Enable l[bh]arx and st[bh]cx.">;
def FeatureInvariantFunctionDescriptors :
  SubtargetFeature<"invariant-function-descriptors",
                   "HasInvariantFunctionDescriptors", "true",
                   "Assume function descriptors are invariant">;
def FeatureLongCall : SubtargetFeature<"longcall", "UseLongCalls", "true",
                                       "Always use indirect calls">;
def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
                        "Enable Hardware Transactional Memory instructions">;
def FeatureMFTB   : SubtargetFeature<"", "FeatureMFTB", "true",
                              "Implement mftb using the mfspr instruction">;
def FeatureFusion : SubtargetFeature<"fusion", "HasFusion", "true",
                                     "Target supports add/load integer fusion.">;
def FeatureFloat128 :
  SubtargetFeature<"float128", "HasFloat128", "true",
                   "Enable the __float128 data type for IEEE-754R Binary128.",
                   [FeatureVSX]>;
def FeaturePOPCNTD   : SubtargetFeature<"popcntd","HasPOPCNTD",
                                        "POPCNTD_Fast",
                                        "Enable the popcnt[dw] instructions">;
// Note that for the a2/a2q processor models we should not use popcnt[dw] by
// default. These processors do support the instructions, but they're
// microcoded, and the software emulation is about twice as fast.
def FeatureSlowPOPCNTD : SubtargetFeature<"slow-popcntd","HasPOPCNTD",
                                          "POPCNTD_Slow",
                                          "Has slow popcnt[dw] instructions">;

def DeprecatedDST    : SubtargetFeature<"", "DeprecatedDST", "true",
  "Treat vector data stream cache control instructions as deprecated">;

def FeatureISA3_0 : SubtargetFeature<"isa-v30-instructions", "IsISA3_0",
                                     "true",
                                     "Enable instructions added in ISA 3.0.">;
def FeatureP9Altivec : SubtargetFeature<"power9-altivec", "HasP9Altivec",
                                        "true",
                                        "Enable POWER9 Altivec instructions",
                                        [FeatureISA3_0, FeatureP8Altivec]>;
def FeatureP9Vector  : SubtargetFeature<"power9-vector", "HasP9Vector", "true",
                                        "Enable POWER9 vector instructions",
                                        [FeatureISA3_0, FeatureP8Vector,
                                         FeatureP9Altivec]>;

// Since new processors generally contain a superset of features of those that
// came before them, the idea is to make implementations of new processors
// less error prone and easier to read.
// Namely:
//     list<SubtargetFeature> Power8FeatureList = ...
//     list<SubtargetFeature> FutureProcessorSpecificFeatureList =
//         [ features that Power8 does not support ]
//     list<SubtargetFeature> FutureProcessorFeatureList =
//         !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList)

// Makes it explicit and obvious what is new in FutureProcessor vs. Power8 as
// well as providing a single point of definition if the feature set will be
// used elsewhere.
def ProcessorFeatures {
  list<SubtargetFeature> Power7FeatureList =
      [DirectivePwr7, FeatureAltivec, FeatureVSX,
       FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
       FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
       FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
       FeatureFPRND, FeatureFPCVT, FeatureISEL,
       FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
       Feature64Bit /*, Feature64BitRegs */,
       FeatureBPERMD, FeatureExtDiv,
       FeatureMFTB, DeprecatedDST];
  list<SubtargetFeature> Power8SpecificFeatures =
      [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto,
       FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic,
       FeatureFusion];
  list<SubtargetFeature> Power8FeatureList =
      !listconcat(Power7FeatureList, Power8SpecificFeatures);
  list<SubtargetFeature> Power9SpecificFeatures =
      [FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0];
  list<SubtargetFeature> Power9FeatureList =
      !listconcat(Power8FeatureList, Power9SpecificFeatures);
}

// Note: Future features to add when support is extended to more
// recent ISA levels:
//
// DFP          p6, p6x, p7       decimal floating-point instructions
// POPCNTB      p5 through p7     popcntb and related instructions

//===----------------------------------------------------------------------===//
// Classes used for relation maps.
//===----------------------------------------------------------------------===//
// RecFormRel - Filter class used to relate non-record-form instructions with
// their record-form variants.
class RecFormRel;

// AltVSXFMARel - Filter class used to relate the primary addend-killing VSX
// FMA instruction forms with their corresponding factor-killing forms.
class AltVSXFMARel {
  bit IsVSXFMAAlt = 0;
}

//===----------------------------------------------------------------------===//
// Relation Map Definitions.
//===----------------------------------------------------------------------===//

def getRecordFormOpcode : InstrMapping {
  let FilterClass = "RecFormRel";
  // Instructions with the same BaseName and Interpretation64Bit values
  // form a row.
  let RowFields = ["BaseName", "Interpretation64Bit"];
  // Instructions with the same RC value form a column.
  let ColFields = ["RC"];
  // The key column are the non-record-form instructions.
  let KeyCol = ["0"];
  // Value columns RC=1
  let ValueCols = [["1"]];
}

def getNonRecordFormOpcode : InstrMapping {
  let FilterClass = "RecFormRel";
  // Instructions with the same BaseName and Interpretation64Bit values
  // form a row.
  let RowFields = ["BaseName", "Interpretation64Bit"];
  // Instructions with the same RC value form a column.
  let ColFields = ["RC"];
  // The key column are the record-form instructions.
  let KeyCol = ["1"];
  // Value columns are RC=0
  let ValueCols = [["0"]];
}

def getAltVSXFMAOpcode : InstrMapping {
  let FilterClass = "AltVSXFMARel";
  // Instructions with the same BaseName and Interpretation64Bit values
  // form a row.
  let RowFields = ["BaseName"];
  // Instructions with the same RC value form a column.
  let ColFields = ["IsVSXFMAAlt"];
  // The key column are the (default) addend-killing instructions.
  let KeyCol = ["0"];
  // Value columns IsVSXFMAAlt=1
  let ValueCols = [["1"]];
}
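// How these relation maps are consumed (a sketch; the exact generated names
// are an assumption -- TableGen emits them into PPCGenInstrInfo.inc): each
// InstrMapping becomes a lookup function, so C++ passes can hop from an
// opcode to its record-form twin without a hand-written table, roughly:
//
//   // e.g. rewrite "add" as "add." so a following compare can be deleted
//   int NewOpC = PPC::getRecordFormOpcode(MI.getOpcode());
//   if (NewOpC != -1)
//     MI.setDesc(TII->get(NewOpC));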
// -def : Processor<"generic", G3Itineraries, [Directive32, FeatureMFTB]>; +def : Processor<"generic", G3Itineraries, [Directive32, FeatureHardFloat, + FeatureMFTB]>; def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL, FeatureFRES, FeatureFRSQRTE, FeatureICBT, FeatureBookE, FeatureMSYNC, FeatureMFTB]>; def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL, FeatureFRES, FeatureFRSQRTE, FeatureICBT, FeatureBookE, FeatureMSYNC, FeatureMFTB]>; -def : Processor<"601", G3Itineraries, [Directive601]>; -def : Processor<"602", G3Itineraries, [Directive602, +def : Processor<"601", G3Itineraries, [Directive601, FeatureHardFloat]>; +def : Processor<"602", G3Itineraries, [Directive602, FeatureHardFloat, FeatureMFTB]>; def : Processor<"603", G3Itineraries, [Directive603, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : Processor<"603e", G3Itineraries, [Directive603, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : Processor<"603ev", G3Itineraries, [Directive603, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : Processor<"604", G3Itineraries, [Directive604, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : Processor<"604e", G3Itineraries, [Directive604, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : Processor<"620", G3Itineraries, [Directive620, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : Processor<"750", G4Itineraries, [Directive750, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : Processor<"g3", G3Itineraries, [Directive750, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : ProcessorModel<"970", G5Model, [Directive970, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureFRES, FeatureFRSQRTE, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */, FeatureMFTB]>; def : ProcessorModel<"g5", G5Model, [Directive970, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, FeatureFRES, FeatureFRSQRTE, Feature64Bit /*, Feature64BitRegs */, FeatureMFTB, DeprecatedDST]>; def : ProcessorModel<"e500mc", PPCE500mcModel, [DirectiveE500mc, FeatureSTFIWX, FeatureICBT, FeatureBookE, FeatureISEL, FeatureMFTB]>; def : ProcessorModel<"e5500", PPCE5500Model, [DirectiveE5500, FeatureMFOCRF, Feature64Bit, FeatureSTFIWX, FeatureICBT, FeatureBookE, FeatureISEL, FeatureMFTB]>; def : ProcessorModel<"a2", PPCA2Model, [DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */, FeatureMFTB]>; def : ProcessorModel<"a2q", PPCA2Model, [DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */, FeatureQPX, FeatureMFTB]>; def : ProcessorModel<"pwr3", G5Model, [DirectivePwr3, FeatureAltivec, FeatureFRES, FeatureFRSQRTE, 
FeatureMFOCRF, FeatureSTFIWX, Feature64Bit]>; def : ProcessorModel<"pwr4", G5Model, [DirectivePwr4, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureFRES, FeatureFRSQRTE, FeatureSTFIWX, Feature64Bit, FeatureMFTB]>; def : ProcessorModel<"pwr5", G5Model, [DirectivePwr5, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureSTFIWX, Feature64Bit, FeatureMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr5x", G5Model, [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureSTFIWX, FeatureFPRND, Feature64Bit, FeatureMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr6", G5Model, [DirectivePwr6, FeatureAltivec, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB, FeatureFPRND, Feature64Bit /*, Feature64BitRegs */, FeatureMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr6x", G5Model, [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB, FeatureFPRND, Feature64Bit, FeatureMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>; def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>; // FIXME: Same as P8 until the POWER9 scheduling info is available def : ProcessorModel<"pwr9", P8Model, ProcessorFeatures.Power9FeatureList>; -def : Processor<"ppc", G3Itineraries, [Directive32, FeatureMFTB]>; +def : Processor<"ppc", G3Itineraries, [Directive32, FeatureHardFloat, + FeatureMFTB]>; +def : Processor<"ppc32", G3Itineraries, [Directive32, FeatureHardFloat, + FeatureMFTB]>; def : ProcessorModel<"ppc64", G5Model, [Directive64, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureFRES, FeatureFRSQRTE, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */, FeatureMFTB]>; def : ProcessorModel<"ppc64le", P8Model, ProcessorFeatures.Power8FeatureList>; //===----------------------------------------------------------------------===// // Calling Conventions //===----------------------------------------------------------------------===// include "PPCCallingConv.td" def PPCInstrInfo : InstrInfo { let isLittleEndianEncoding = 1; // FIXME: Unset this when no longer needed! let decodePositionallyEncodedOperands = 1; let noNamedPositionallyEncodedOperands = 1; } def PPCAsmParser : AsmParser { let ShouldEmitMatchRegisterName = 0; } def PPCAsmParserVariant : AsmParserVariant { int Variant = 0; // We do not use hard coded registers in asm strings. However, some // InstAlias definitions use immediate literals. Set RegisterPrefix // so that those are not misinterpreted as registers. string RegisterPrefix = "%"; string BreakCharacters = "."; } def PPC : Target { // Information about the instructions. 
let InstructionSet = PPCInstrInfo; let AssemblyParsers = [PPCAsmParser]; let AssemblyParserVariants = [PPCAsmParserVariant]; } Index: head/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- head/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp (revision 309148) +++ head/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp (revision 309149) @@ -1,12122 +1,12122 @@ //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the PPCISelLowering class. // //===----------------------------------------------------------------------===// #include "PPCISelLowering.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPCCallingConv.h" #include "PPCCCState.h" #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" #include "PPCTargetMachine.h" #include "PPCTargetObjectFile.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" #include using namespace llvm; #define DEBUG_TYPE "ppc-lowering" static cl::opt DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); static cl::opt DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); static cl::opt DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); static cl::opt DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden); STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumSiblingCalls, "Number of sibling calls"); // FIXME: Remove this once the bug has been fixed! extern cl::opt ANDIGlueBug; PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { // Use _setjmp/_longjmp instead of setjmp/longjmp. setUseUnderscoreSetJmp(true); setUseUnderscoreLongJmp(true); // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all // arguments are at least 4/8 bytes aligned. bool isPPC64 = Subtarget.isPPC64(); setMinStackArgumentAlignment(isPPC64 ? 8:4); // Set up the register classes. 
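  // A minimal sketch (an assumption, not shown in this diff) of how the
  // inverted feature bit surfaces here: the old UseSoftFloat subtarget flag
  // becomes the negation of the new HasHardFloat bit, so every CPU model that
  // carries FeatureHardFloat -- directly or via an implied feature such as
  // FeatureAltivec -- keeps hardware FP, and -mattr=-hard-float disables it.
  //
  //   bool PPCSubtarget::useSoftFloat() const { return !HasHardFloat; }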
addRegisterClass(MVT::i32, &PPC::GPRCRegClass); if (!useSoftFloat()) { addRegisterClass(MVT::f32, &PPC::F4RCRegClass); addRegisterClass(MVT::f64, &PPC::F8RCRegClass); } // PowerPC has an i16 but no i8 (or i1) SEXTLOAD for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); } setTruncStoreAction(MVT::f64, MVT::f32, Expand); // PowerPC has pre-inc load and store's. setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal); if (Subtarget.useCRBits()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (isPPC64 || Subtarget.hasFPCVT()) { setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); AddPromotedToType (ISD::SINT_TO_FP, MVT::i1, isPPC64 ? MVT::i64 : MVT::i32); setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, isPPC64 ? MVT::i64 : MVT::i32); } else { setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); } // PowerPC does not support direct load / store of condition registers setOperationAction(ISD::LOAD, MVT::i1, Custom); setOperationAction(ISD::STORE, MVT::i1, Custom); // FIXME: Remove this once the ANDI glue bug is fixed: if (ANDIGlueBug) setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); setTruncStoreAction(VT, MVT::i1, Expand); } addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass); } // This is used in the ppcf128->int sequence. Note it has different semantics // from FP_ROUND: that rounds to nearest, this rounds to zero. setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); // We do not currently implement these libm ops for PowerPC. setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand); setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand); setOperationAction(ISD::FRINT, MVT::ppcf128, Expand); setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand); setOperationAction(ISD::FREM, MVT::ppcf128, Expand); // PowerPC has no SREM/UREM instructions setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); setOperationAction(ISD::SREM, MVT::i64, Expand); setOperationAction(ISD::UREM, MVT::i64, Expand); // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM. 
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Expand); setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::UDIVREM, MVT::i64, Expand); setOperationAction(ISD::SDIVREM, MVT::i64, Expand); // We don't support sin/cos/sqrt/fmod/pow setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand); setOperationAction(ISD::FPOW , MVT::f64, Expand); setOperationAction(ISD::FMA , MVT::f64, Legal); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FPOW , MVT::f32, Expand); setOperationAction(ISD::FMA , MVT::f32, Legal); setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); // If we're enabling GP optimizations, use hardware square root if (!Subtarget.hasFSQRT() && !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() && Subtarget.hasFRE())) setOperationAction(ISD::FSQRT, MVT::f64, Expand); if (!Subtarget.hasFSQRT() && !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() && Subtarget.hasFRES())) setOperationAction(ISD::FSQRT, MVT::f32, Expand); if (Subtarget.hasFCPSGN()) { setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal); } else { setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); } if (Subtarget.hasFPRND()) { setOperationAction(ISD::FFLOOR, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); setOperationAction(ISD::FTRUNC, MVT::f64, Legal); setOperationAction(ISD::FROUND, MVT::f64, Legal); setOperationAction(ISD::FFLOOR, MVT::f32, Legal); setOperationAction(ISD::FCEIL, MVT::f32, Legal); setOperationAction(ISD::FTRUNC, MVT::f32, Legal); setOperationAction(ISD::FROUND, MVT::f32, Legal); } // PowerPC does not have BSWAP, CTPOP or CTTZ setOperationAction(ISD::BSWAP, MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Expand); setOperationAction(ISD::BSWAP, MVT::i64 , Expand); setOperationAction(ISD::CTTZ , MVT::i64 , Expand); if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) { setOperationAction(ISD::CTPOP, MVT::i32 , Legal); setOperationAction(ISD::CTPOP, MVT::i64 , Legal); } else { setOperationAction(ISD::CTPOP, MVT::i32 , Expand); setOperationAction(ISD::CTPOP, MVT::i64 , Expand); } // PowerPC does not have ROTR setOperationAction(ISD::ROTR, MVT::i32 , Expand); setOperationAction(ISD::ROTR, MVT::i64 , Expand); if (!Subtarget.useCRBits()) { // PowerPC does not have Select setOperationAction(ISD::SELECT, MVT::i32, Expand); setOperationAction(ISD::SELECT, MVT::i64, Expand); setOperationAction(ISD::SELECT, MVT::f32, Expand); setOperationAction(ISD::SELECT, MVT::f64, Expand); } // PowerPC wants to turn select_cc of FP into fsel when possible. 
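  // A short aside on the instruction targeted just below: fsel is a
  // register-based floating-point select,
  //   fsel FRT, FRA, FRC, FRB   =>   FRT = (FRA >= 0.0) ? FRC : FRB
  // so an ordered FP select_cc whose condition compares against zero (or can
  // be rewritten as a subtraction that does) folds into a single branch-free
  // instruction, which is what the custom lowering tries to form.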
  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT,  MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  if (Subtarget.hasDirectMove() && isPPC64) {
    setOperationAction(ISD::BITCAST, MVT::f32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i64, Legal);
    setOperationAction(ISD::BITCAST, MVT::f64, Legal);
  } else {
    setOperationAction(ISD::BITCAST, MVT::f32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
  }

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuation, user-level threading, and etc.. As a result, no
  // other SjLj exception interfaces are implemented and please don't build
  // your own exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  if (Subtarget.isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  if (Subtarget.isSVR4ABI() && !isPPC64)
    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY            , MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (Subtarget.hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);

      // Vector instructions introduced in P8
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      } else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , VT, Promote);
      AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
      setOperationAction(ISD::OR    , VT, Promote);
      AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , VT, Promote);
      AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , VT, Promote);
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::SELECT_CC, VT, Promote);
      AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FPOWI, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL,  VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT,  VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::VSELECT, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      for (MVT InnerVT : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
    else
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);

    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
      if (Subtarget.hasP8Vector()) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
      }
      if (Subtarget.hasDirectMove() && isPPC64) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
      }
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
      setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);

      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
        setOperationAction(ISD::SRL, MVT::v2i64, Legal);

        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
      } else {
        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
        setOperationAction(ISD::SRL, MVT::v2i64, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

        // VSX v2i64 only supports non-arithmetic operations.
        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
      }

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);

      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

      // Vector operation legalization checks the result type of
      // SIGN_EXTEND_INREG, overall legalization checks the inner type.
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);

      setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
      setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
      setOperationAction(ISD::FABS, MVT::v4f32, Legal);
      setOperationAction(ISD::FABS, MVT::v2f64, Legal);

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }

    if (Subtarget.hasP9Vector()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
    }
  }

  if (Subtarget.hasQPX()) {
    setOperationAction(ISD::FADD, MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
    setOperationAction(ISD::FREM, MVT::v4f64, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);

    setOperationAction(ISD::LOAD  , MVT::v4f64, Custom);
    setOperationAction(ISD::STORE , MVT::v4f64, Custom);

    setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);

    setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);

    setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
    setOperationAction(ISD::FABS , MVT::v4f64, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOWI , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);

    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FREM, MVT::v4f32, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);

    setOperationAction(ISD::LOAD  , MVT::v4f32, Custom);
    setOperationAction(ISD::STORE , MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);

    setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
    setOperationAction(ISD::FABS , MVT::v4f32, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOWI , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);

    setOperationAction(ISD::AND , MVT::v4i1, Legal);
    setOperationAction(ISD::OR , MVT::v4i1, Legal);
    setOperationAction(ISD::XOR , MVT::v4i1, Legal);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);

    setOperationAction(ISD::LOAD  , MVT::v4i1, Custom);
    setOperationAction(ISD::STORE , MVT::v4i1, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);

    setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);

    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);

    setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::v4f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);

    // These need to set FE_INEXACT, and so cannot be vectorized here.
    setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);

    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    } else {
      setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);

      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    }
  }

  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

  if (!isPPC64) {
    setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  }

  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }

  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::BUILD_VECTOR);
  if (Subtarget.hasFPCVT())
    setTargetDAGCombine(ISD::UINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  if (Subtarget.useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_VOID);

  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  if (Subtarget.useCRBits()) {
    setTargetDAGCombine(ISD::TRUNCATE);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget.isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }
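  // A sketch of where the setTargetDAGCombine() calls above lead (hedged: the
  // helper name is illustrative, only PerformDAGCombine itself is the real
  // TargetLowering hook): nodes with a registered opcode are fed back to the
  // target, which dispatches on the opcode, roughly:
  //
  //   SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
  //                                                DAGCombinerInfo &DCI) const {
  //     switch (N->getOpcode()) {
  //     default: break;
  //     case ISD::SINT_TO_FP:
  //       return combineConversionToFP(N, DCI);  // hypothetical helper
  //     }
  //     return SDValue();                        // no change
  //   }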
  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
    setHasMultipleConditionRegisters();
    setJumpIsExpensive();
  }

  setMinFunctionAlignment(2);
  if (Subtarget.isDarwin())
    setPrefFunctionAlignment(4);

  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
    setPrefFunctionAlignment(4);
    setPrefLoopAlignment(4);
    break;
  }

  if (Subtarget.enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties(STI.getRegisterInfo());

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of a function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
  }
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      unsigned EltAlign = 0;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  // Darwin passes everything on 4 byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16byte and wider vectors are passed on 16byte boundary.
  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
  return Align;
}

bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((PPCISD::NodeType)Opcode) {
  case PPCISD::FIRST_NUMBER:    break;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
  case PPCISD::FRE:             return "PPCISD::FRE";
  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
  case PPCISD::XXINSERT:        return "PPCISD::XXINSERT";
  case PPCISD::VECSHL:          return "PPCISD::VECSHL";
  case PPCISD::CMPB:            return "PPCISD::CMPB";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
  case PPCISD::MFVSR:           return "PPCISD::MFVSR";
  case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
  case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
  case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
  case PPCISD::ANDIo_1_EQ_BIT:  return "PPCISD::ANDIo_1_EQ_BIT";
  case PPCISD::ANDIo_1_GT_BIT:  return "PPCISD::ANDIo_1_GT_BIT";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
  case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
  case PPCISD::BDZ:             return "PPCISD::BDZ";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
  case PPCISD::SC:              return "PPCISD::SC";
  case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
  case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
  case PPCISD::RFEBB:           return "PPCISD::RFEBB";
  case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
  case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
  case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
  case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
  case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
  case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
  case PPCISD::QBFLT:           return "PPCISD::QBFLT";
  case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
  }
  return nullptr;
}

EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  if (Subtarget.hasQPX())
    return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());

  return VT.changeVectorElementTypeToInteger();
}

bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP =
            dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ?
0 : 1; for (unsigned i = 0; i != 8; ++i) if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)) return false; } return true; } /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUWUM instruction. /// The ShuffleKind distinguishes between big-endian operations with /// two different inputs (0), either-endian operations with two identical /// inputs (1), and little-endian operations with two different inputs (2). /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; for (unsigned i = 0; i != 16; i += 2) if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) return false; } else if (ShuffleKind == 2) { if (!IsLE) return false; for (unsigned i = 0; i != 16; i += 2) if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+1)) return false; } else if (ShuffleKind == 1) { unsigned j = IsLE ? 0 : 2; for (unsigned i = 0; i != 8; i += 2) if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1)) return false; } return true; } /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the /// current subtarget. /// /// The ShuffleKind distinguishes between big-endian operations with /// two different inputs (0), either-endian operations with two identical /// inputs (1), and little-endian operations with two different inputs (2). /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { const PPCSubtarget& Subtarget = static_cast(DAG.getSubtarget()); if (!Subtarget.hasP8Vector()) return false; bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; for (unsigned i = 0; i != 16; i += 4) if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) || !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) || !isConstantOrUndef(N->getMaskElt(i+3), i*2+7)) return false; } else if (ShuffleKind == 2) { if (!IsLE) return false; for (unsigned i = 0; i != 16; i += 4) if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) || !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) || !isConstantOrUndef(N->getMaskElt(i+3), i*2+3)) return false; } else if (ShuffleKind == 1) { unsigned j = IsLE ? 0 : 4; for (unsigned i = 0; i != 8; i += 4) if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) || !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) || !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) || !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) || !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3)) return false; } return true; } /// isVMerge - Common function, used to match vmrg* shuffles. 
/// static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart) { if (N->getValueType(0) != MVT::v16i8) return false; assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && "Unsupported merge size!"); for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j), LHSStart+j+i*UnitSize) || !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j), RHSStart+j+i*UnitSize)) return false; } return true; } /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). /// The ShuffleKind distinguishes between big-endian merges with two /// different inputs (0), either-endian merges with two identical inputs (1), /// and little-endian merges with two different inputs (2). For the latter, /// the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG) { if (DAG.getDataLayout().isLittleEndian()) { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 0, 0); else if (ShuffleKind == 2) // swapped return isVMerge(N, UnitSize, 0, 16); else return false; } else { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 8, 8); else if (ShuffleKind == 0) // normal return isVMerge(N, UnitSize, 8, 24); else return false; } } /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). /// The ShuffleKind distinguishes between big-endian merges with two /// different inputs (0), either-endian merges with two identical inputs (1), /// and little-endian merges with two different inputs (2). For the latter, /// the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG) { if (DAG.getDataLayout().isLittleEndian()) { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 8, 8); else if (ShuffleKind == 2) // swapped return isVMerge(N, UnitSize, 8, 24); else return false; } else { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 0, 0); else if (ShuffleKind == 0) // normal return isVMerge(N, UnitSize, 0, 16); else return false; } } /** * \brief Common function used to match vmrgew and vmrgow shuffles * * The indexOffset determines whether to look for even or odd words in * the shuffle mask. This is based on the endianness of the target * machine. * - Little Endian: * - Use offset of 0 to check for odd elements * - Use offset of 4 to check for even elements * - Big Endian: * - Use offset of 0 to check for even elements * - Use offset of 4 to check for odd elements * A detailed description of the vector element ordering for little endian and * big endian can be found at * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html * Targeting your applications - what little endian and big endian IBM XL C/C++ * compiler differences mean to you * * The mask to the shuffle vector instruction specifies the indices of the * elements from the two input vectors to place in the result. The elements are * numbered in array-access order, starting with the first vector. These vectors * are always of type v16i8, thus each vector will contain 16 elements of size * 8 bits.
More info on the shufflevector instruction can be found in the * Language Reference: * http://llvm.org/docs/LangRef.html#shufflevector-instruction * * The RHSStartValue indicates whether the same input vectors are used (unary) * or two different input vectors are used, based on the following: * - If the instruction uses the same vector for both inputs, the range of the * indices will be 0 to 15. In this case, the RHSStart value passed should * be 0. * - If the instruction has two different vectors then the range of the * indices will be 0 to 31. In this case, the RHSStart value passed should * be 16 (indices 0-15 specify elements in the first vector while indices 16 * to 31 specify elements in the second vector). * * \param[in] N The shuffle vector SD Node to analyze * \param[in] IndexOffset Specifies whether to look for even or odd elements * \param[in] RHSStartValue Specifies the starting index for the right-hand input * vector to the shuffle_vector instruction * \return true iff this shuffle vector represents an even or odd word merge */ static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset, unsigned RHSStartValue) { if (N->getValueType(0) != MVT::v16i8) return false; for (unsigned i = 0; i < 2; ++i) for (unsigned j = 0; j < 4; ++j) if (!isConstantOrUndef(N->getMaskElt(i*4+j), i*RHSStartValue+j+IndexOffset) || !isConstantOrUndef(N->getMaskElt(i*4+j+8), i*RHSStartValue+j+IndexOffset+8)) return false; return true; } /** * \brief Determine if the specified shuffle mask is suitable for the vmrgew or * vmrgow instructions. * * \param[in] N The shuffle vector SD Node to analyze * \param[in] CheckEven Check for an even merge (true) or an odd merge (false) * \param[in] ShuffleKind Identify the type of merge: * - 0 = big-endian merge with two different inputs; * - 1 = either-endian merge with two identical inputs; * - 2 = little-endian merge with two different inputs (inputs are swapped for * little-endian merges). * \param[in] DAG The current SelectionDAG * \return true iff this shuffle mask represents the requested even or odd * word merge */ bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG) { if (DAG.getDataLayout().isLittleEndian()) { unsigned indexOffset = CheckEven ? 4 : 0; if (ShuffleKind == 1) // Unary return isVMerge(N, indexOffset, 0); else if (ShuffleKind == 2) // swapped return isVMerge(N, indexOffset, 16); else return false; } else { unsigned indexOffset = CheckEven ? 0 : 4; if (ShuffleKind == 1) // Unary return isVMerge(N, indexOffset, 0); else if (ShuffleKind == 0) // Normal return isVMerge(N, indexOffset, 16); else return false; } return false; } /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. /// The ShuffleKind distinguishes between big-endian operations with two /// different inputs (0), either-endian operations with two identical inputs /// (1), and little-endian operations with two different inputs (2). For the /// latter, the input operands are swapped (see PPCInstrAltivec.td). int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { if (N->getValueType(0) != MVT::v16i8) return -1; ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); // Find the first non-undef value in the shuffle mask. unsigned i; for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i) /*search*/; if (i == 16) return -1; // all undef. // Otherwise, check to see if the rest of the elements are consecutively // numbered from this value.
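// Illustrative example (not from the original source): for the big-endian
// two-input case (ShuffleKind == 0), the v16i8 mask
//   <3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18>
// is consecutive starting at 3, so a shift amount of 3 is returned,
// corresponding to "vsldoi vD, vA, vB, 3"; on little-endian subtargets the
// amount is reported as 16 - ShiftAmt instead.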
unsigned ShiftAmt = SVOp->getMaskElt(i); if (ShiftAmt < i) return -1; ShiftAmt -= i; bool isLE = DAG.getDataLayout().isLittleEndian(); if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) { // Check the rest of the elements to see if they are consecutive. for (++i; i != 16; ++i) if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) return -1; } else if (ShuffleKind == 1) { // Check the rest of the elements to see if they are consecutive. for (++i; i != 16; ++i) if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) return -1; } else return -1; if (isLE) ShiftAmt = 16 - ShiftAmt; return ShiftAmt; } /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a splat of a single element that is suitable for input to /// VSPLTB/VSPLTH/VSPLTW. bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { assert(N->getValueType(0) == MVT::v16i8 && (EltSize == 1 || EltSize == 2 || EltSize == 4)); // The consecutive indices need to specify an element, not part of two // different elements. So abandon ship early if this isn't the case. if (N->getMaskElt(0) % EltSize != 0) return false; // This is a splat operation if each element of the permute is the same, and // if the value doesn't reference the second vector. unsigned ElementBase = N->getMaskElt(0); // FIXME: Handle UNDEF elements too! if (ElementBase >= 16) return false; // Check that the indices are consecutive, in the case of a multi-byte element // splatted with a v16i8 mask. for (unsigned i = 1; i != EltSize; ++i) if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase)) return false; for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { if (N->getMaskElt(i) < 0) continue; for (unsigned j = 0; j != EltSize; ++j) if (N->getMaskElt(i+j) != N->getMaskElt(j)) return false; } return true; } bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE) { // Check that the mask is shuffling words for (unsigned i = 0; i < 4; ++i) { unsigned B0 = N->getMaskElt(i*4); unsigned B1 = N->getMaskElt(i*4+1); unsigned B2 = N->getMaskElt(i*4+2); unsigned B3 = N->getMaskElt(i*4+3); if (B0 % 4) return false; if (B1 != B0+1 || B2 != B1+1 || B3 != B2+1) return false; } // Now we look at mask elements 0,4,8,12 unsigned M0 = N->getMaskElt(0) / 4; unsigned M1 = N->getMaskElt(4) / 4; unsigned M2 = N->getMaskElt(8) / 4; unsigned M3 = N->getMaskElt(12) / 4; unsigned LittleEndianShifts[] = { 2, 1, 0, 3 }; unsigned BigEndianShifts[] = { 3, 0, 1, 2 }; // Below, let H and L be arbitrary elements of the shuffle mask // where H is in the range [4,7] and L is in the range [0,3]. // H, 1, 2, 3 or L, 5, 6, 7 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) || (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) { ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3]; InsertAtByte = IsLE ? 12 : 0; Swap = M0 < 4; return true; } // 0, H, 2, 3 or 4, L, 6, 7 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) || (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) { ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3]; InsertAtByte = IsLE ? 8 : 4; Swap = M1 < 4; return true; } // 0, 1, H, 3 or 4, 5, L, 7 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) || (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) { ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3]; InsertAtByte = IsLE ? 
4 : 8; Swap = M2 < 4; return true; } // 0, 1, 2, H or 4, 5, 6, L if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) || (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) { ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3]; InsertAtByte = IsLE ? 0 : 12; Swap = M3 < 4; return true; } // If both vector operands for the shuffle are the same vector, the mask will // contain only elements from the first one and the second one will be undef. if (N->getOperand(1).isUndef()) { ShiftElts = 0; Swap = true; unsigned XXINSERTWSrcElem = IsLE ? 2 : 1; if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) { InsertAtByte = IsLE ? 12 : 0; return true; } if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) { InsertAtByte = IsLE ? 8 : 4; return true; } if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) { InsertAtByte = IsLE ? 4 : 8; return true; } if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) { InsertAtByte = IsLE ? 0 : 12; return true; } } return false; } /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); assert(isSplatShuffleMask(SVOp, EltSize)); if (DAG.getDataLayout().isLittleEndian()) return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); else return SVOp->getMaskElt(0) / EltSize; } /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed /// by using a vspltis[bhw] instruction of the specified element size, return /// the constant being splatted. The ByteSize field indicates the number of /// bytes of each element [124] -> [bhw]. SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { SDValue OpVal(nullptr, 0); // If ByteSize of the splat is bigger than the element size of the // build_vector, then we have a case where we are checking for a splat where // multiple elements of the buildvector are folded together into a single // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8). unsigned EltSize = 16/N->getNumOperands(); if (EltSize < ByteSize) { unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval. SDValue UniquedVals[4]; assert(Multiple > 1 && Multiple <= 4 && "How can this happen?"); // See if all of the elements in the buildvector agree across. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { if (N->getOperand(i).isUndef()) continue; // If the element isn't a constant, bail out entirely. if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue(); if (!UniquedVals[i&(Multiple-1)].getNode()) UniquedVals[i&(Multiple-1)] = N->getOperand(i); else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) return SDValue(); // no match. } // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains // either constant or undef values that are identical for each chunk. See // if these chunks can form into a larger vspltis*. // Check to see if all of the leading entries are either 0 or -1. If // neither, then this won't fit into the immediate field. bool LeadingZero = true; bool LeadingOnes = true; for (unsigned i = 0; i != Multiple-1; ++i) { if (!UniquedVals[i].getNode()) continue; // Must have been undefs. LeadingZero &= isNullConstant(UniquedVals[i]); LeadingOnes &= isAllOnesConstant(UniquedVals[i]); } // Finally, check the least significant entry.
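// Worked example (illustrative, not from the original source): a v16i8
// build_vector of the repeating byte pair {0, 1} checked with ByteSize == 2
// gives Multiple == 2 and UniquedVals == {0, 1}; the leading entry is zero
// and the final entry (1) fits the immediate field, so this folds into
// "vspltish 1".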
if (LeadingZero) { if (!UniquedVals[Multiple-1].getNode()) return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef int Val = cast(UniquedVals[Multiple-1])->getZExtValue(); if (Val < 16) // 0,0,0,4 -> vspltisw(4) return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); } if (LeadingOnes) { if (!UniquedVals[Multiple-1].getNode()) return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef int Val =cast(UniquedVals[Multiple-1])->getSExtValue(); if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); } return SDValue(); } // Check to see if this buildvec has a single non-undef value in its elements. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { if (N->getOperand(i).isUndef()) continue; if (!OpVal.getNode()) OpVal = N->getOperand(i); else if (OpVal != N->getOperand(i)) return SDValue(); } if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def. unsigned ValSizeInBytes = EltSize; uint64_t Value = 0; if (ConstantSDNode *CN = dyn_cast(OpVal)) { Value = CN->getZExtValue(); } else if (ConstantFPSDNode *CN = dyn_cast(OpVal)) { assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); Value = FloatToBits(CN->getValueAPF().convertToFloat()); } // If the splat value is larger than the element value, then we can never do // this splat. The only case that we could fit the replicated bits into our // immediate field for would be zero, and we prefer to use vxor for it. if (ValSizeInBytes < ByteSize) return SDValue(); // If the element value is larger than the splat value, check if it consists // of a repeated bit pattern of size ByteSize. if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8)) return SDValue(); // Properly sign extend the value. int MaskVal = SignExtend32(Value, ByteSize * 8); // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. if (MaskVal == 0) return SDValue(); // Finally, if this value fits in a 5 bit sext field, return it if (SignExtend32<5>(MaskVal) == MaskVal) return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32); return SDValue(); } /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift /// amount, otherwise return -1. int PPC::isQVALIGNIShuffleMask(SDNode *N) { EVT VT = N->getValueType(0); if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1) return -1; ShuffleVectorSDNode *SVOp = cast(N); // Find the first non-undef value in the shuffle mask. unsigned i; for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i) /*search*/; if (i == 4) return -1; // all undef. // Otherwise, check to see if the rest of the elements are consecutively // numbered from this value. unsigned ShiftAmt = SVOp->getMaskElt(i); if (ShiftAmt < i) return -1; ShiftAmt -= i; // Check the rest of the elements to see if they are consecutive. for (++i; i != 4; ++i) if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) return -1; return ShiftAmt; } //===----------------------------------------------------------------------===// // Addressing Mode Selection //===----------------------------------------------------------------------===// /// isIntS16Immediate - This method tests to see if the node is either a 32-bit /// or 64-bit immediate, and if the value can be accurately represented as a /// sign extension from a 16-bit value. If so, this returns true and the /// immediate. 
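/// For illustration (hypothetical values, not from the original source): an
/// i64 constant 0xFFFFFFFFFFFF8000 truncates to the 16-bit value -32768 and
/// sign-extends back to the same constant, so Imm is set to -32768 and the
/// function returns true, whereas 0x18000 truncates to -32768 != 0x18000 and
/// is rejected.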
static bool isIntS16Immediate(SDNode *N, short &Imm) { if (!isa<ConstantSDNode>(N)) return false; Imm = (short)cast<ConstantSDNode>(N)->getZExtValue(); if (N->getValueType(0) == MVT::i32) return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); else return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); } static bool isIntS16Immediate(SDValue Op, short &Imm) { return isIntS16Immediate(Op.getNode(), Imm); } /// SelectAddressRegReg - Given the specified address, check to see if it /// can be represented as an indexed [r+r] operation. Returns false if it /// can be more efficiently represented with [r+imm]. bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const { short imm = 0; if (N.getOpcode() == ISD::ADD) { if (isIntS16Immediate(N.getOperand(1), imm)) return false; // r+i if (N.getOperand(1).getOpcode() == PPCISD::Lo) return false; // r+i Base = N.getOperand(0); Index = N.getOperand(1); return true; } else if (N.getOpcode() == ISD::OR) { if (isIntS16Immediate(N.getOperand(1), imm)) return false; // r+i; fold into [r+imm] if we can. // If this is an or of disjoint bitfields, we can codegen this as an add // (for better address arithmetic) if the LHS and RHS of the OR are provably // disjoint. APInt LHSKnownZero, LHSKnownOne; APInt RHSKnownZero, RHSKnownOne; DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); if (LHSKnownZero.getBoolValue()) { DAG.computeKnownBits(N.getOperand(1), RHSKnownZero, RHSKnownOne); // If all of the bits are known zero on the LHS or RHS, the add won't // carry. if (~(LHSKnownZero | RHSKnownZero) == 0) { Base = N.getOperand(0); Index = N.getOperand(1); return true; } } } return false; } // If we happen to be doing an i64 load or store into a stack slot that has // less than a 4-byte alignment, then the frame-index elimination may need to // use an indexed load or store instruction (because the offset may not be a // multiple of 4). The extra register needed to hold the offset comes from the // register scavenger, and it is possible that the scavenger will need to use // an emergency spill slot. As a result, we need to make sure that a spill slot // is allocated when doing an i64 load/store into a less-than-4-byte-aligned // stack slot. static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { // FIXME: This does not handle the LWA case. if (VT != MVT::i64) return; // NOTE: We'll exclude negative FIs here, which come from argument // lowering, because there are no known test cases triggering this problem // using packed structures (or similar). We can remove this exclusion if // we find such a test case. The reason why this is so test-case driven is // because this entire 'fixup' is only to prevent crashes (from the // register scavenger) on not-really-valid inputs. For example, if we have: // %a = alloca i1 // %b = bitcast i1* %a to i64* // store i64* a, i64 b // then the store should really be marked as 'align 1', but is not. If it // were marked as 'align 1' then the indexed form would have been // instruction-selected initially, and the problem this 'fixup' is preventing // won't happen regardless. if (FrameIdx < 0) return; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); unsigned Align = MFI->getObjectAlignment(FrameIdx); if (Align >= 4) return; PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); FuncInfo->setHasNonRISpills(); } /// Returns true if the address N can be represented by a base register plus /// a signed 16-bit displacement [r+imm], and if it is not better /// represented as reg+reg.
If Aligned is true, only accept displacements /// suitable for STD and friends, i.e. multiples of 4. bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, bool Aligned) const { // FIXME dl should come from parent load or store, not from address SDLoc dl(N); // If this can be more profitably realized as r+r, fail. if (SelectAddressRegReg(N, Disp, Base, DAG)) return false; if (N.getOpcode() == ISD::ADD) { short imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && (!Aligned || (imm & 3) == 0)) { Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); } else { Base = N.getOperand(0); } return true; // [r+i] } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { // Match LOAD (ADD (X, Lo(G))). assert(!cast(N.getOperand(1).getOperand(1))->getZExtValue() && "Cannot handle constant offsets yet!"); Disp = N.getOperand(1).getOperand(0); // The global address. assert(Disp.getOpcode() == ISD::TargetGlobalAddress || Disp.getOpcode() == ISD::TargetGlobalTLSAddress || Disp.getOpcode() == ISD::TargetConstantPool || Disp.getOpcode() == ISD::TargetJumpTable); Base = N.getOperand(0); return true; // [&g+r] } } else if (N.getOpcode() == ISD::OR) { short imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && (!Aligned || (imm & 3) == 0)) { // If this is an or of disjoint bitfields, we can codegen this as an add // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. APInt LHSKnownZero, LHSKnownOne; DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { // If all of the bits are known zero on the LHS or RHS, the add won't // carry. if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); } else { Base = N.getOperand(0); } Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); return true; } } } else if (ConstantSDNode *CN = dyn_cast(N)) { // Loading from a constant address. // If this address fits entirely in a 16-bit sext immediate field, codegen // this as "d, 0" short Imm; if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) { Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0)); Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); return true; } // Handle 32-bit sext immediates with LIS + addr mode. if ((CN->getValueType(0) == MVT::i32 || (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) && (!Aligned || (CN->getZExtValue() & 3) == 0)) { int Addr = (int)CN->getZExtValue(); // Otherwise, break this down into an LIS + disp. Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32); Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl, MVT::i32); unsigned Opc = CN->getValueType(0) == MVT::i32 ? 
PPC::LIS : PPC::LIS8; Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0); return true; } } Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout())); if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); } else Base = N; return true; // [r+0] } /// SelectAddressRegRegOnly - Given the specified address, force it to be /// represented as an indexed [r+r] operation. bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const { // Check to see if we can easily represent this as an [r+r] address. This // will fail if it thinks that the address is more profitably represented as // reg+imm, e.g. where imm = 0. if (SelectAddressRegReg(N, Base, Index, DAG)) return true; // If the operand is an addition, always emit this as [r+r], since this is // better (for code size, and execution, as the memop does the add for free) // than emitting an explicit add. if (N.getOpcode() == ISD::ADD) { Base = N.getOperand(0); Index = N.getOperand(1); return true; } // Otherwise, do it the hard way, using R0 as the base register. Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, N.getValueType()); Index = N; return true; } /// getPreIndexedAddressParts - Returns true by value, and sets the base /// pointer, the offset pointer and the addressing mode by reference, if the /// node's address can be legally represented as a pre-indexed load / store /// address. bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { if (DisablePPCPreinc) return false; bool isLoad = true; SDValue Ptr; EVT VT; unsigned Alignment; if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { Ptr = LD->getBasePtr(); VT = LD->getMemoryVT(); Alignment = LD->getAlignment(); } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { Ptr = ST->getBasePtr(); VT = ST->getMemoryVT(); Alignment = ST->getAlignment(); isLoad = false; } else return false; // PowerPC doesn't have preinc load/store instructions for vectors (except // for QPX, which does have preinc r+r forms). if (VT.isVector()) { if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) { return false; } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) { AM = ISD::PRE_INC; return true; } } if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) { // Common code will reject creating a pre-inc form if the base pointer // is a frame index, or if N is a store and the base pointer is either // the same as or a predecessor of the value being stored. Check for // those situations here, and try with swapped Base/Offset instead. bool Swap = false; if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base)) Swap = true; else if (!isLoad) { SDValue Val = cast<StoreSDNode>(N)->getValue(); if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode())) Swap = true; } if (Swap) std::swap(Base, Offset); AM = ISD::PRE_INC; return true; } // LDU/STU can only handle immediates that are a multiple of 4. if (VT != MVT::i64) { if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false)) return false; } else { // LDU/STU need an address with at least 4-byte alignment. if (Alignment < 4) return false; if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true)) return false; } if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of // sext i32 to i64 when addr mode is r+i.
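// Illustrative case: "lwaux" exists but there is no immediate-form "lwau",
// so a pre-increment sign-extending i32-to-i64 load whose address matched
// the [r+imm] form above must be rejected here; common code then keeps the
// non-updating form.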
if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 && LD->getExtensionType() == ISD::SEXTLOAD && isa(Offset)) return false; } AM = ISD::PRE_INC; return true; } //===----------------------------------------------------------------------===// // LowerOperation implementation //===----------------------------------------------------------------------===// /// Return true if we should reference labels using a PICBase, set the HiOpFlags /// and LoOpFlags to the target MO flags. static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV = nullptr) { HiOpFlags = PPCII::MO_HA; LoOpFlags = PPCII::MO_LO; // Don't use the pic base if not in PIC relocation model. if (IsPIC) { HiOpFlags |= PPCII::MO_PIC_FLAG; LoOpFlags |= PPCII::MO_PIC_FLAG; } // If this is a reference to a global value that requires a non-lazy-ptr, make // sure that instruction lowering adds it. if (GV && Subtarget.hasLazyResolverStub(GV)) { HiOpFlags |= PPCII::MO_NLP_FLAG; LoOpFlags |= PPCII::MO_NLP_FLAG; if (GV->hasHiddenVisibility()) { HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; } } } static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG) { SDLoc DL(HiPart); EVT PtrVT = HiPart.getValueType(); SDValue Zero = DAG.getConstant(0, DL, PtrVT); SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero); SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero); // With PIC, the first instruction is actually "GR+hi(&G)". if (isPIC) Hi = DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi); // Generate non-pic code that has direct accesses to the constant pool. // The address of the global is just (hi(&g)+lo(&g)). return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); } static void setUsesTOCBasePtr(MachineFunction &MF) { PPCFunctionInfo *FuncInfo = MF.getInfo(); FuncInfo->setUsesTOCBasePtr(); } static void setUsesTOCBasePtr(SelectionDAG &DAG) { setUsesTOCBasePtr(DAG.getMachineFunction()); } static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit, SDValue GA) { EVT VT = Is64Bit ? MVT::i64 : MVT::i32; SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); SDValue Ops[] = { GA, Reg }; return DAG.getMemIntrinsicNode( PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT, MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true, false, 0); } SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); ConstantPoolSDNode *CP = cast(Op); const Constant *C = CP->getConstVal(); // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. 
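// As a sketch (assembly spelling illustrative, medium code model): the
// TOC_ENTRY node built by getTOCEntry above typically materializes as a
// TOC-relative load such as
//   addis r3, r2, .LC0@toc@ha
//   ld    r3, .LC0@toc@l(r3)
// where r2 is the dedicated TOC pointer (X2 above).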
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); return getTOCEntry(DAG, SDLoc(CP), true, GA); } unsigned MOHiFlag, MOLoFlag; bool IsPIC = isPositionIndependent(); getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), PPCII::MO_PIC_FLAG); return getTOCEntry(DAG, SDLoc(CP), false, GA); } SDValue CPIHi = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag); SDValue CPILo = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag); return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG); } SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast(Op); // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); return getTOCEntry(DAG, SDLoc(JT), true, GA); } unsigned MOHiFlag, MOLoFlag; bool IsPIC = isPositionIndependent(); getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, PPCII::MO_PIC_FLAG); return getTOCEntry(DAG, SDLoc(GA), false, GA); } SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG); } SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); BlockAddressSDNode *BASDN = cast(Op); const BlockAddress *BA = BASDN->getBlockAddress(); // 64-bit SVR4 ABI code is always position-independent. // The actual BlockAddress is stored in the TOC. if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); return getTOCEntry(DAG, SDLoc(BASDN), true, GA); } unsigned MOHiFlag, MOLoFlag; bool IsPIC = isPositionIndependent(); getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag); SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag); return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG); } SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // FIXME: TLS addresses currently use medium model code sequences, // which is the most useful form. Eventually support for small and // large models could be added if users need it, at the cost of // additional complexity. GlobalAddressSDNode *GA = cast(Op); if (DAG.getTarget().Options.EmulatedTLS) return LowerToTLSEmulatedModel(GA, DAG); SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); bool is64bit = Subtarget.isPPC64(); const Module *M = DAG.getMachineFunction().getFunction()->getParent(); PICLevel::Level picLevel = M->getPICLevel(); TLSModel::Model Model = getTargetMachine().getTLSModel(GV); if (Model == TLSModel::LocalExec) { SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_HA); SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_LO); SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, is64bit ? 
MVT::i64 : MVT::i32); SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); } if (Model == TLSModel::InitialExec) { SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLS); SDValue GOTPtr; if (is64bit) { setUsesTOCBasePtr(DAG); SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA); } else GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr); return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); } if (Model == TLSModel::GeneralDynamic) { SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue GOTPtr; if (is64bit) { setUsesTOCBasePtr(DAG); SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, GOTReg, TGA); } else { if (picLevel == PICLevel::SmallPIC) GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); else GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); } return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT, GOTPtr, TGA, TGA); } if (Model == TLSModel::LocalDynamic) { SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue GOTPtr; if (is64bit) { setUsesTOCBasePtr(DAG); SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, GOTReg, TGA); } else { if (picLevel == PICLevel::SmallPIC) GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); else GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); } SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl, PtrVT, GOTPtr, TGA, TGA); SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT, TLSAddr, TGA); return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); } llvm_unreachable("Unknown TLS model!"); } SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast(Op); SDLoc DL(GSDN); const GlobalValue *GV = GSDN->getGlobal(); // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); return getTOCEntry(DAG, DL, true, GA); } unsigned MOHiFlag, MOLoFlag; bool IsPIC = isPositionIndependent(); getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV); if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), PPCII::MO_PIC_FLAG); return getTOCEntry(DAG, DL, false, GA); } SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag); SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag); SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG); // If the global reference is actually to a non-lazy-pointer, we have to do an // extra load to get the address of the global. 
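// Sketch (the Darwin stub spelling below is illustrative): with MO_NLP_FLAG
// set, the Hi/Lo pair computes the address of a non-lazy-pointer slot, e.g.
//   lis r3, ha16(L_g$non_lazy_ptr)
//   lwz r3, lo16(L_g$non_lazy_ptr)(r3)
// which yields the address of _g, hence the extra load emitted below.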
if (MOHiFlag & PPCII::MO_NLP_FLAG) Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); return Ptr; } SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast(Op.getOperand(2))->get(); SDLoc dl(Op); if (Op.getValueType() == MVT::v2i64) { // When the operands themselves are v2i64 values, we need to do something // special because VSX has no underlying comparison operations for these. if (Op.getOperand(0).getValueType() == MVT::v2i64) { // Equality can be handled by casting to the legal type for Altivec // comparisons, everything else needs to be expanded. if (CC == ISD::SETEQ || CC == ISD::SETNE) { return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, DAG.getSetCC(dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)), CC)); } return SDValue(); } // We handle most of these in the usual way. return Op; } // If we're comparing for equality to zero, expose the fact that this is // implemented as a ctlz/srl pair on ppc, so that the dag combiner can // fold the new nodes. if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { if (C->isNullValue() && CC == ISD::SETEQ) { EVT VT = Op.getOperand(0).getValueType(); SDValue Zext = Op.getOperand(0); if (VT.bitsLT(MVT::i32)) { VT = MVT::i32; Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0)); } unsigned Log2b = Log2_32(VT.getSizeInBits()); SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext); SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz, DAG.getConstant(Log2b, dl, MVT::i32)); return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc); } // Leave comparisons against 0 and -1 alone for now, since they're usually // optimized. FIXME: revisit this when we can custom lower all setcc // optimizations. if (C->isAllOnesValue() || C->isNullValue()) return SDValue(); } // If we have an integer seteq/setne, turn it into a compare against zero // by xor'ing the rhs with the lhs, which is faster than setting a // condition register, reading it back out, and masking the correct bit. The // normal approach here uses sub to do this instead of xor. Using xor exposes // the result to other bit-twiddling opportunities. 
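// Worked example (illustrative): for an i32 "seteq %a, %b" this produces
// (setcc (xor %a, %b), 0, seteq), and the ctlz/srl expansion above then
// yields "cntlzw rT, rT" followed by "srwi rT, rT, 5"; cntlzw returns 32
// only for a zero input, so bit 5 is exactly the equality result and no
// condition-register read is needed.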
EVT LHSVT = Op.getOperand(0).getValueType(); if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { EVT VT = Op.getValueType(); SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), Op.getOperand(1)); return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC); } return SDValue(); } SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); EVT VT = Node->getValueType(0); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue InChain = Node->getOperand(0); SDValue VAListPtr = Node->getOperand(1); const Value *SV = cast(Node->getOperand(2))->getValue(); SDLoc dl(Node); assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); // gpr_index SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, VAListPtr, MachinePointerInfo(SV), MVT::i8); InChain = GprIndex.getValue(1); if (VT == MVT::i64) { // Check if GprIndex is even SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, DAG.getConstant(1, dl, MVT::i32)); SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, DAG.getConstant(0, dl, MVT::i32), ISD::SETNE); SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, DAG.getConstant(1, dl, MVT::i32)); // Align GprIndex to be even if it isn't GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, GprIndex); } // fpr index is 1 byte after gpr SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, DAG.getConstant(1, dl, MVT::i32)); // fpr SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, FprPtr, MachinePointerInfo(SV), MVT::i8); InChain = FprIndex.getValue(1); SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, DAG.getConstant(8, dl, MVT::i32)); SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, DAG.getConstant(4, dl, MVT::i32)); // areas SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo()); InChain = OverflowArea.getValue(1); SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo()); InChain = RegSaveArea.getValue(1); // select overflow_area if index > 8 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, DAG.getConstant(8, dl, MVT::i32), ISD::SETLT); // adjustment constant gpr_index * 4/8 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, DAG.getConstant(VT.isInteger() ? 4 : 8, dl, MVT::i32)); // OurReg = RegSaveArea + RegConstant SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, RegConstant); // Floating types are 32 bytes into RegSaveArea if (VT.isFloatingPoint()) OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, DAG.getConstant(32, dl, MVT::i32)); // increase {f,g}pr_index by 1 (or 2 if VT is i64) SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl, MVT::i32)); InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, VT.isInteger() ? VAListPtr : FprPtr, MachinePointerInfo(SV), MVT::i8); // determine if we should load from reg_save_area or overflow_area SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); // increase overflow_area by 4/8 if gpr/fpr > 8 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, DAG.getConstant(VT.isInteger() ? 
4 : 8, dl, MVT::i32)); OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, OverflowAreaPlusN); InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr, MachinePointerInfo(), MVT::i32); return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo()); } SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only"); // We have to copy the entire va_list struct: // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2), DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true, false, MachinePointerInfo(), MachinePointerInfo()); } SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { return Op.getOperand(0); } SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function SDValue Nest = Op.getOperand(3); // 'nest' parameter value SDLoc dl(Op); EVT PtrVT = getPointerTy(DAG.getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Ty = IntPtrTy; Entry.Node = Trmp; Args.push_back(Entry); // TrampSize == (isPPC64 ? 48 : 40); Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl, isPPC64 ? MVT::i64 : MVT::i32); Args.push_back(Entry); Entry.Node = FPtr; Args.push_back(Entry); Entry.Node = Nest; Args.push_back(Entry); // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Chain) .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args)); std::pair CallResult = LowerCallTo(CLI); return CallResult.second; } SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); PPCFunctionInfo *FuncInfo = MF.getInfo(); EVT PtrVT = getPointerTy(MF.getDataLayout()); SDLoc dl(Op); if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), MachinePointerInfo(SV)); } // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. // We suppose the given va_list is already allocated. // // typedef struct { // char gpr; /* index into the array of 8 GPRs // * stored in the register save area // * gpr=0 corresponds to r3, // * gpr=1 to r4, etc. // */ // char fpr; /* index into the array of 8 FPRs // * stored in the register save area // * fpr=0 corresponds to f1, // * fpr=1 to f2, etc. 
// */ // char *overflow_arg_area; // /* location on stack that holds // * the next overflow argument // */ // char *reg_save_area; // /* where r3:r10 and f1:f8 (if saved) // * are stored // */ // } va_list[1]; SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32); SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32); SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), PtrVT); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); uint64_t FrameOffset = PtrVT.getSizeInBits()/8; SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT); uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT); uint64_t FPROffset = 1; SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT); const Value *SV = cast(Op.getOperand(2))->getValue(); // Store first byte : number of int regs SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1), MachinePointerInfo(SV), MVT::i8); uint64_t nextOffset = FPROffset; SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), ConstFPROffset); // Store second byte : number of float regs SDValue secondStore = DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, MachinePointerInfo(SV, nextOffset), MVT::i8); nextOffset += StackOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); // Store second word : arguments given on stack SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, MachinePointerInfo(SV, nextOffset)); nextOffset += FrameOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); // Store third word : arguments given in registers return DAG.getStore(thirdStore, dl, FR, nextPtr, MachinePointerInfo(SV, nextOffset)); } #include "PPCGenCallingConv.inc" // Function whose sole purpose is to kill compiler warnings // stemming from unused functions included from PPCGenCallingConv.inc. CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS; } bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { return true; } bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { static const MCPhysReg ArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; const unsigned NumArgRegs = array_lengthof(ArgRegs); unsigned RegNum = State.getFirstUnallocated(ArgRegs); // Skip one register if the first unallocated register has an even register // number and there are still argument registers available which have not been // allocated yet. RegNum is actually an index into ArgRegs, which means we // need to skip a register if RegNum is odd. if (RegNum != NumArgRegs && RegNum % 2 == 1) { State.AllocateReg(ArgRegs[RegNum]); } // Always return false here, as this function only makes sure that the first // unallocated register has an odd register number and does not actually // allocate a register for the current argument. 
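// Example (illustrative): if only r3 has been allocated and the next
// argument is an i64 needing an aligned pair, RegNum is 1 (r4); r4 is
// allocated here purely as a skip, so the value lands in the aligned pair
// r5/r6 as the 32-bit SVR4 ABI requires.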
return false; } bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { static const MCPhysReg ArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 }; const unsigned NumArgRegs = array_lengthof(ArgRegs); unsigned RegNum = State.getFirstUnallocated(ArgRegs); // If there is only one Floating-point register left we need to put both f64 // values of a split ppc_fp128 value on the stack. if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) { State.AllocateReg(ArgRegs[RegNum]); } // Always return false here, as this function only makes sure that the two f64 // values a ppc_fp128 value is split into are both passed in registers or both // passed on the stack and does not actually allocate a register for the // current argument. return false; } /// FPR - The set of FP registers that should be allocated for arguments, /// on Darwin. static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13}; /// QFPR - The set of QPX registers that should be allocated for arguments. static const MCPhysReg QFPR[] = { PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13}; /// CalculateStackSlotSize - Calculates the size reserved for this argument on /// the stack. static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize) { unsigned ArgSize = ArgVT.getStoreSize(); if (Flags.isByVal()) ArgSize = Flags.getByValSize(); // Round up to multiples of the pointer size, except for array members, // which are always packed. if (!Flags.isInConsecutiveRegs()) ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; return ArgSize; } /// CalculateStackSlotAlignment - Calculates the alignment of this argument /// on the stack. static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize) { unsigned Align = PtrByteSize; // Altivec parameters are padded to a 16 byte boundary. if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || ArgVT == MVT::v1i128) Align = 16; // QPX vector types stored in double-precision are padded to a 32 byte // boundary. else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1) Align = 32; // ByVal parameters are aligned as requested. if (Flags.isByVal()) { unsigned BVAlign = Flags.getByValAlign(); if (BVAlign > PtrByteSize) { if (BVAlign % PtrByteSize != 0) llvm_unreachable( "ByVal alignment is not a multiple of the pointer size"); Align = BVAlign; } } // Array members are always packed to their original alignment. if (Flags.isInConsecutiveRegs()) { // If the array member was split into multiple registers, the first // needs to be aligned to the size of the full type. (Except for // ppcf128, which is only aligned as its f64 components.) if (Flags.isSplit() && OrigVT != MVT::ppcf128) Align = OrigVT.getStoreSize(); else Align = ArgVT.getStoreSize(); } return Align; } /// CalculateStackSlotUsed - Return whether this argument will use its /// stack slot (instead of being passed in registers). ArgOffset, /// AvailableFPRs, and AvailableVRs must hold the current argument /// position, and will be updated to account for this argument. 
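/// For example, with the usual 64-bit ELFv1 numbers (LinkageSize = 48 and a
/// parameter save area of 8 doublewords = 64 bytes), the ninth doubleword
/// argument would start at offset 112 = 48 + 64 and is therefore flagged as
/// using memory unless it can still be passed in an FPR or VR.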
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs, bool HasQPX) { bool UseMemory = false; // Respect alignment of argument on the stack. unsigned Align = CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; // If there's no space left in the argument save area, we must // use memory (this check also catches zero-sized arguments). if (ArgOffset >= LinkageSize + ParamAreaSize) UseMemory = true; // Allocate argument on the stack. ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); if (Flags.isInConsecutiveRegsLast()) ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; // If we overran the argument save area, we must use memory // (this check catches arguments passed partially in memory) if (ArgOffset > LinkageSize + ParamAreaSize) UseMemory = true; // However, if the argument is actually passed in an FPR or a VR, // we don't use memory after all. if (!Flags.isByVal()) { if (ArgVT == MVT::f32 || ArgVT == MVT::f64 || // QPX registers overlap with the scalar FP registers. (HasQPX && (ArgVT == MVT::v4f32 || ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1))) if (AvailableFPRs > 0) { --AvailableFPRs; return false; } if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || ArgVT == MVT::v1i128) if (AvailableVRs > 0) { --AvailableVRs; return false; } } return UseMemory; } /// EnsureStackAlignment - Round stack frame size up from NumBytes to /// ensure minimum alignment required for target. static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes) { unsigned TargetAlign = Lowering->getStackAlignment(); unsigned AlignMask = TargetAlign - 1; NumBytes = (NumBytes + AlignMask) & ~AlignMask; return NumBytes; } SDValue PPCTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { if (Subtarget.isSVR4ABI()) { if (Subtarget.isPPC64()) return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); else return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); } else { return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); } } SDValue PPCTargetLowering::LowerFormalArguments_32SVR4( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { // 32-bit SVR4 ABI Stack Frame Layout: // +-----------------------------------+ // +--> | Back chain | // | +-----------------------------------+ // | | Floating-point register save area | // | +-----------------------------------+ // | | General register save area | // | +-----------------------------------+ // | | CR save word | // | +-----------------------------------+ // | | VRSAVE save word | // | +-----------------------------------+ // | | Alignment padding | // | +-----------------------------------+ // | | Vector register save area | // | +-----------------------------------+ // | | Local variable space | // | +-----------------------------------+ // | | Parameter list area | // | +-----------------------------------+ // | | LR save word | // | +-----------------------------------+ // SP--> +--- | 
Back chain | // +-----------------------------------+ // // Specifications: // System V Application Binary Interface PowerPC Processor Supplement // AltiVec Technology Programming Interface Manual MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo(); EVT PtrVT = getPointerTy(MF.getDataLayout()); // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); unsigned PtrByteSize = 4; // Assign locations to all of the incoming arguments. SmallVector ArgLocs; PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); // Reserve space for the linkage area on the stack. unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); CCInfo.AllocateStack(LinkageSize, PtrByteSize); if (useSoftFloat()) CCInfo.PreAnalyzeFormalArguments(Ins); CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); CCInfo.clearWasPPCF128(); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; // Arguments stored in registers. if (VA.isRegLoc()) { const TargetRegisterClass *RC; EVT ValVT = VA.getValVT(); switch (ValVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("ValVT not supported by formal arguments Lowering"); case MVT::i1: case MVT::i32: RC = &PPC::GPRCRegClass; break; case MVT::f32: if (Subtarget.hasP8Vector()) RC = &PPC::VSSRCRegClass; else RC = &PPC::F4RCRegClass; break; case MVT::f64: if (Subtarget.hasVSX()) RC = &PPC::VSFRCRegClass; else RC = &PPC::F8RCRegClass; break; case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: RC = &PPC::VRRCRegClass; break; case MVT::v4f32: RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass; break; case MVT::v2f64: case MVT::v2i64: RC = &PPC::VSHRCRegClass; break; case MVT::v4f64: RC = &PPC::QFRCRegClass; break; case MVT::v4i1: RC = &PPC::QBRCRegClass; break; } // Transform the arguments stored in physical registers into virtual ones. unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT == MVT::i1 ? MVT::i32 : ValVT); if (ValVT == MVT::i1) ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue); InVals.push_back(ArgValue); } else { // Argument stored in memory. assert(VA.isMemLoc()); unsigned ArgSize = VA.getLocVT().getStoreSize(); int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), isImmutable); // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back( DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo())); } } // Assign locations to all of the incoming aggregate by value arguments. // Aggregates passed by value are stored in the local variable space of the // caller's stack frame, right above the parameter list area. SmallVector ByValArgLocs; CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), ByValArgLocs, *DAG.getContext()); // Reserve stack space for the allocations in CCInfo. CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal); // Area that is at least reserved in the caller of this function. unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); MinReservedArea = std::max(MinReservedArea, LinkageSize); // Set the size that is at least reserved in caller of this function. 
Tail // call optimized function's reserved stack space needs to be aligned so that // taking the difference between two stack areas will result in an aligned // stack. MinReservedArea = EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); FuncInfo->setMinReservedArea(MinReservedArea); SmallVector MemOps; // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { static const MCPhysReg GPArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); static const MCPhysReg FPArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 }; unsigned NumFPArgRegs = array_lengthof(FPArgRegs); if (useSoftFloat()) NumFPArgRegs = 0; FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs)); FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs)); // Make room for NumGPArgRegs and NumFPArgRegs. int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8; FuncInfo->setVarArgsStackOffset( MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, CCInfo.getNextStackOffset(), true)); FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // The fixed integer arguments of a variadic function are stored to the // VarArgsFrameIndex on the stack so that they may be loaded by // dereferencing the result of va_next. for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) { // Get an existing live-in vreg, or add a new one. unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); if (!VReg) VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 // is set. // The double arguments are stored to the VarArgsFrameIndex // on the stack. for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) { // Get an existing live-in vreg, or add a new one. unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]); if (!VReg) VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); MemOps.push_back(Store); // Increment the address by eight for the next argument to store SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } } if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return Chain; } // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. 
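// A small illustration: an i32 that the caller sign-extended arrives in the
// low half of an i64 GPR; wrapping the copy in AssertSext before the TRUNCATE
// tells later DAG combines that the upper 32 bits already hold copies of the
// sign bit.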
SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG, SDValue ArgVal, const SDLoc &dl) const { if (Flags.isSExt()) ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, DAG.getValueType(ObjectVT)); else if (Flags.isZExt()) ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, DAG.getValueType(ObjectVT)); return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal); } SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { // TODO: add description of PPC stack frame format, or at least some docs. // bool isELFv2ABI = Subtarget.isELFv2ABI(); bool isLittleEndian = Subtarget.isLittleEndian(); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo(); assert(!(CallConv == CallingConv::Fast && isVarArg) && "fastcc not supported on varargs functions"); EVT PtrVT = getPointerTy(MF.getDataLayout()); // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); unsigned PtrByteSize = 8; unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); static const MCPhysReg GPR[] = { PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; static const MCPhysReg VSRH[] = { PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 }; const unsigned Num_GPR_Regs = array_lengthof(GPR); - const unsigned Num_FPR_Regs = 13; + const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13; const unsigned Num_VR_Regs = array_lengthof(VR); const unsigned Num_QFPR_Regs = Num_FPR_Regs; // Do a first pass over the arguments to determine whether the ABI // guarantees that our caller has allocated the parameter save area // on its stack frame. In the ELFv1 ABI, this is always the case; // in the ELFv2 ABI, it is true if this is a vararg function or if // any parameter is located in a stack slot. bool HasParameterArea = !isELFv2ABI || isVarArg; unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize; unsigned NumBytes = LinkageSize; unsigned AvailableFPRs = Num_FPR_Regs; unsigned AvailableVRs = Num_VR_Regs; for (unsigned i = 0, e = Ins.size(); i != e; ++i) { if (Ins[i].Flags.isNest()) continue; if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags, PtrByteSize, LinkageSize, ParamAreaSize, NumBytes, AvailableFPRs, AvailableVRs, Subtarget.hasQPX())) HasParameterArea = true; } // Add DAG nodes to load the arguments or copy them out of registers. On // entry to a function on PPC, the arguments start after the linkage area, // although the first ones are often in registers. 
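  // Because the first GPR argument slot shadows the first doubleword after
  // the linkage area, a byte offset converts to a GPR index below as
  // (ArgOffset - LinkageSize) / PtrByteSize; e.g. offset 64 with a 48-byte
  // linkage area maps to index 2, i.e. X5.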
unsigned ArgOffset = LinkageSize; unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; unsigned &QFPR_idx = FPR_idx; SmallVector MemOps; Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); unsigned CurArgIdx = 0; for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { SDValue ArgVal; bool needsLoad = false; EVT ObjectVT = Ins[ArgNo].VT; EVT OrigVT = Ins[ArgNo].ArgVT; unsigned ObjSize = ObjectVT.getStoreSize(); unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; if (Ins[ArgNo].isOrigArg()) { std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); CurArgIdx = Ins[ArgNo].getOrigArgIndex(); } // We re-align the argument offset for each argument, except when using the // fast calling convention, when we need to make sure we do that only when // we'll actually use a stack slot. unsigned CurArgOffset, Align; auto ComputeArgOffset = [&]() { /* Respect alignment of argument on the stack. */ Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize); ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; CurArgOffset = ArgOffset; }; if (CallConv != CallingConv::Fast) { ComputeArgOffset(); /* Compute GPR index associated with argument offset. */ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; GPR_idx = std::min(GPR_idx, Num_GPR_Regs); } // FIXME the codegen can be much improved in some cases. // We do not have to keep everything in memory. if (Flags.isByVal()) { assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); if (CallConv == CallingConv::Fast) ComputeArgOffset(); // ObjSize is the true size, ArgSize rounded up to multiple of registers. ObjSize = Flags.getByValSize(); ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; // Empty aggregate parameters do not take up registers. Examples: // struct { } a; // union { } b; // int c[0]; // etc. However, we have to provide a place-holder in InVals, so // pretend we have an 8-byte item at the current address for that // purpose. if (!ObjSize) { int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(FIN); continue; } // Create a stack object covering all stack doublewords occupied // by the argument. If the argument is (fully or partially) on // the stack, or if the argument is fully in registers but the // caller has allocated the parameter save anyway, we can refer // directly to the caller's stack frame. Otherwise, create a // local copy in our own frame. int FI; if (HasParameterArea || ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize) FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false, true); else FI = MFI->CreateStackObject(ArgSize, Align, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); // Handle aggregates smaller than 8 bytes. if (ObjSize < PtrByteSize) { // The value of the object is its address, which differs from the // address of the enclosing doubleword on big-endian systems. SDValue Arg = FIN; if (!isLittleEndian) { SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT); Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff); } InVals.push_back(Arg); if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store; if (ObjSize==1 || ObjSize==2 || ObjSize==4) { EVT ObjType = (ObjSize == 1 ? MVT::i8 : (ObjSize == 2 ? 
MVT::i16 : MVT::i32)); Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, MachinePointerInfo(&*FuncArg), ObjType); } else { // For sizes that don't fit a truncating store (3, 5, 6, 7), // store the whole register as-is to the parameter save area // slot. Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(&*FuncArg)); } MemOps.push_back(Store); } // Whether we copied from a register or not, advance the offset // into the parameter save area by a full doubleword. ArgOffset += PtrByteSize; continue; } // The value of the object is its address, which is the address of // its first stack doubleword. InVals.push_back(FIN); // Store whatever pieces of the object are in registers to memory. for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { if (GPR_idx == Num_GPR_Regs) break; unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Addr = FIN; if (j) { SDValue Off = DAG.getConstant(j, dl, PtrVT); Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off); } SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr, MachinePointerInfo(&*FuncArg, j)); MemOps.push_back(Store); ++GPR_idx; } ArgOffset += ArgSize; continue; } switch (ObjectVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled argument type!"); case MVT::i1: case MVT::i32: case MVT::i64: if (Flags.isNest()) { // The 'nest' parameter, if any, is passed in R11. unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); break; } // These can be scalar arguments or elements of an integer array type // passed directly. Clang may use those instead of "byval" aggregate // types to avoid forcing arguments to memory unnecessarily. if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); } else { if (CallConv == CallingConv::Fast) ComputeArgOffset(); needsLoad = true; ArgSize = PtrByteSize; } if (CallConv != CallingConv::Fast || needsLoad) ArgOffset += 8; break; case MVT::f32: case MVT::f64: // These can be scalar arguments or elements of a float array type // passed directly. The latter are used to implement ELFv2 homogenous // float aggregates. if (FPR_idx != Num_FPR_Regs) { unsigned VReg; if (ObjectVT == MVT::f32) VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasP8Vector() ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass); else VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++FPR_idx; } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) { // FIXME: We may want to re-enable this for CallingConv::Fast on the P8 // once we support fp <-> gpr moves. // This can only ever happen in the presence of f32 array types, // since otherwise we never run out of FPRs before running out // of GPRs. unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::f32) { if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 
4 : 0)) ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal, DAG.getConstant(32, dl, MVT::i32)); ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); } ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal); } else { if (CallConv == CallingConv::Fast) ComputeArgOffset(); needsLoad = true; } // When passing an array of floats, the array occupies consecutive // space in the argument area; only round up to the next doubleword // at the end of the array. Otherwise, each float takes 8 bytes. if (CallConv != CallingConv::Fast || needsLoad) { ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize; ArgOffset += ArgSize; if (Flags.isInConsecutiveRegsLast()) ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; } break; case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: case MVT::v1i128: if (!Subtarget.hasQPX()) { // These can be scalar arguments or elements of a vector array type // passed directly. The latter are used to implement ELFv2 homogenous // vector aggregates. if (VR_idx != Num_VR_Regs) { unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ? MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) : MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++VR_idx; } else { if (CallConv == CallingConv::Fast) ComputeArgOffset(); needsLoad = true; } if (CallConv != CallingConv::Fast || needsLoad) ArgOffset += 16; break; } // not QPX assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 && "Invalid QPX parameter type"); /* fall through */ case MVT::v4f64: case MVT::v4i1: // QPX vectors are treated like their scalar floating-point subregisters // (except that they're larger). unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32; if (QFPR_idx != Num_QFPR_Regs) { const TargetRegisterClass *RC; switch (ObjectVT.getSimpleVT().SimpleTy) { case MVT::v4f64: RC = &PPC::QFRCRegClass; break; case MVT::v4f32: RC = &PPC::QSRCRegClass; break; default: RC = &PPC::QBRCRegClass; break; } unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++QFPR_idx; } else { if (CallConv == CallingConv::Fast) ComputeArgOffset(); needsLoad = true; } if (CallConv != CallingConv::Fast || needsLoad) ArgOffset += Sz; break; } // We need to load the argument to a virtual register if we determined // above that we ran out of physical registers of the appropriate type. if (needsLoad) { if (ObjSize < ArgSize && !isLittleEndian) CurArgOffset += ArgSize - ObjSize; int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo()); } InVals.push_back(ArgVal); } // Area that is at least reserved in the caller of this function. unsigned MinReservedArea; if (HasParameterArea) MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize); else MinReservedArea = LinkageSize; // Set the size that is at least reserved in caller of this function. Tail // call optimized functions' reserved stack space needs to be aligned so that // taking the difference between two stack areas will result in an aligned // stack. MinReservedArea = EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); FuncInfo->setMinReservedArea(MinReservedArea); // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. 
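  // In the vararg case below, every GPR that did not receive a named argument
  // is spilled to consecutive doublewords so va_arg can simply walk memory;
  // e.g. a callee with two named integer arguments spills X5..X10.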
if (isVarArg) { int Depth = ArgOffset; FuncInfo->setVarArgsFrameIndex( MFI->CreateFixedObject(PtrByteSize, Depth, true)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // If this function is vararg, store any remaining integer argument regs // to their spots on the stack so that they may be loaded by dereferencing // the result of va_next. for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; GPR_idx < Num_GPR_Regs; ++GPR_idx) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } } if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return Chain; } SDValue PPCTargetLowering::LowerFormalArguments_Darwin( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { // TODO: add description of PPC stack frame format, or at least some docs. // MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo(); EVT PtrVT = getPointerTy(MF.getDataLayout()); bool isPPC64 = PtrVT == MVT::i64; // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); unsigned PtrByteSize = isPPC64 ? 8 : 4; unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); unsigned ArgOffset = LinkageSize; // Area that is at least reserved in caller of this function. unsigned MinReservedArea = ArgOffset; static const MCPhysReg GPR_32[] = { // 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; static const MCPhysReg GPR_64[] = { // 64-bit registers. PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; const unsigned Num_GPR_Regs = array_lengthof(GPR_32); - const unsigned Num_FPR_Regs = 13; + const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13; const unsigned Num_VR_Regs = array_lengthof( VR); unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; // In 32-bit non-varargs functions, the stack space for vectors is after the // stack space for non-vectors. We do not use this space unless we have // too many vectors to fit in registers, something that only occurs in // constructed examples:), but we have to walk the arglist to figure // that out...for the pathological case, compute VecArgOffset as the // start of the vector parameter area. Computing VecArgOffset is the // entire point of the following loop. unsigned VecArgOffset = ArgOffset; if (!isVarArg && !isPPC64) { for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { EVT ObjectVT = Ins[ArgNo].VT; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; if (Flags.isByVal()) { // ObjSize is the true size, ArgSize rounded up to multiple of regs. 
unsigned ObjSize = Flags.getByValSize(); unsigned ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; VecArgOffset += ArgSize; continue; } switch(ObjectVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled argument type!"); case MVT::i1: case MVT::i32: case MVT::f32: VecArgOffset += 4; break; case MVT::i64: // PPC64 case MVT::f64: // FIXME: We are guaranteed to be !isPPC64 at this point. // Does MVT::i64 apply? VecArgOffset += 8; break; case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: // Nothing to do, we're only looking at Nonvector args here. break; } } } // We've found where the vector parameter area in memory is. Skip the // first 12 parameters; these don't use that memory. VecArgOffset = ((VecArgOffset+15)/16)*16; VecArgOffset += 12*16; // Add DAG nodes to load the arguments or copy them out of registers. On // entry to a function on PPC, the arguments start after the linkage area, // although the first ones are often in registers. SmallVector MemOps; unsigned nAltivecParamsAtEnd = 0; Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); unsigned CurArgIdx = 0; for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { SDValue ArgVal; bool needsLoad = false; EVT ObjectVT = Ins[ArgNo].VT; unsigned ObjSize = ObjectVT.getSizeInBits()/8; unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; if (Ins[ArgNo].isOrigArg()) { std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); CurArgIdx = Ins[ArgNo].getOrigArgIndex(); } unsigned CurArgOffset = ArgOffset; // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { if (isVarArg || isPPC64) { MinReservedArea = ((MinReservedArea+15)/16)*16; MinReservedArea += CalculateStackSlotSize(ObjectVT, Flags, PtrByteSize); } else nAltivecParamsAtEnd++; } else // Calculate min reserved area. MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, Flags, PtrByteSize); // FIXME the codegen can be much improved in some cases. // We do not have to keep everything in memory. if (Flags.isByVal()) { assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); // ObjSize is the true size, ArgSize rounded up to multiple of registers. ObjSize = Flags.getByValSize(); ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; // Objects of size 1 and 2 are right justified, everything else is // left justified. This means the memory address is adjusted forwards. if (ObjSize==1 || ObjSize==2) { CurArgOffset = CurArgOffset + (4 - ObjSize); } // The value of the object is its address. int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(FIN); if (ObjSize==1 || ObjSize==2) { if (GPR_idx != Num_GPR_Regs) { unsigned VReg; if (isPPC64) VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); else VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16; SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(&*FuncArg), ObjType); MemOps.push_back(Store); ++GPR_idx; } ArgOffset += PtrByteSize; continue; } for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { // Store whatever pieces of the object are in registers // to memory. ArgOffset will be the address of the beginning // of the object. 
if (GPR_idx != Num_GPR_Regs) { unsigned VReg; if (isPPC64) VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); else VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(&*FuncArg, j)); MemOps.push_back(Store); ++GPR_idx; ArgOffset += PtrByteSize; } else { ArgOffset += ArgSize - (ArgOffset-CurArgOffset); break; } } continue; } switch (ObjectVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled argument type!"); case MVT::i1: case MVT::i32: if (!isPPC64) { if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); if (ObjectVT == MVT::i1) ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal); ++GPR_idx; } else { needsLoad = true; ArgSize = PtrByteSize; } // All int arguments reserve stack space in the Darwin ABI. ArgOffset += PtrByteSize; break; } // FALLTHROUGH case MVT::i64: // PPC64 if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); ++GPR_idx; } else { needsLoad = true; ArgSize = PtrByteSize; } // All int arguments reserve stack space in the Darwin ABI. ArgOffset += 8; break; case MVT::f32: case MVT::f64: // Every 4 bytes of argument space consumes one of the GPRs available for // argument passing. if (GPR_idx != Num_GPR_Regs) { ++GPR_idx; if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) ++GPR_idx; } if (FPR_idx != Num_FPR_Regs) { unsigned VReg; if (ObjectVT == MVT::f32) VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); else VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++FPR_idx; } else { needsLoad = true; } // All FP arguments reserve stack space in the Darwin ABI. ArgOffset += isPPC64 ? 8 : ObjSize; break; case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: // Note that vector arguments in registers don't reserve stack space, // except in varargs functions. if (VR_idx != Num_VR_Regs) { unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); if (isVarArg) { while ((ArgOffset % 16) != 0) { ArgOffset += PtrByteSize; if (GPR_idx != Num_GPR_Regs) GPR_idx++; } ArgOffset += 16; GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? } ++VR_idx; } else { if (!isVarArg && !isPPC64) { // Vectors go after all the nonvectors. CurArgOffset = VecArgOffset; VecArgOffset += 16; } else { // Vectors are aligned. ArgOffset = ((ArgOffset+15)/16)*16; CurArgOffset = ArgOffset; ArgOffset += 16; } needsLoad = true; } break; } // We need to load the argument to a virtual register if we determined above // that we ran out of physical registers of the appropriate type. 
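  // Right-justification example for the fixed object created below: on
  // big-endian PPC64 Darwin, an i32 that spilled occupies the low half of its
  // 8-byte slot, so the load address is CurArgOffset + (ArgSize - ObjSize)
  // = CurArgOffset + 4.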
    if (needsLoad) {
      int FI = MFI->CreateFixedObject(ObjSize,
                                      CurArgOffset + (ArgSize - ObjSize),
                                      isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }

  // Area that is at least reserved in the caller of this function.
  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);

  // Set the size that is at least reserved in the caller of this function.
  // Tail-call-optimized functions' reserved stack space needs to be aligned
  // so that taking the difference between two stack areas will result in an
  // aligned stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
        MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;

      if (isPPC64)
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      else
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
                                   unsigned ParamSize) {

  if (!isTailCall) return 0;

  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
  unsigned CallerMinReservedArea = FI->getMinReservedArea();
  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
  // Remember only if the new adjustment is bigger.
  if (SPDiff < FI->getTailCallSPDelta())
    FI->setTailCallSPDelta(SPDiff);

  return SPDiff;
}

static bool isFunctionGlobalAddress(SDValue Callee);

static bool
resideInSameModule(SDValue Callee, Reloc::Model RelMod) {
  // If !G, Callee can be an external symbol.
  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
  if (!G) return false;

  const GlobalValue *GV = G->getGlobal();

  if (GV->isDeclaration()) return false;

  switch(GV->getLinkage()) {
  default: llvm_unreachable("unknown linkage type");
  case GlobalValue::AvailableExternallyLinkage:
  case GlobalValue::ExternalWeakLinkage:
    return false;

  // Callee with weak linkage is allowed if it has hidden or protected
  // visibility
  case GlobalValue::LinkOnceAnyLinkage:
  case GlobalValue::LinkOnceODRLinkage: // e.g. c++ inline functions
  case GlobalValue::WeakAnyLinkage:
  case GlobalValue::WeakODRLinkage:     // e.g. c++ template instantiation
    if (GV->hasDefaultVisibility())
      return false;

  case GlobalValue::ExternalLinkage:
  case GlobalValue::InternalLinkage:
  case GlobalValue::PrivateLinkage:
    break;
  }

  // With '-fPIC', calling a default-visibility function requires a 'nop' to
  // be inserted after the call, whether or not the function resides in the
  // same module, so we treat it as being in a different module.
  if (RelMod == Reloc::PIC_ && GV->hasDefaultVisibility())
    return false;

  return true;
}

static bool
needStackSlotPassParameters(const PPCSubtarget &Subtarget,
                            const SmallVectorImpl<ISD::OutputArg> &Outs) {
  assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());

  const unsigned PtrByteSize = 8;
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned NumGPRs = array_lengthof(GPR);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs = array_lengthof(VR);
  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;

  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = NumFPRs;
  unsigned AvailableVRs = NumVRs;

  for (const ISD::OutputArg& Param : Outs) {
    if (Param.Flags.isNest()) continue;

    if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs,
                               Subtarget.hasQPX()))
      return true;
  }
  return false;
}

static bool
hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) {
  if (CS->arg_size() != CallerFn->getArgumentList().size())
    return false;

  ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin();
  ImmutableCallSite::arg_iterator CalleeArgEnd = CS->arg_end();
  Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();

  for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
    const Value* CalleeArg = *CalleeArgIter;
    const Value* CallerArg = &(*CallerArgIter);
    if (CalleeArg == CallerArg)
      continue;

    // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
    //        tail call @callee([4 x i64] undef, [4 x i64] %b)
    //      }
    // The first argument of the callee is undef and has the same type as the
    // corresponding caller argument.
    if (CalleeArg->getType() == CallerArg->getType() &&
        isa<UndefValue>(CalleeArg))
      continue;

    return false;
  }

  return true;
}

bool
PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
                                    SDValue Callee,
                                    CallingConv::ID CalleeCC,
                                    ImmutableCallSite *CS,
                                    bool isVarArg,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    SelectionDAG& DAG) const {
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

  if (DisableSCO && !TailCallOpt) return false;

  // Variadic argument functions are not supported.
  if (isVarArg) return false;

  MachineFunction &MF = DAG.getMachineFunction();
  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();

  // Tail or Sibling call optimization (TCO/SCO) requires the callee and the
  // caller to use the same calling convention.
  if (CallerCC != CalleeCC) return false;

  // SCO supports only the C and Fast calling conventions.
  if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C)
    return false;

  // Callers with byval parameters are not supported.
  if (std::any_of(Ins.begin(), Ins.end(),
                  [](const ISD::InputArg& IA) { return IA.Flags.isByVal(); }))
    return false;

  // Callees with byval parameters are not supported either.
  // Note: This is a quick workaround, because in some cases, e.g. when the
  // caller's stack size is larger than the callee's, we are still able to
  // apply sibling call optimization.
  // See: https://reviews.llvm.org/D23441#513574
  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
    return false;

  // No TCO/SCO on indirect calls because the caller has to restore its TOC.
  if (!isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee))
    return false;

  // Check if Callee resides in the same module, because for now, the PPC64
  // SVR4 ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol that resides
  // in another module.
  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
  if (!resideInSameModule(Callee, getTargetMachine().getRelocationModel()))
    return false;

  // TCO allows altering callee ABI, so we don't have to check further.
  if (CalleeCC == CallingConv::Fast && TailCallOpt)
    return true;

  if (DisableSCO) return false;

  // If the callee uses the same argument list as the caller, we can apply
  // SCO in this case. If not, we need to check whether the callee needs
  // stack for passing arguments.
  if (!hasSameArgumentList(MF.getFunction(), CS) &&
      needStackSlotPassParameters(Subtarget, Outs)) {
    return false;
  }

  return true;
}

/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                                     CallingConv::ID CalleeCC,
                                                     bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                                     SelectionDAG& DAG) const {
  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
    return false;

  // Variable argument functions are not supported.
  if (isVarArg)
    return false;

  MachineFunction &MF = DAG.getMachineFunction();
  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
    // Functions containing by val parameters are not supported.
    for (unsigned i = 0; i != Ins.size(); i++) {
       ISD::ArgFlagsTy Flags = Ins[i].Flags;
       if (Flags.isByVal()) return false;
    }

    // Non-PIC/GOT tail calls are supported.
    if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
      return true;

    // At the moment we can only do local tail calls (in same module, hidden
    // or protected) if we are generating PIC.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
      return G->getGlobal()->hasHiddenVisibility()
          || G->getGlobal()->hasProtectedVisibility();
  }

  return false;
}

/// isBLACompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return nullptr;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      SignExtend32<26>(Addr) != Addr)
    return nullptr;  // Top 6 bits have to be sext of immediate.

  return DAG
      .getConstant(
          (int)C->getZExtValue() >> 2, SDLoc(Op),
          DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
      .getNode();
}

namespace {

struct TailCallArgumentInfo {
  SDValue Arg;
  SDValue FrameIdxOp;
  int FrameIdx;

  TailCallArgumentInfo() : FrameIdx(0) {}
};
}

/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static void StoreTailCallArgumentsToStackSlot(
    SelectionDAG &DAG, SDValue Chain,
    const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
    SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
    SDValue Arg = TailCallArgs[i].Arg;
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
    int FI = TailCallArgs[i].FrameIdx;
    // Store relative to framepointer.
MemOpChains.push_back(DAG.getStore( Chain, dl, Arg, FIN, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI))); } } /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to /// the appropriate stack slot for the tail call optimized function call. static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl) { if (SPDiff) { // Calculate the new stack slot for the return address. MachineFunction &MF = DAG.getMachineFunction(); const PPCSubtarget &Subtarget = MF.getSubtarget(); const PPCFrameLowering *FL = Subtarget.getFrameLowering(); bool isPPC64 = Subtarget.isPPC64(); int SlotSize = isPPC64 ? 8 : 4; int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset(); int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewRetAddrLoc, true); EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, MachinePointerInfo::getFixedStack(MF, NewRetAddr)); // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack // slot as the FP is never overwritten. if (Subtarget.isDarwinABI()) { int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset(); int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, true); SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), NewFPIdx)); } } return Chain; } /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate /// the position of the argument. static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl& TailCallArguments) { int Offset = ArgOffset + SPDiff; uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue FIN = DAG.getFrameIndex(FI, VT); TailCallArgumentInfo Info; Info.Arg = Arg; Info.FrameIdxOp = FIN; Info.FrameIdx = FI; TailCallArguments.push_back(Info); } /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address /// stack slot. Returns the chain as result and the loaded frame pointers in /// LROpOut/FPOpout. Used when tail calling. SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr( SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut, SDValue &FPOpOut, const SDLoc &dl) const { if (SPDiff) { // Load the LR and FP stack slot for later adjusting. EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; LROpOut = getReturnAddrFrameIndex(DAG); LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo()); Chain = SDValue(LROpOut.getNode(), 1); // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack // slot as the FP is never overwritten. if (Subtarget.isDarwinABI()) { FPOpOut = getFramePointerFrameIndex(DAG); FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo()); Chain = SDValue(FPOpOut.getNode(), 1); } } return Chain; } /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified /// by "Src" to address "Dst" of size "Size". Alignment information is /// specified by the specific parameter attribute. The copy will be passed as /// a byval function parameter. /// Sometimes what we are copying is the end of a larger object, the part that /// does not fit in registers. 
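/// For instance (an illustrative case, not checked here), a 12-byte byval
/// whose first doubleword was passed in the last available GPR would have
/// this memcpy cover only the trailing bytes that spilled to the stack.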
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
                                         SelectionDAG &DAG, const SDLoc &dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       false, false, false, MachinePointerInfo(),
                       MachinePointerInfo());
}

/// LowerMemOpCallTo - Store the argument to the stack or remember it in case
/// of tail calls.
static void LowerMemOpCallTo(
    SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
    SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
    bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
    SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
  if (!isTailCall) {
    if (isVector) {
      SDValue StackPtr;
      if (isPPC64)
        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
      else
        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                           DAG.getConstant(ArgOffset, dl, PtrVT));
    }
    MemOpChains.push_back(
        DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
    // Calculate and remember argument location.
  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
                                  TailCallArguments);
}

static void
PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
                const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
                SDValue FPOp,
                SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  SmallVector<SDValue, 8> MemOpChains2;
  // Do not flag preceding copytoreg stuff together with the following stuff.
  InFlag = SDValue();
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                    MemOpChains2, dl);
  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);

  // Store the return address to the appropriate stack slot.
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);

  // Emit callseq_end just before tailcall node.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
  InFlag = Chain.getValue(1);
}

// Is this global address that of a function that can be called by name? (as
// opposed to something that must hold a descriptor for an indirect call).
static bool isFunctionGlobalAddress(SDValue Callee) {
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
        Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
      return false;

    return G->getGlobal()->getValueType()->isFunctionTy();
  }

  return false;
}

static unsigned
PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
            SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
            bool isPatchPoint, bool hasNest,
            SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
            SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
            ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
  bool isPPC64 = Subtarget.isPPC64();
  bool isSVR4ABI = Subtarget.isSVR4ABI();
  bool isELFv2ABI = Subtarget.isELFv2ABI();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.
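  // The code below resolves Callee in one of three ways: an absolute address
  // that fits the BLA immediate field, a direct call through a
  // TargetGlobalAddress/TargetExternalSymbol node, or, failing both, an
  // indirect call materialized as an MTCTR/BCTRL pair.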
unsigned CallOpc = PPCISD::CALL; bool needIndirectCall = true; if (!isSVR4ABI || !isPPC64) if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { // If this is an absolute destination address, use the munged value. Callee = SDValue(Dest, 0); needIndirectCall = false; } // PC-relative references to external symbols should go through $stub, unless // we're building with the leopard linker or later, which automatically // synthesizes these stubs. const TargetMachine &TM = DAG.getTarget(); const Module *Mod = DAG.getMachineFunction().getFunction()->getParent(); const GlobalValue *GV = nullptr; if (auto *G = dyn_cast(Callee)) GV = G->getGlobal(); bool Local = TM.shouldAssumeDSOLocal(*Mod, GV); bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64; if (isFunctionGlobalAddress(Callee)) { GlobalAddressSDNode *G = cast(Callee); // A call to a TLS address is actually an indirect call to a // thread-specific pointer. unsigned OpFlags = 0; if (UsePlt) OpFlags = PPCII::MO_PLT; // If the callee is a GlobalAddress/ExternalSymbol node (quite common, // every direct call is) turn it into a TargetGlobalAddress / // TargetExternalSymbol node so that legalize doesn't hack it. Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, Callee.getValueType(), 0, OpFlags); needIndirectCall = false; } if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { unsigned char OpFlags = 0; if (UsePlt) OpFlags = PPCII::MO_PLT; Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), OpFlags); needIndirectCall = false; } if (isPatchPoint) { // We'll form an invalid direct call when lowering a patchpoint; the full // sequence for an indirect call is complicated, and many of the // instructions introduced might have side effects (and, thus, can't be // removed later). The call itself will be removed as soon as the // argument/return lowering is complete, so the fact that it has the wrong // kind of operands should not really matter. needIndirectCall = false; } if (needIndirectCall) { // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair // to do the call, we can't use PPCISD::CALL. SDValue MTCTROps[] = {Chain, Callee, InFlag}; if (isSVR4ABI && isPPC64 && !isELFv2ABI) { // Function pointers in the 64-bit SVR4 ABI do not point to the function // entry point, but to the function descriptor (the function entry point // address is part of the function descriptor though). // The function descriptor is a three doubleword structure with the // following fields: function entry point, TOC base address and // environment pointer. // Thus for a call through a function pointer, the following actions need // to be performed: // 1. Save the TOC of the caller in the TOC save area of its stack // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). // 2. Load the address of the function entry point from the function // descriptor. // 3. Load the TOC of the callee from the function descriptor into r2. // 4. Load the environment pointer from the function descriptor into // r11. // 5. Branch to the function entry point address. // 6. On return of the callee, the TOC of the caller needs to be // restored (this is done in FinishCall()). // // The loads are scheduled at the beginning of the call sequence, and the // register copies are flagged together to ensure that no other // operations can be scheduled in between. E.g. 
      // Load the address of the function entry point from the function
      // descriptor.
      SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
      if (LDChain.getValueType() == MVT::Glue)
        LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);

      auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
                          ? MachineMemOperand::MOInvariant
                          : MachineMemOperand::MONone;

      MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
      SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
                                        /* Alignment = */ 8, MMOFlags);

      // Load environment pointer into r11.
      SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
      SDValue LoadEnvPtr =
          DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
                      /* Alignment = */ 8, MMOFlags);

      SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
      SDValue TOCPtr =
          DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
                      /* Alignment = */ 8, MMOFlags);

      setUsesTOCBasePtr(DAG);
      SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr, InFlag);
      Chain = TOCVal.getValue(0);
      InFlag = TOCVal.getValue(1);

      // If the function call has an explicit 'nest' parameter, it takes the
      // place of the environment pointer.
      if (!hasNest) {
        SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
                                          InFlag);
        Chain = EnvVal.getValue(0);
        InFlag = EnvVal.getValue(1);
      }

      MTCTROps[0] = Chain;
      MTCTROps[1] = LoadFuncPtr;
      MTCTROps[2] = InFlag;
    }

    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
                        makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
    InFlag = Chain.getValue(1);

    NodeTys.clear();
    NodeTys.push_back(MVT::Other);
    NodeTys.push_back(MVT::Glue);
    Ops.push_back(Chain);
    CallOpc = PPCISD::BCTRL;
    Callee.setNode(nullptr);
    // Add use of X11 (holding environment pointer)
    if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
      Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
    // Add CTR register as callee so a bctr can be emitted later.
    if (isTailCall)
      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
  }

  // If this is a direct call, pass the chain and the callee.
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }
  // If this is a tail call add stack pointer delta.
  if (isTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
  // into the call.
  if (isSVR4ABI && isPPC64 && !isPatchPoint) {
    setUsesTOCBasePtr(DAG);
    Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
  }

  return CallOpc;
}

static bool isLocalCall(const SDValue &Callee) {
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    return G->getGlobal()->isStrongDefinitionForLinker();
  return false;
}

SDValue PPCTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                    *DAG.getContext());
  CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Val = DAG.getCopyFromReg(Chain, dl,
                                     VA.getLocReg(), VA.getLocVT(), InFlag);
    Chain = Val.getValue(1);
    InFlag = Val.getValue(2);

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::SExt:
      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}

SDValue PPCTargetLowering::FinishCall(
    CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
    bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
    SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const {
  std::vector<EVT> NodeTys;
  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
                                 SPDiff, isTailCall, isPatchPoint, hasNest,
                                 RegsToPass, Ops, NodeTys, CS, Subtarget);

  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
  if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));

  // When performing tail call optimization the callee pops its arguments off
  // the stack. Account for this here so these bytes can be pushed back on in
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
  int BytesCalleePops =
    (CallConv == CallingConv::Fast &&
     getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  // Emit tail call.
  if (isTailCall) {
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee)) &&
           "Expecting a global address, external symbol, absolute value or "
           "register");

    DAG.getMachineFunction().getFrameInfo()->setHasTailCall();
    return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
  }

  // Add a NOP immediately after the branch instruction when using the 64-bit
  // SVR4 ABI. At link time, if caller and callee are in a different module and
  // thus have a different TOC, the call will be replaced with a call to a stub
  // function which saves the current TOC, loads the TOC of the callee and
  // branches to the callee. The NOP will be replaced with a load instruction
  // which restores the TOC of the caller from the TOC save slot of the current
  // stack frame. If caller and callee belong to the same module (and have the
  // same TOC), the NOP will remain unchanged.
  if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
      !isPatchPoint) {
    if (CallOpc == PPCISD::BCTRL) {
      // This is a call through a function pointer.
      // Restore the caller TOC from the save area into R2.
      // See PrepareCall() for more information about calls through function
      // pointers in the 64-bit SVR4 ABI.
      // We are using a target-specific load with r2 hard coded, because the
      // result of a target-independent load would never go directly into r2,
      // since r2 is a reserved register (which prevents the register allocator
      // from allocating it), resulting in an additional register being
      // allocated and an unnecessary move instruction being generated.
      CallOpc = PPCISD::BCTRL_LOAD_TOC;

      EVT PtrVT = getPointerTy(DAG.getDataLayout());
      SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);

      // The address needs to go after the chain input but before the flag (or
      // any other variadic arguments).
      Ops.insert(std::next(Ops.begin()), AddTOC);
    } else if ((CallOpc == PPCISD::CALL) &&
               (!isLocalCall(Callee) ||
                DAG.getTarget().getRelocationModel() == Reloc::PIC_))
      // Otherwise insert NOP for non-local calls.
      CallOpc = PPCISD::CALL_NOP;
  }
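  // At link time a cross-TOC call then looks roughly like (a sketch; the
  // exact save offset is ABI-dependent, e.g. 40(r1) under ELFv1 and 24(r1)
  // under ELFv2):
  //   bl callee    ->   bl callee_plt_stub
  //   nop          ->   ld r2, 40(r1)
  // Same-TOC calls keep the nop unchanged.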
  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(BytesCalleePops, dl, true),
                             InFlag, dl);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
                         Ins, dl, DAG, InVals);
}

SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG                     = CLI.DAG;
  SDLoc &dl                             = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
  SDValue Chain                         = CLI.Chain;
  SDValue Callee                        = CLI.Callee;
  bool &isTailCall                      = CLI.IsTailCall;
  CallingConv::ID CallConv              = CLI.CallConv;
  bool isVarArg                         = CLI.IsVarArg;
  bool isPatchPoint                     = CLI.IsPatchPoint;
  ImmutableCallSite *CS                 = CLI.CS;

  if (isTailCall) {
    if (Subtarget.useLongCalls() && !(CS && CS->isMustTailCall()))
      isTailCall = false;
    else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
      isTailCall =
        IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
                                                 isVarArg, Outs, Ins, DAG);
    else
      isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
                                                     isVarArg, Ins, DAG);
    if (isTailCall) {
      ++NumTailCalls;
      if (!getTargetMachine().Options.GuaranteedTailCallOpt)
        ++NumSiblingCalls;

      assert(isa<GlobalAddressSDNode>(Callee) &&
             "Callee should be an llvm::Function object.");
      DEBUG(
        const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
        const unsigned Width = 80 - strlen("TCO caller: ")
                                  - strlen(", callee linkage: 0, 0");
        dbgs() << "TCO caller: "
               << left_justify(DAG.getMachineFunction().getName(), Width)
               << ", callee linkage: "
               << GV->getVisibility() << ", " << GV->getLinkage() << "\n"
      );
    }
  }

  if (!isTailCall && CS && CS->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // When long calls (i.e. indirect calls) are always used, calls are always
  // made via function pointer. If we have a function name, first translate it
  // into a pointer.
  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
      !isTailCall)
    Callee = LowerGlobalAddress(Callee, DAG);

  if (Subtarget.isSVR4ABI()) {
    if (Subtarget.isPPC64())
      return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
                              isTailCall, isPatchPoint, Outs, OutVals, Ins,
                              dl, DAG, InVals, CS);
    else
      return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
                              isTailCall, isPatchPoint, Outs, OutVals, Ins,
                              dl, DAG, InVals, CS);
  }

  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
                          isTailCall, isPatchPoint, Outs, OutVals, Ins,
                          dl, DAG, InVals, CS);
}

SDValue PPCTargetLowering::LowerCall_32SVR4(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite *CS) const {
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  unsigned PtrByteSize = 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamic
  // alloc and for restoring the caller's stack pointer in this function's
  // epilog. This is done because a tail-called function might overwrite the
  // value in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
                       PtrByteSize);
  if (useSoftFloat())
    CCInfo.PreAnalyzeCallOperands(Outs);

  if (isVarArg) {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool Result;

      if (Outs[i].IsFixed) {
        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                               CCInfo);
      } else {
        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
                                      ArgFlags, CCInfo);
      }

      if (Result) {
#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
               << EVT(ArgVT).getEVTString() << "\n";
#endif
        llvm_unreachable(nullptr);
      }
    }
  } else {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
  }
  CCInfo.clearWasPPCF128();

  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);

  // Size of the linkage area, parameter list area and the part of the local
  // variable space where copies of aggregates which are passed by value are
  // stored.
  unsigned NumBytes = CCByValInfo.getNextStackOffset();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getIntPtrConstant(NumBytes, dl, true), dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
  SmallVector<SDValue, 8> MemOpChains;

  bool seenFloatArg = false;
  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    if (Flags.isByVal()) {
      // Argument is an aggregate which is passed by value, thus we need to
      // create a copy of it in the local variable space of the current stack
      // frame (which is the stack frame of the caller) and pass the address of
      // this copy to the callee.
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
      CCValAssign &ByValVA = ByValArgLocs[j++];
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");

      // Memory reserved in the local variable space of the caller's stack
      // frame.
      unsigned LocMemOffset = ByValVA.getLocMemOffset();

      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                           StackPtr, PtrOff);

      // Create a copy of the argument in the local area of the current
      // stack frame.
      SDValue MemcpyCall =
        CreateCopyOfByValArgument(Arg, PtrOff,
                                  CallSeqStart.getNode()->getOperand(0),
                                  Flags, DAG, dl);

      // This must go outside the CALLSEQ_START..END.
      SDValue NewCallSeqStart =
        DAG.getCALLSEQ_START(MemcpyCall,
                             CallSeqStart.getNode()->getOperand(1),
                             SDLoc(MemcpyCall));
      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                             NewCallSeqStart.getNode());
      Chain = CallSeqStart = NewCallSeqStart;

      // Pass the address of the aggregate copy on the stack either in a
      // physical register or in the parameter list area of the current stack
      // frame to the callee.
      Arg = PtrOff;
    }

    if (VA.isRegLoc()) {
      if (Arg.getValueType() == MVT::i1)
        Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);

      seenFloatArg |= VA.getLocVT().isFloatingPoint();
      // Put argument in a physical register.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      // Put argument in the parameter list area of the current stack frame.
      assert(VA.isMemLoc());
      unsigned LocMemOffset = VA.getLocMemOffset();

      if (!isTailCall) {
        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                             StackPtr, PtrOff);

        MemOpChains.push_back(
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
      } else {
        // Calculate and remember argument location.
        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
                                 TailCallArguments);
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }
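  // Background on the CR6 convention below: the 32-bit SVR4 ABI uses CR bit 6
  // as an implicit argument on calls to varargs routines. It must be set when
  // floating-point values were passed in registers, so the callee's va_start
  // code knows whether the FPR argument registers need to be saved.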
  // Set CR bit 6 to true if this is a vararg call with floating args passed in
  // registers.
  if (isVarArg) {
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, InFlag };

    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
                        dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));

    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
                    /* unused except on PPC64 ELFv1 */ false, DAG,
                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
                    NumBytes, Ins, InVals, CS);
}

// Copy an argument into memory, being careful to do this outside the
// call sequence for the call to which the argument belongs.
SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
    SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
    SelectionDAG &DAG, const SDLoc &dl) const {
  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
                        CallSeqStart.getNode()->getOperand(0),
                        Flags, DAG, dl);
  // The MEMCPY must go outside the CALLSEQ_START..END.
  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
                             CallSeqStart.getNode()->getOperand(1),
                             SDLoc(MemcpyCall));
  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                         NewCallSeqStart.getNode());
  return NewCallSeqStart;
}

SDValue PPCTargetLowering::LowerCall_64SVR4(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite *CS) const {
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned NumOps = Outs.size();
  bool hasNest = false;
  bool IsSibCall = false;

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  unsigned PtrByteSize = 8;

  MachineFunction &MF = DAG.getMachineFunction();

  if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
    IsSibCall = true;

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamic
  // alloc and for restoring the caller's stack pointer in this function's
  // epilog. This is done because a tail-called function might overwrite the
  // value in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
         "fastcc not supported on varargs functions");

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
  // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
  // area is 32 bytes reserved space for [SP][CR][LR][TOC].
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned NumBytes = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  unsigned &QFPR_idx = FPR_idx;

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  static const MCPhysReg VSRH[] = {
    PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7,
    PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
  };

  const unsigned NumGPRs = array_lengthof(GPR);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs  = array_lengthof(VR);
  const unsigned NumQFPRs = NumFPRs;
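  // For orientation: the parameter save area begins immediately after the
  // linkage area, so the first stack-passed doubleword lives at 48(r1) under
  // ELFv1 and at 32(r1) under ELFv2; ArgOffset below tracks that running
  // offset, re-aligned per argument by CalculateStackSlotAlignment().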
  // When using the fast calling convention, we don't provide backing for
  // arguments that will be in registers.
  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;

  // Add up all the space actually used.
  for (unsigned i = 0; i != NumOps; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

    if (Flags.isNest())
      continue;

    if (CallConv == CallingConv::Fast) {
      if (Flags.isByVal())
        NumGPRsUsed += (Flags.getByValSize()+7)/8;
      else
        switch (ArgVT.getSimpleVT().SimpleTy) {
        default: llvm_unreachable("Unexpected ValueType for argument!");
        case MVT::i1:
        case MVT::i32:
        case MVT::i64:
          if (++NumGPRsUsed <= NumGPRs)
            continue;
          break;
        case MVT::v4i32:
        case MVT::v8i16:
        case MVT::v16i8:
        case MVT::v2f64:
        case MVT::v2i64:
        case MVT::v1i128:
          if (++NumVRsUsed <= NumVRs)
            continue;
          break;
        case MVT::v4f32:
          // When using QPX, this is handled like a FP register, otherwise, it
          // is an Altivec register.
          if (Subtarget.hasQPX()) {
            if (++NumFPRsUsed <= NumFPRs)
              continue;
          } else {
            if (++NumVRsUsed <= NumVRs)
              continue;
          }
          break;
        case MVT::f32:
        case MVT::f64:
        case MVT::v4f64: // QPX
        case MVT::v4i1:  // QPX
          if (++NumFPRsUsed <= NumFPRs)
            continue;
          break;
        }
    }

    /* Respect alignment of argument on the stack.  */
    unsigned Align =
      CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
    NumBytes = ((NumBytes + Align - 1) / Align) * Align;

    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
    if (Flags.isInConsecutiveRegsLast())
      NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  }

  unsigned NumBytesActuallyUsed = NumBytes;

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it is
  // varargs. Because we cannot tell if this is needed on the caller side, we
  // have to conservatively assume that it is needed.  As such, make sure we
  // have at least enough stack space for the caller to store the 8 GPRs.
  // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);

  // Tail call needs the stack to be aligned.
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);

  int SPDiff = 0;

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  if (!IsSibCall)
    SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (isTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  if (!IsSibCall)
    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getIntPtrConstant(NumBytes, dl, true), dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = LinkageSize;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    auto ComputePtrOff = [&]() {
      /* Respect alignment of argument on the stack.  */
      unsigned Align =
        CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;

      PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());

      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    };

    if (CallConv != CallingConv::Fast) {
      ComputePtrOff();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, NumGPRs);
    }

    // Promote integers to 64-bit values.
    if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      // Note: Size includes alignment padding, so
      //   struct x { short a; char b; }
      // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
      // These are the proper values we need for right-justifying the
      // aggregate in a parameter register.
      unsigned Size = Flags.getByValSize();

      // An empty aggregate parameter takes up no storage and no
      // registers.
      if (Size == 0)
        continue;

      if (CallConv == CallingConv::Fast)
        ComputePtrOff();

      // All aggregates smaller than 8 bytes must be passed right-justified.
      if (Size==1 || Size==2 || Size==4) {
        EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
          continue;
        }
      }

      if (GPR_idx == NumGPRs && Size < 8) {
        SDValue AddPtr = PtrOff;
        if (!isLittleEndian) {
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
                                          PtrOff.getValueType());
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);
        ArgOffset += PtrByteSize;
        continue;
      }

      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)

      // FIXME: The above statement is likely due to a misunderstanding of the
      // documents.  All arguments must be copied into the parameter area BY
      // THE CALLEE in the event that the callee takes the address of any
      // formal argument.  That has not yet been implemented.  However, it is
      // reasonable to use the stack area as a staging area for the register
      // load.

      // Skip this for small aggregates, as we will use the same slot for a
      // right-justified copy, below.
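      // Example of the right-justification performed below: for
      //   struct x { short a; char b; };   // Size == 4
      // the aggregate is fetched with an i32 extending load, so its bytes end
      // up in the low-order (rightmost) word of the 64-bit GPR, which is
      // where the big-endian ABI expects small aggregates to sit.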
      if (Size >= 8)
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

      // When a register is available, pass a small aggregate right-justified.
      if (Size < 8 && GPR_idx != NumGPRs) {
        // The easiest way to get this right-justified in a register
        // is to copy the structure into the rightmost portion of a
        // local variable slot, then load the whole slot into the
        // register.
        // FIXME: The memcpy seems to produce pretty awful code for
        // small aggregates, particularly for packed ones.
        // FIXME: It would be preferable to use the slot in the
        // parameter save area instead of a new local variable.
        SDValue AddPtr = PtrOff;
        if (!isLittleEndian) {
          SDValue Const = DAG.getConstant(8 - Size, dl,
                                          PtrOff.getValueType());
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

        // Load the slot into the register.
        SDValue Load =
            DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

        // Done with this argument.
        ArgOffset += PtrByteSize;
        continue;
      }

      // For aggregates larger than PtrByteSize, copy the pieces of the
      // object that fit into registers from the parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
        hasNest = true;
        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
        if (CallConv == CallingConv::Fast)
          ArgOffset += PtrByteSize;
      }
      if (CallConv != CallingConv::Fast)
        ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64: {
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogeneous
      // float aggregates.

      // Named arguments go into FPRs first, and once they overflow, the
      // remaining arguments go into GPRs and then the parameter save area.
      // Unnamed arguments for vararg functions always go to GPRs and
      // then the parameter save area.  For now, put all arguments to vararg
      // routines always in both locations (FPR *and* GPR or stack slot).
      bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
      bool NeededLoad = false;

      // First load the argument into the next available FPR.
      if (FPR_idx != NumFPRs)
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

      // Next, load the argument into GPR or stack slot if needed.
      if (!NeedGPROrStack)
        ;
      else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
        // FIXME: We could avoid these f32 <-> gpr moves.
        // In the non-vararg case, this can only ever happen in the
        // presence of f32 array types, since otherwise we never run
        // out of FPRs before running out of GPRs.
        SDValue ArgVal;

        // Double values are always passed in a single GPR.
        if (Arg.getValueType() != MVT::f32) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);

        // Non-array float values are extended and passed in a GPR.
        } else if (!Flags.isInConsecutiveRegs()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);

        // If we have an array of floats, we collect every odd element
        // together with its predecessor into one GPR.
        } else if (ArgOffset % PtrByteSize != 0) {
          SDValue Lo, Hi;
          Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
          Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          if (!isLittleEndian)
            std::swap(Lo, Hi);
          ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);

        // The final element, if even, goes into the first half of a GPR.
        } else if (Flags.isInConsecutiveRegsLast()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
          if (!isLittleEndian)
            ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));

        // Non-final even elements are skipped; they will be handled
        // together with the subsequent argument on the next go-around.
        } else
          ArgVal = SDValue();

        if (ArgVal.getNode())
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        // Single-precision floating-point values are mapped to the
        // second (rightmost) word of the stack doubleword.
        if (Arg.getValueType() == MVT::f32 &&
            !isLittleEndian && !Flags.isInConsecutiveRegs()) {
          SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
          PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
        }

        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);

        NeededLoad = true;
      }
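      // For a homogeneous float aggregate such as float f[4] under ELFv2, the
      // elements land in successive FPRs; once GPRs come into play (varargs,
      // or FPRs exhausted), the code above packs element pairs into one
      // doubleword GPR via BUILD_PAIR -- e.g. f[0] and f[1] sharing a single
      // register -- with a lone final element shifted to the correct half.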
      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (CallConv != CallingConv::Fast || NeededLoad) {
        ArgOffset += (Arg.getValueType() == MVT::f32 &&
                      Flags.isInConsecutiveRegs()) ? 4 : 8;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) *
                      PtrByteSize;
      }
      break;
    }
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
      if (!Subtarget.hasQPX()) {
        // These can be scalar arguments or elements of a vector array type
        // passed directly.  The latter are used to implement ELFv2 homogeneous
        // vector aggregates.

        // For a varargs call, named arguments go into VRs or on the stack as
        // usual; unnamed arguments always go to the stack or the corresponding
        // GPRs when within range.  For now, we always put the value in both
        // locations (or even all three).
        if (isVarArg) {
          // We could elide this store in the case where the object fits
          // entirely in R registers.  Maybe later.
          SDValue Store =
              DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Store);
          if (VR_idx != NumVRs) {
            SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
                                       MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));

            unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
                             Arg.getSimpleValueType() == MVT::v2i64) ?
                            VSRH[VR_idx] : VR[VR_idx];
            ++VR_idx;

            RegsToPass.push_back(std::make_pair(VReg, Load));
          }
          ArgOffset += 16;
          for (unsigned i=0; i<16; i+=PtrByteSize) {
            if (GPR_idx == NumGPRs)
              break;
            SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                     DAG.getConstant(i, dl, PtrVT));
            SDValue Load =
                DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
          break;
        }

        // Non-varargs Altivec params go into VRs or on the stack.
        if (VR_idx != NumVRs) {
          unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
                           Arg.getSimpleValueType() == MVT::v2i64) ?
                          VSRH[VR_idx] : VR[VR_idx];
          ++VR_idx;
          RegsToPass.push_back(std::make_pair(VReg, Arg));
        } else {
          if (CallConv == CallingConv::Fast)
            ComputePtrOff();

          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                           true, isTailCall, true, MemOpChains,
                           TailCallArguments, dl);
          if (CallConv == CallingConv::Fast)
            ArgOffset += 16;
        }

        if (CallConv != CallingConv::Fast)
          ArgOffset += 16;
        break;
      } // not QPX

      assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
             "Invalid QPX parameter type");

      /* fall through */
    case MVT::v4f64:
    case MVT::v4i1: {
      bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
      if (isVarArg) {
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        SDValue Store =
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Store);
        if (QFPR_idx != NumQFPRs) {
          SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl,
                                     Store, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
        }
        ArgOffset += (IsF32 ? 16 : 32);
        for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, dl, PtrVT));
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs QPX params go into registers or on the stack.
      if (QFPR_idx != NumQFPRs) {
        RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        if (CallConv == CallingConv::Fast)
          ArgOffset += (IsF32 ? 16 : 32);
      }

      if (CallConv != CallingConv::Fast)
        ArgOffset += (IsF32 ? 16 : 32);
      break;
    }
    }
  }

  assert(NumBytesActuallyUsed == ArgOffset);
  (void)NumBytesActuallyUsed;

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Check if this is an indirect call (MTCTR/BCTRL).
  // See PrepareCall() for more information about calls through function
  // pointers in the 64-bit SVR4 ABI.
  if (!isTailCall && !isPatchPoint &&
      !isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee)) {
    // Load r2 into a virtual register and store it to the TOC save area.
    setUsesTOCBasePtr(DAG);
    SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
    // TOC save area offset.
    unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
    SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    Chain = DAG.getStore(
        Val.getValue(1), dl, Val, AddPtr,
        MachinePointerInfo::getStack(DAG.getMachineFunction(),
                                     TOCSaveOffset));
    // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
    // This does not mean the MTCTR instruction must use R12; it's easier
    // to model this as an extra parameter, so do that.
    if (isELFv2ABI && !isPatchPoint)
      RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isTailCall && !IsSibCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest,
                    DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
                    SPDiff, NumBytes, Ins, InVals, CS);
}

SDValue PPCTargetLowering::LowerCall_Darwin(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite *CS) const {
  unsigned NumOps = Outs.size();

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamic
  // alloc and for restoring the caller's stack pointer in this function's
  // epilog. This is done because a tail-called function might overwrite the
  // value in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  We start with 24/48 bytes, which is
  // prereserved space for [SP][CR][LR][3 x unused].
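  // Layout of that linkage area for reference (32-bit shown; the 64-bit
  // layout is the same with doubleword slots): saved SP at 0(SP), CR at
  // 4(SP), LR at 8(SP), and three reserved words at 12/16/20(SP).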
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned NumBytes = LinkageSize;

  // Add up all the space actually used.
  // In 32-bit non-varargs calls, Altivec parameters all go at the end;
  // usually they all go in registers, but we must reserve stack space for
  // them for possible use by the caller.  In varargs or 64-bit calls,
  // parameters are assigned stack space in order, with padding so Altivec
  // parameters are 16-byte aligned.
  unsigned nAltivecParamsAtEnd = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    // Varargs Altivec parameters are padded to a 16 byte boundary.
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
      if (!isVarArg && !isPPC64) {
        // Non-varargs Altivec parameters go after all the non-Altivec
        // parameters; handle those later so we know how much padding we need.
        nAltivecParamsAtEnd++;
        continue;
      }
      // Varargs and 64-bit Altivec parameters are padded to a 16 byte
      // boundary.
      NumBytes = ((NumBytes+15)/16)*16;
    }
    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    NumBytes = ((NumBytes+15)/16)*16;
    NumBytes += 16*nAltivecParamsAtEnd;
  }

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it is
  // varargs. Because we cannot tell if this is needed on the caller side, we
  // have to conservatively assume that it is needed.  As such, make sure we
  // have at least enough stack space for the caller to store the 8 GPRs.
  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);

  // Tail call needs the stack to be aligned.
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (isTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getIntPtrConstant(NumBytes, dl, true), dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr;
  if (isPPC64)
    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
  else
    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  const unsigned NumGPRs = array_lengthof(GPR_32);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs  = array_lengthof(VR);

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());

    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    // On PPC64, promote integers to 64-bit values.
    if (isPPC64 && Arg.getValueType() == MVT::i32) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      unsigned Size = Flags.getByValSize();
      // Very small objects are passed right-justified.  Everything else is
      // passed left-justified.
      if (Size==1 || Size==2) {
        EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
        } else {
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
                                          PtrOff.getValueType());
          SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
          Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                            CallSeqStart,
                                                            Flags, DAG, dl);
          ArgOffset += PtrByteSize;
        }
        continue;
      }
      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)
      Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                        CallSeqStart,
                                                        Flags, DAG, dl);

      // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
      // copy the pieces of the object that fit into registers from the
      // parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (GPR_idx != NumGPRs) {
        if (Arg.getValueType() == MVT::i1)
          Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);

        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      }
      ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64:
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        if (isVarArg) {
          SDValue Store =
              DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers
          if (GPR_idx != NumGPRs) {
            SDValue Load =
                DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
              !isPPC64) {
            SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
            PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
            SDValue Load =
                DAG.getLoad(MVT::i32, dl, Store, PtrOff,
                            MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
        } else {
          // If we have any FPRs remaining, we may also have GPRs remaining.
          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
          // GPRs.
          if (GPR_idx != NumGPRs)
            ++GPR_idx;
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
              !isPPC64)  // PPC64 has 64-bit GPRs.
            ++GPR_idx;
        }
      } else
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      if (isPPC64)
        ArgOffset += 8;
      else
        ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (isVarArg) {
        // These go aligned on the stack, or in the corresponding R registers
        // when within range.  The Darwin PPC ABI doc claims they also go in
        // V registers; in fact gcc does this only for arguments that are
        // prototyped, not for those that match the ...  We do it for all
        // arguments, seems to work.
        while (ArgOffset % 16 != 0) {
          ArgOffset += PtrByteSize;
          if (GPR_idx != NumGPRs)
            GPR_idx++;
        }
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                             DAG.getConstant(ArgOffset, dl, PtrVT));
        SDValue Store =
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          SDValue Load =
              DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
                          MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, dl, PtrVT));
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params generally go in registers, but have
      // stack space allocated at the end.
      if (VR_idx != NumVRs) {
        // Doesn't have GPR space allocated.
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else if (nAltivecParamsAtEnd==0) {
        // We are emitting Altivec params in order.
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        ArgOffset += 16;
      }
      break;
    }
  }
  // If all Altivec parameters fit in registers, as they usually do, they get
  // stack space following the non-Altivec parameters.  We don't track this
  // here because nobody below needs it.  If there are more Altivec parameters
  // than fit in registers, emit the stores here.
  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
    unsigned j = 0;
    // Offset is aligned; skip 1st 12 params which go in V registers.
    ArgOffset = ((ArgOffset+15)/16)*16;
    ArgOffset += 12*16;
    for (unsigned i = 0; i != NumOps; ++i) {
      SDValue Arg = OutVals[i];
      EVT ArgType = Outs[i].VT;
      if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
          ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
        if (++j > NumVRs) {
          SDValue PtrOff;
          // We are emitting Altivec params in order.
          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                           isPPC64, isTailCall, true, MemOpChains,
                           TailCallArguments, dl);
          ArgOffset += 16;
        }
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // On Darwin, R12 must contain the address of an indirect callee.  This does
  // not mean the MTCTR instruction must use R12; it's easier to model this as
  // an extra parameter, so do that.
  if (!isTailCall &&
      !isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee) &&
      !isBLACompatibleAddress(Callee, DAG))
    RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
                                                   PPC::R12), Callee));

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
                    /* unused except on PPC64 ELFv1 */ false, DAG,
                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
                    NumBytes, Ins, InVals, CS);
}

bool
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                  MachineFunction &MF, bool isVarArg,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC_PPC);
}

SDValue
PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_PPC);

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[i];

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }

    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const MCPhysReg *I =
    TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
  if (I) {
    for (; *I; ++I) {

      if (PPC::G8RCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i64));
      else if (PPC::F8RCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
      else if (PPC::CRRCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i1));
      else if (PPC::VRRCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::Other));
      else
        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
    }
  }

  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
}

SDValue
PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc dl(Op);

  // Get the correct type for integers.
  EVT IntVT = Op.getValueType();

  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
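  // DYNAREAOFFSET materializes, for the llvm.get.dynamic.area.offset
  // intrinsic, how far the dynamically-allocated area sits from the stack
  // pointer; like DYNALLOC below, it carries the frame pointer save index so
  // it can be resolved once the final frame layout is known.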
  // Build a DYNAREAOFFSET node.
  SDValue Ops[2] = {Chain, FPSIdx};
  SDVTList VTs = DAG.getVTList(IntVT);
  return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
}

SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
                                             SelectionDAG &DAG) const {
  // When we pop the dynamic allocation we need to restore the SP link.
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // Construct the stack pointer operand.
  bool isPPC64 = Subtarget.isPPC64();
  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
  SDValue StackPtr = DAG.getRegister(SP, PtrVT);

  // Get the operands for the STACKRESTORE.
  SDValue Chain = Op.getOperand(0);
  SDValue SaveSP = Op.getOperand(1);

  // Load the old link SP.
  SDValue LoadLinkSP =
      DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());

  // Restore the stack pointer.
  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);

  // Store the old link SP.
  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
}

SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Get the current return address save index.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int RASI = FI->getReturnAddrSaveIndex();

  // If the return address save index hasn't been defined yet.
  if (!RASI) {
    // Find out the fixed offset of the return address save area.
    int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
    // Allocate the frame index for the return address save area.
    RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset,
                                                false);
    // Save the result.
    FI->setReturnAddrSaveIndex(RASI);
  }
  return DAG.getFrameIndex(RASI, PtrVT);
}

SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Get the current frame pointer save index.  The users of this index will
  // be primarily DYNALLOC instructions.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int FPSI = FI->getFramePointerSaveIndex();

  // If the frame pointer save index hasn't been defined yet.
  if (!FPSI) {
    // Find out the fixed offset of the frame pointer save area.
    int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
    // Allocate the frame index for the frame pointer save area.
    FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset,
                                                true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }
  return DAG.getFrameIndex(FPSI, PtrVT);
}

SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG) const {
  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  SDValue Size  = Op.getOperand(1);
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  // Negate the size.
  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
                                DAG.getConstant(0, dl, PtrVT), Size);
  // Construct a node for the frame pointer save index.
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
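  // DYNALLOC must keep the PowerPC back chain valid: its expansion moves SP
  // down by the negated size while re-storing the previous SP value at the
  // new 0(SP) (roughly a stwux/stdux), which is why the frame pointer save
  // index is carried along as an operand.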
  // Build a DYNALLOC node.
  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
}

SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
                                             SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  int FI = MF.getFrameInfo()->CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
  return DAG.getFrameIndex(FI, PtrVT);
}

SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
                     DAG.getVTList(MVT::i32, MVT::Other),
                     Op.getOperand(0), Op.getOperand(1));
}

SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
                     Op.getOperand(0), Op.getOperand(1));
}

SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getValueType().isVector())
    return LowerVectorLoad(Op, DAG);

  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 loads");

  // First, load 8 bits into 32 bits, then truncate to 1 bit.

  SDLoc dl(Op);
  LoadSDNode *LD = cast<LoadSDNode>(Op);

  SDValue Chain = LD->getChain();
  SDValue BasePtr = LD->getBasePtr();
  MachineMemOperand *MMO = LD->getMemOperand();

  SDValue NewLD =
      DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()),
                     Chain, BasePtr, MVT::i8, MMO);
  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);

  SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
  return DAG.getMergeValues(Ops, dl);
}

SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getOperand(1).getValueType().isVector())
    return LowerVectorStore(Op, DAG);

  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
         "Custom lowering only for i1 stores");

  // First, zero extend to 32 bits, then use a truncating store to 8 bits.

  SDLoc dl(Op);
  StoreSDNode *ST = cast<StoreSDNode>(Op);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  MachineMemOperand *MMO = ST->getMemOperand();

  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
                      Value);
  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
}

// FIXME: Remove this once the ANDI glue bug is fixed:
SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 results");

  SDLoc DL(Op);
  return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
                     Op.getOperand(0));
}

/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction
/// when possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  // Not FP? Not a fsel.
  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
      !Op.getOperand(2).getValueType().isFloatingPoint())
    return Op;

  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  if (!DAG.getTarget().Options.NoInfsFPMath ||
      !DAG.getTarget().Options.NoNaNsFPMath)
    return Op;
  // TODO: Propagate flags from the select rather than global settings.
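  // fsel computes "d = (a >= 0.0) ? b : c" with no branch, which is why the
  // lowering below reduces every ordered comparison to a (possibly negated or
  // operand-swapped) >= 0.0 test on LHS - RHS; that subtraction is only safe
  // once infinities and NaNs have been excluded, as checked above.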
  SDNodeFlags Flags;
  Flags.setNoInfs(true);
  Flags.setNoNaNs(true);

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
  SDLoc dl(Op);

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  SDValue Sel1;
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
    case ISD::SETNE:
      std::swap(TV, FV);
    case ISD::SETEQ:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
      if (Sel1.getValueType() == MVT::f32)  // Comparison is always 64-bits
        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
    case ISD::SETOLE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
    }

  SDValue Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    std::swap(TV, FV);
  case ISD::SETEQ:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    if (Sel1.getValueType() == MVT::f32)  // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  }
  return Op;
}

void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               const SDLoc &dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);
void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               const SDLoc &dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  SDValue Tmp;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(
        Op.getOpcode() == ISD::FP_TO_SINT
            ? PPCISD::FCTIWZ
            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
        dl, MVT::f64, Src);
    break;
  case MVT::i64:
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ,
                      dl, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.
  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
    (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
  MachinePointerInfo MPI =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);

  // Emit a store to the stack slot.
  SDValue Chain;
  if (i32Stack) {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MMO =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
    SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
  } else
    Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI);

  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
  // add in a bias on big endian.
  if (Op.getValueType() == MVT::i32 && !i32Stack) {
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, dl, FIPtr.getValueType()));
    MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
  }

  RLI.Chain = Chain;
  RLI.Ptr = FIPtr;
  RLI.MPI = MPI;
}

/// \brief Custom lowers floating point to integer conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
                                                    SelectionDAG &DAG,
                                                    const SDLoc &dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);

  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  SDValue Tmp;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(
        Op.getOpcode() == ISD::FP_TO_SINT
            ? PPCISD::FCTIWZ
            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
        dl, MVT::f64, Src);
    Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
    break;
  case MVT::i64:
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ,
                      dl, MVT::f64, Src);
    Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
    break;
  }
  return Tmp;
}

SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                          const SDLoc &dl) const {
  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
    return LowerFP_TO_INTDirectMove(Op, DAG, dl);

  ReuseLoadInfo RLI;
  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                     RLI.Alignment,
                     RLI.IsInvariant ? MachineMemOperand::MOInvariant
                                     : MachineMemOperand::MONone,
                     RLI.AAInfo, RLI.Ranges);
}
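// A scalar sketch of the store/load path used when direct moves are not
// available (the function name is an assumption for illustration): the
// fcti*z result lands in an 8-byte FP stack slot, so a 4-byte integer
// reload needs a +4 bias on big-endian targets, exactly as
// LowerFP_TO_INTForReuse arranges.
#include <cstdint>
#include <cstring>
static int32_t fp_to_sint_via_stack(double D) {
  int64_t Wide = (int64_t)D;        // stands in for fctidz (round toward zero)
  unsigned char Slot[8];
  std::memcpy(Slot, &Wide, sizeof(Wide));   // stfd-style 8-byte store
  // The i32 result is the low word of the slot: offset 0 on little-endian,
  // offset 4 on big-endian -- the same bias the lowering adds.
  const uint16_t Probe = 1;
  bool LittleEndian = *(const unsigned char *)&Probe == 1;
  int32_t Result;
  std::memcpy(&Result, Slot + (LittleEndian ? 0 : 4), sizeof(Result));
  return Result;
}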
// We're trying to insert a regular store, S, and then a load, L. If the
// incoming value, O, is a load, we might just be able to have our load use the
// address used by O. However, we don't know if anything else will store to
// that address before we can load from it. To prevent this situation, we need
// to insert our load, L, into the chain as a peer of O. To do this, we give L
// the same chain operand as O, we create a token factor from the chain results
// of O and L, and we replace all uses of O's chain result with that token
// factor (see spliceIntoChain below for this last part).
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
                                            ReuseLoadInfo &RLI,
                                            SelectionDAG &DAG,
                                            ISD::LoadExtType ET) const {
  SDLoc dl(Op);
  if (ET == ISD::NON_EXTLOAD &&
      (Op.getOpcode() == ISD::FP_TO_UINT ||
       Op.getOpcode() == ISD::FP_TO_SINT) &&
      isOperationLegalOrCustom(Op.getOpcode(),
                               Op.getOperand(0).getValueType())) {

    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
    return true;
  }

  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
      LD->isNonTemporal())
    return false;
  if (LD->getMemoryVT() != MemVT)
    return false;

  RLI.Ptr = LD->getBasePtr();
  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
    assert(LD->getAddressingMode() == ISD::PRE_INC &&
           "Non-pre-inc AM on PPC?");
    RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
                          LD->getOffset());
  }

  RLI.Chain = LD->getChain();
  RLI.MPI = LD->getPointerInfo();
  RLI.IsInvariant = LD->isInvariant();
  RLI.Alignment = LD->getAlignment();
  RLI.AAInfo = LD->getAAInfo();
  RLI.Ranges = LD->getRanges();

  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
  return true;
}

// Given the head of the old chain, ResChain, insert a token factor containing
// it and NewResChain, and make users of ResChain now be users of that token
// factor.
void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
                                        SDValue NewResChain,
                                        SelectionDAG &DAG) const {
  if (!ResChain)
    return;

  SDLoc dl(NewResChain);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                           NewResChain, DAG.getUNDEF(MVT::Other));
  assert(TF.getNode() != NewResChain.getNode() &&
         "A new TF really is required here");

  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}

/// \brief Analyze profitability of direct move
/// prefer float load to int load plus direct move
/// when there is no integer use of int load
static bool directMoveIsProfitable(const SDValue &Op) {
  SDNode *Origin = Op.getOperand(0).getNode();
  if (Origin->getOpcode() != ISD::LOAD)
    return true;

  for (SDNode::use_iterator UI = Origin->use_begin(),
                            UE = Origin->use_end();
       UI != UE; ++UI) {

    // Only look at the users of the loaded value.
    if (UI.getUse().get().getResNo() != 0)
      continue;

    if (UI->getOpcode() != ISD::SINT_TO_FP &&
        UI->getOpcode() != ISD::UINT_TO_FP)
      return true;
  }

  return false;
}
/// \brief Custom lowers integer to floating point conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
                                                    SelectionDAG &DAG,
                                                    const SDLoc &dl) const {
  assert((Op.getValueType() == MVT::f32 ||
          Op.getValueType() == MVT::f64) &&
         "Invalid floating point type as target of conversion");
  assert(Subtarget.hasFPCVT() &&
         "Int to FP conversions with direct moves require FPCVT");
  SDValue FP;
  SDValue Src = Op.getOperand(0);
  bool SinglePrec = Op.getValueType() == MVT::f32;
  bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
  bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
  unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
                             (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);

  if (WordInt) {
    FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
                     dl, MVT::f64, Src);
    FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
  }
  else {
    FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
    FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
  }

  return FP;
}

SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);

  if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
    if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
      return SDValue();

    SDValue Value = Op.getOperand(0);
    // The values are now known to be -1 (false) or 1 (true). To convert this
    // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
    // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
    Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
    SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

    Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

    if (Op.getValueType() != MVT::v4f64)
      Value = DAG.getNode(ISD::FP_ROUND, dl,
                          Op.getValueType(), Value,
                          DAG.getIntPtrConstant(1, dl));
    return Value;
  }

  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  if (Op.getOperand(0).getValueType() == MVT::i1)
    return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
                       DAG.getConstantFP(1.0, dl, Op.getValueType()),
                       DAG.getConstantFP(0.0, dl, Op.getValueType()));

  // If we have direct moves, we can do all the conversion, skip the store/load
  // however, without FPCVT we can't do most conversions.
  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
      Subtarget.isPPC64() && Subtarget.hasFPCVT())
    return LowerINT_TO_FPDirectMove(Op, DAG, dl);

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  if (Op.getOperand(0).getValueType() == MVT::i64) {
    SDValue SINT = Op.getOperand(0);
    // When converting to single-precision, we actually need to convert
    // to double-precision first and then round to single-precision.
    // To avoid double-rounding effects during that operation, we have
    // to prepare the input operand.  Bits that might be truncated when
    // converting to double-precision are replaced by a bit that won't
    // be lost at this stage, but is below the single-precision rounding
    // position.
    //
    // However, if -enable-unsafe-fp-math is in effect, accept double
    // rounding to avoid the extra overhead.
    if (Op.getValueType() == MVT::f32 &&
        !Subtarget.hasFPCVT() &&
        !DAG.getTarget().Options.UnsafeFPMath) {

      // Twiddle input to make sure the low 11 bits are zero.  (If this
      // is the case, we are guaranteed the value will fit into the 53 bit
      // mantissa of an IEEE double-precision value without rounding.)
      // If any of those low 11 bits were not zero originally, make sure
      // bit 12 (value 2048) is set instead, so that the final rounding
      // to single-precision gets the correct result.
      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                                  SINT, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
                          Round, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
      Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                          Round, DAG.getConstant(-2048, dl, MVT::i64));

      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output.  Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already.  Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
                                 SINT, DAG.getConstant(53, dl, MVT::i32));
      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
                         Cond, DAG.getConstant(1, dl, MVT::i64));
      Cond = DAG.getSetCC(dl, MVT::i32,
                          Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);

      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
    }

    ReuseLoadInfo RLI;
    SDValue Bits;

    MachineFunction &MF = DAG.getMachineFunction();
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                         RLI.Alignment,
                         RLI.IsInvariant ? MachineMemOperand::MOInvariant
                                         : MachineMemOperand::MONone,
                         RLI.AAInfo, RLI.Ranges);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (((Subtarget.hasLFIWAX() &&
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
                (Subtarget.hasFPCVT() &&
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
               SINT.getOperand(0).getValueType() == MVT::i32) {
      MachineFrameInfo *FrameInfo = MF.getFrameInfo();
      EVT PtrVT = getPointerTy(DAG.getDataLayout());

      int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store =
          DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
                       MachinePointerInfo::getFixedStack(
                           DAG.getMachineFunction(), FrameIdx));

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = 4;

      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
                                         PPCISD::LFIWZX : PPCISD::LFIWAX,
                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
    } else
      Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);

    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);

    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
    return FP;
  }

  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
                                            DAG))) {
      int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store =
          DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
                       MachinePointerInfo::getFixedStack(
                           DAG.getMachineFunction(), FrameIdx));

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = 4;
    }

    MachineMemOperand *MMO =
      MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
                                     PPCISD::LFIWZX : PPCISD::LFIWAX,
                                 dl, DAG.getVTList(MVT::f64, MVT::Other),
                                 Ops, MVT::i32, MMO);
    if (ReusingLoad)
      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
                                Op.getOperand(0));

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(
        DAG.getEntryNode(), dl, Ext64, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));

    // Load the value as a double.
    Ld = DAG.getLoad(
        MVT::f64, dl, Store, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
  }

  // FCFID it and return it.
  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                     DAG.getIntPtrConstant(0, dl));
  return FP;
}
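// Worked scalar model of the double-rounding guard above (the function name
// is an assumption for illustration). For i64 -> f32 without FPCVT the value
// is converted via f64 first; collapsing the low 11 bits into bit 11 keeps
// the intermediate f64 conversion exact whenever the final f32 rounding
// could be affected.
#include <cstdint>
static int64_t prepare_i64_for_f32(int64_t V) {
  uint64_t Round = (uint64_t)V & 2047;   // low 11 bits
  Round += 2047;                         // sets bit 11 iff any low bit was set
  Round |= (uint64_t)V;
  Round &= ~(uint64_t)2047;              // clear the low 11 bits
  // Use the twiddled value only when the top 11 bits are not all sign-bit
  // copies; otherwise V already converts to f64 exactly.
  uint64_t Cond = (uint64_t)(V >> 53) + 1;
  return Cond > 1 ? (int64_t)Round : V;
}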
SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSCR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Save FP Control Word to register
  EVT NodeTys[] = {
    MVT::f64,    // return register
    MVT::Glue    // unused in this context
  };
  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
                               MachinePointerInfo());

  // Load FP Control Word from low 32 bits of stack slot.
  SDValue Four = DAG.getConstant(4, dl, PtrVT);
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo());

  // Transform as necessary
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, dl, MVT::i32));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, dl, MVT::i32)),
                            DAG.getConstant(3, dl, MVT::i32)),
                DAG.getConstant(1, dl, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  return DAG.getNode((VT.getSizeInBits() < 16 ?
                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
}
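// Exhaustive check of the FLT_ROUNDS mapping used above (helper name
// assumed). FPSCR RN bits: 0 nearest, 1 toward zero, 2 +inf, 3 -inf;
// FLT_ROUNDS wants 1, 0, 2, 3 respectively.
static int flt_rounds_from_rn(unsigned RN) {
  return (int)((RN & 3) ^ ((~RN & 3) >> 1));
}
// flt_rounds_from_rn(0) == 1, (1) == 0, (2) == 2, (3) == 3.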
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  SDLoc dl(Op);
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SHL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRA!");

  // Expand into a bunch of logical ops, followed by a select_cc.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
                                  Tmp4, Tmp6, ISD::SETLE);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}
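// A 32-bit scalar model of the SHL_PARTS expansion above (names assumed).
// It leans on the PPC shift semantics encoded by PPCISD::SHL/SRL: a 6-bit
// shift amount, where amounts 32..63 produce zero instead of C's undefined
// behavior, which is what makes the unconditional or-together trick work.
#include <cstdint>
static uint32_t shl_ppc(uint32_t V, uint32_t Amt) {
  Amt &= 63;                             // slw-style 6-bit amount
  return (Amt & 32) ? 0 : V << Amt;
}
static uint32_t srl_ppc(uint32_t V, uint32_t Amt) {
  Amt &= 63;
  return (Amt & 32) ? 0 : V >> Amt;
}
static void shl_parts(uint32_t Lo, uint32_t Hi, uint32_t Amt,
                      uint32_t &OutLo, uint32_t &OutHi) {
  uint32_t T4 = shl_ppc(Hi, Amt) | srl_ppc(Lo, 32 - Amt); // wraps mod 64
  uint32_t T6 = shl_ppc(Lo, Amt - 32);                    // wraps mod 64
  OutHi = T4 | T6;
  OutLo = shl_ppc(Lo, Amt);
}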
//===----------------------------------------------------------------------===//
// Vector related lowering.
//

/// BuildSplatI - Build a canonical splati of Val with an element size of
/// SplatSize.  Cast the result to VT.
static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
                           SelectionDAG &DAG, const SDLoc &dl) {
  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");

  static const MVT VTys[] = { // canonical VT to use for each size.
    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
  };

  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];

  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
  if (Val == -1)
    SplatSize = 1;

  EVT CanonicalVT = VTys[SplatSize-1];

  // Build a canonical splat for this value.
  return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
}

/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
                                const SDLoc &dl, EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), Op);
}

/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
                                SelectionDAG &DAG, const SDLoc &dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
}

/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
                                SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
}

/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount.  The result has the specified value type.
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
                           SelectionDAG &DAG, const SDLoc &dl) {
  // Force LHS/RHS to be the right type.
  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);

  int Ops[16];
  for (unsigned i = 0; i != 16; ++i)
    Ops[i] = i + Amt;
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}

// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");

  if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
    // We first build an i32 vector, load it into a QPX register,
    // then convert it to a floating-point vector and compare it
    // to a zero vector to get the boolean result.
    MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
    int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
    MachinePointerInfo PtrInfo =
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    assert(BVN->getNumOperands() == 4 &&
           "BUILD_VECTOR for v4i1 does not have 4 operands");

    bool IsConst = true;
    for (unsigned i = 0; i < 4; ++i) {
      if (BVN->getOperand(i).isUndef()) continue;
      if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
        IsConst = false;
        break;
      }
    }

    if (IsConst) {
      Constant *One =
        ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
      Constant *NegOne =
        ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);

      Constant *CV[4];
      for (unsigned i = 0; i < 4; ++i) {
        if (BVN->getOperand(i).isUndef())
          CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
        else if (isNullConstant(BVN->getOperand(i)))
          CV[i] = NegOne;
        else
          CV[i] = One;
      }

      Constant *CP = ConstantVector::get(CV);
      SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
                                          16 /* alignment */);

      SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
      SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
      return DAG.getMemIntrinsicNode(
          PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
    }

    SmallVector<SDValue, 4> Stores;
    for (unsigned i = 0; i < 4; ++i) {
      if (BVN->getOperand(i).isUndef()) continue;

      unsigned Offset = 4*i;
      SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
      Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

      unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
      if (StoreSize > 4) {
        Stores.push_back(
            DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx,
                              PtrInfo.getWithOffset(Offset), MVT::i32));
      } else {
        SDValue StoreValue = BVN->getOperand(i);
        if (StoreSize < 4)
          StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);

        Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx,
                                      PtrInfo.getWithOffset(Offset)));
      }
    }

    SDValue StoreChain;
    if (!Stores.empty())
      StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    else
      StoreChain = DAG.getEntryNode();

    // Now load from v4i32 into the QPX register; this will extend it to
    // v4i64 but not yet convert it to a floating point. Nevertheless, this
    // is typed as v4f64 because the QPX register integer states are not
    // explicitly represented.

    SDValue Ops[] = {StoreChain,
                     DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
                     FIdx};
    SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});

    SDValue LoadedVect = DAG.getMemIntrinsicNode(
        ISD::INTRINSIC_W_CHAIN, dl, VTs, Ops, MVT::v4i32, PtrInfo);
    LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
      DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
      LoadedVect);

    SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);

    return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
  }

  // All other QPX vectors are handled by generic code.
  if (Subtarget.hasQPX())
    return SDValue();

  // Check if this is a splat of a constant value.
  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                             HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
      SplatBitSize > 32)
    return SDValue();

  unsigned SplatBits = APSplatBits.getZExtValue();
  unsigned SplatUndef = APSplatUndef.getZExtValue();
  unsigned SplatSize = SplatBitSize / 8;

  // First, handle single instruction cases.

  // All zeros?
  if (SplatBits == 0) {
    // Canonicalize all zero vectors to be v4i32.
    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
      SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
      Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
    }
    return Op;
  }

  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
  int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
                    (32-SplatBitSize));
  if (SextVal >= -16 && SextVal <= 15)
    return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);

  // Two instruction sequences.

  // If this value is in the range [-32,30] and is even, use:
  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
  // If this value is in the range [17,31] and is odd, use:
  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
  // If this value is in the range [-31,-17] and is odd, use:
  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
  // Note the last two are three-instruction sequences.
  if (SextVal >= -32 && SextVal <= 31) {
    // To avoid having these optimizations undone by constant folding,
    // we convert to a pseudo that will be expanded later into one of
    // the above forms.
    SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
    EVT VT = (SplatSize == 1 ? MVT::v16i8 :
              (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
    SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
    SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
    if (VT == Op.getValueType())
      return RetVal;
    else
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
  }

  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
  // for fneg/fabs.
  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
    // Make -1 and vspltisw -1:
    SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);

    // Make the VSLW intrinsic, computing 0x8000_0000.
    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
                                   OnesV, DAG, dl);

    // xor by OnesV to invert it.
    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
  }

  // Check to see if this is a wide variety of vsplti*, binop self cases.
  static const signed char SplatCsts[] = {
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
  };

  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
    int i = SplatCsts[idx];

    // Figure out what shift amount will be used by altivec if shifted by i in
    // this splat size.
    unsigned TypeShiftAmt = i & (SplatBitSize-1);

    // vsplti + shl self.
    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
        Intrinsic::ppc_altivec_vslw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + srl self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
        Intrinsic::ppc_altivec_vsrw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + sra self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
        Intrinsic::ppc_altivec_vsraw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + rol self.
    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
        Intrinsic::ppc_altivec_vrlw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // t = vsplti c, result = vsldoi t, t, 1
    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 2
    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 3
    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
  }

  return SDValue();
}
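// Lane-wise model of the vspltisw/vslw trick above (function name assumed).
// vslw shifts each word by the low 5 bits of the matching word of the shift
// operand, so an all-ones vector shifted by itself moves 0xFFFFFFFF left by
// 31 and yields 0x80000000 in every lane; one more xor with the ones vector
// produces 0x7FFFFFFF.
static unsigned splat_signbit_word() {
  unsigned Ones = 0xFFFFFFFFu;           // vspltisw -1
  return Ones << (Ones & 31);            // vslw -> 0x80000000
}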
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                      SDValue RHS, SelectionDAG &DAG,
                                      const SDLoc &dl) {
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);

  enum {
    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12
  };

  if (OpNum == OP_COPY) {
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  SDValue OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);

  int ShufIdxs[16];
  switch (OpNum) {
  default: llvm_unreachable("Unknown i32 permute!");
  case OP_VMRGHW:
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  case OP_VSLDOI4:
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI8:
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI12:
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
  }
  EVT VT = OpLHS.getValueType();
  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
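// Decoder for a perfect-shuffle table entry, matching the field extraction
// above (helper name assumed): bits 31:30 cost, 29:26 opcode, 25:13 LHS id,
// 12:0 RHS id. An operand id encodes four element selectors (0-7, 8 = undef)
// as base-9 digits, which is why OP_COPY compares against (1*9+2)*9+3,
// i.e. the identity <0,1,2,3>.
static void decode_pfentry(unsigned PFEntry, unsigned &Cost, unsigned &OpNum,
                           unsigned &LHSID, unsigned &RHSID) {
  Cost  = PFEntry >> 30;
  OpNum = (PFEntry >> 26) & 0x0F;
  LHSID = (PFEntry >> 13) & ((1u << 13) - 1);
  RHSID = PFEntry & ((1u << 13) - 1);
}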
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
/// return the code it can be lowered into.  Worst case, it can always be
/// lowered into a vperm.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
  EVT VT = Op.getValueType();
  bool isLittleEndian = Subtarget.isLittleEndian();

  unsigned ShiftElts, InsertAtByte;
  bool Swap;
  if (Subtarget.hasP9Vector() &&
      PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
                           isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
    if (ShiftElts) {
      SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
                                DAG.getConstant(ShiftElts, dl, MVT::i32));
      SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Shl,
                                DAG.getConstant(InsertAtByte, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
    }
    SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }

  if (Subtarget.hasVSX()) {
    if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
      int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);

      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
                                  DAG.getConstant(SplatIdx, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
    }

    // Left shifts of 8 bytes are actually swaps. Convert accordingly.
    if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
      SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
    }
  }

  if (Subtarget.hasQPX()) {
    if (VT.getVectorNumElements() != 4)
      return SDValue();

    if (V2.isUndef()) V2 = V1;

    int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
    if (AlignIdx != -1) {
      return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
                         DAG.getConstant(AlignIdx, dl, MVT::i32));
    } else if (SVOp->isSplat()) {
      int SplatIdx = SVOp->getSplatIndex();
      if (SplatIdx >= 4) {
        std::swap(V1, V2);
        SplatIdx -= 4;
      }

      return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
                         DAG.getConstant(SplatIdx, dl, MVT::i32));
    }

    // Lower this into a qvgpci/qvfperm pair.

    // Compute the qvgpci literal
    unsigned idx = 0;
    for (unsigned i = 0; i < 4; ++i) {
      int m = SVOp->getMaskElt(i);
      unsigned mm = m >= 0 ? (unsigned) m : i;
      idx |= mm << (3-i)*3;
    }

    SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
                             DAG.getConstant(idx, dl, MVT::i32));
    return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
  }

  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
  // selected by the instruction selector.
  if (V2.isUndef()) {
    if (PPC::isSplatShuffleMask(SVOp, 1) ||
        PPC::isSplatShuffleMask(SVOp, 2) ||
        PPC::isSplatShuffleMask(SVOp, 4) ||
        PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
        PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
        (Subtarget.hasP8Altivec() && (
         PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
         PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
         PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
      return Op;
    }
  }

  // Altivec has a variety of "shuffle immediates" that take two vector inputs
  // and produce a fixed permutation.  If any of these match, do not lower to
  // VPERM.
  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
      PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      (Subtarget.hasP8Altivec() && (
       PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
       PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
       PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
    return Op;

  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
  // perfect shuffle table to emit an optimal matching sequence.
  ArrayRef<int> PermMask = SVOp->getMask();

  unsigned PFIndexes[4];
  bool isFourElementShuffle = true;
  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
    unsigned EltNo = 8;   // Start out undef.
    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
      if (PermMask[i*4+j] < 0)
        continue;   // Undef, ignore it.

      unsigned ByteSource = PermMask[i*4+j];
      if ((ByteSource & 3) != j) {
        isFourElementShuffle = false;
        break;
      }

      if (EltNo == 8) {
        EltNo = ByteSource/4;
      } else if (EltNo != ByteSource/4) {
        isFourElementShuffle = false;
        break;
      }
    }
    PFIndexes[i] = EltNo;
  }

  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
  // perfect shuffle vector to determine if it is cost effective to do this as
  // discrete instructions, or whether we should use a vperm.
  // For now, we skip this for little endian until such time as we have a
  // little-endian perfect shuffle table.
  if (isFourElementShuffle && !isLittleEndian) {
    // Compute the index in the perfect shuffle table.
    unsigned PFTableIndex =
      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];

    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    unsigned Cost  = (PFEntry >> 30);

    // Determining when to avoid vperm is tricky.  Many things affect the cost
    // of vperm, particularly how many times the perm mask needs to be computed.
    // For example, if the perm mask can be hoisted out of a loop or is already
    // used (perhaps because there are multiple permutes with the same shuffle
    // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
    // the loop requires an extra register.
    //
    // As a compromise, we only emit discrete instructions if the shuffle can be
    // generated in 3 or fewer operations.  When we have loop information
    // available, if this block is within a loop, we should avoid using vperm
    // for 3-operation perms and use a constant pool load instead.
    if (Cost < 3)
      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
  }

  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  if (V2.isUndef()) V2 = V1;

  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes.  Convert now.

  // For little endian, the order of the input vectors is reversed, and
  // the permutation mask is complemented with respect to 31.  This is
  // necessary to produce proper semantics with the big-endian-biased vperm
  // instruction.
  EVT EltVT = V1.getValueType().getVectorElementType();
  unsigned BytesPerElement = EltVT.getSizeInBits()/8;

  SmallVector<SDValue, 16> ResultMask;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];

    for (unsigned j = 0; j != BytesPerElement; ++j)
      if (isLittleEndian)
        ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
                                             dl, MVT::i32));
      else
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
                                             MVT::i32));
  }

  SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
  if (isLittleEndian)
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V2, V1, VPermMask);
  else
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V1, V2, VPermMask);
}

/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
/// vector comparison.  If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
                                 bool &isDot, const PPCSubtarget &Subtarget) {
  unsigned IntrinsicID =
      cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
  CompareOpc = -1;
  isDot = false;
  switch (IntrinsicID) {
  default:
    return false;
  // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequd_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 199;
      isDot = 1;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsd_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 967;
      isDot = 1;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtud_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 711;
      isDot = 1;
    } else
      return false;
    break;
  // VSX predicate comparisons use the same infrastructure
  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
  case Intrinsic::ppc_vsx_xvcmpgedp_p:
  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
  case Intrinsic::ppc_vsx_xvcmpgesp_p:
  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
    if (Subtarget.hasVSX()) {
      switch (IntrinsicID) {
      case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc =  99; break;
      case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break;
      case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break;
      case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc =  67; break;
      case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc =  83; break;
      case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc =  75; break;
      }
      isDot = 1;
    } else
      return false;
    break;

  // Normal Comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:  CompareOpc = 966; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequb: CompareOpc =   6; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequh: CompareOpc =  70; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequd:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 199;
      isDot = 0;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsd:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 967;
      isDot = 0;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtud:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 711;
      isDot = 0;
    } else
      return false;
    break;
  }
  return true;
}

/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                   SelectionDAG &DAG) const {
  unsigned IntrinsicID =
    cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  if (IntrinsicID == Intrinsic::thread_pointer) {
    // Reads the thread pointer register, used for __builtin_thread_pointer.
    bool is64bit = Subtarget.isPPC64();
    return DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
                           is64bit ? MVT::i64 : MVT::i32);
  }

  // If this is a lowered altivec predicate compare, CompareOpc is set to the
  // opcode number of the comparison.
  SDLoc dl(Op);
  int CompareOpc;
  bool isDot;
  if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
    return SDValue();    // Don't custom lower most intrinsics.

  // If this is a non-dot comparison, make the VCMP node and we are done.
  if (!isDot) {
    SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
                              Op.getOperand(1), Op.getOperand(2),
                              DAG.getConstant(CompareOpc, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
  }

  // Create the PPCISD altivec 'dot' comparison node.
  SDValue Ops[] = {
    Op.getOperand(2),  // LHS
    Op.getOperand(3),  // RHS
    DAG.getConstant(CompareOpc, dl, MVT::i32)
  };
  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

  // Now that we have the comparison, emit a copy from the CR to a GPR.
  // This is flagged to the above dot comparison.
  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
                              DAG.getRegister(PPC::CR6, MVT::i32),
                              CompNode.getValue(1));

  // Unpack the result based on how the target uses it.
  unsigned BitNo;   // Bit # of CR6.
  bool InvertBit;   // Invert result?
  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
  default:  // Can't happen, don't crash on invalid number though.
  case 0:   // Return the value of the EQ bit of CR6.
    BitNo = 0; InvertBit = false;
    break;
  case 1:   // Return the inverted value of the EQ bit of CR6.
    BitNo = 0; InvertBit = true;
    break;
  case 2:   // Return the value of the LT bit of CR6.
    BitNo = 2; InvertBit = false;
    break;
  case 3:   // Return the inverted value of the LT bit of CR6.
    BitNo = 2; InvertBit = true;
    break;
  }

  // Shift the bit into the low position.
  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
                      DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
  // Isolate the bit.
  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
                      DAG.getConstant(1, dl, MVT::i32));

  // If we are supposed to, toggle the bit.
  if (InvertBit)
    Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
                        DAG.getConstant(1, dl, MVT::i32));
  return Flags;
}
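// Scalar model of the CR6 unpacking above (helper name assumed, and the
// field layout is inferred from the shift amount used above): mfocrf
// leaves the CR6 field in bits 7:4 of the GPR, with EQ at bit 5 and LT at
// bit 7, so field bit B (EQ = 0, LT = 2 in the switch above) sits at GPR
// bit 8 - (3 - B).
static unsigned cr6_predicate_bit(unsigned Flags, unsigned BitNo,
                                  bool InvertBit) {
  unsigned Bit = (Flags >> (8 - (3 - BitNo))) & 1;
  return InvertBit ? Bit ^ 1 : Bit;
}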
SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int
  // instructions), but for smaller types, we need to first extend up to v2i32
  // before doing going farther.
  if (Op.getValueType() == MVT::v2i64) {
    EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    if (ExtVT != MVT::v2i32) {
      Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
      Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
                       DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
                                        ExtVT.getVectorElementType(), 4)));
      Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
      Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
                       DAG.getValueType(MVT::v2i32));
    }

    return Op;
  }

  return SDValue();
}

SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // Create a stack slot that is 16-byte aligned.
  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  // Store the input value into Value#0 of the stack slot.
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
                               MachinePointerInfo());
  // Load it out.
  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
}

SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDNode *N = Op.getNode();

  assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
         "Unknown extract_vector_elt type");

  SDValue Value = N->getOperand(0);

  // The first part of this is like the store lowering except that we don't
  // need to track the chain.

  // The values are now known to be -1 (false) or 1 (true). To convert this
  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
  // understand how to form the extending load.
  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

  // Now convert to an integer and store.
  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
    DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
    Value);

  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDValue StoreChain = DAG.getEntryNode();
  SDValue Ops[] = {StoreChain,
                   DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
                   Value, FIdx};
  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);

  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
    dl, VTs, Ops, MVT::v4i32, PtrInfo);

  // Extract the value requested.
  unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
  Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

  SDValue IntVal =
      DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));

  if (!Subtarget.useCRBits())
    return IntVal;

  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
}
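// The QPX boolean remapping above as scalar math (helper name assumed):
// v4i1 lanes materialize as -1.0 (false) or 1.0 (true) after QBFLT, and a
// single fma with the 0.5 constant maps them onto 0.0 / 1.0:
// (V + 1.0) * 0.5 == 0.5 * V + 0.5.
static double qpx_bool_lane_to_01(double V) {
  return V * 0.5 + 0.5;   // -1.0 -> 0.0, 1.0 -> 1.0
}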
/// Lowering for QPX v4i1 loads
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  SDValue LoadChain = LN->getChain();
  SDValue BasePtr = LN->getBasePtr();

  if (Op.getValueType() == MVT::v4f64 ||
      Op.getValueType() == MVT::v4f32) {
    EVT MemVT = LN->getMemoryVT();
    unsigned Alignment = LN->getAlignment();

    // If this load is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    EVT ScalarVT = Op.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();

    SDValue Vals[4], LoadChains[4];
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      SDValue Load;
      if (ScalarVT != ScalarMemVT)
        Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
                              BasePtr,
                              LN->getPointerInfo().getWithOffset(Idx * Stride),
                              ScalarMemVT, MinAlign(Alignment, Idx * Stride),
                              LN->getMemOperand()->getFlags(), LN->getAAInfo());
      else
        Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
                           LN->getPointerInfo().getWithOffset(Idx * Stride),
                           MinAlign(Alignment, Idx * Stride),
                           LN->getMemOperand()->getFlags(), LN->getAAInfo());

      if (Idx == 0 && LN->isIndexed()) {
        assert(LN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector load");
        Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
                                  LN->getAddressingMode());
      }

      Vals[Idx] = Load;
      LoadChains[Idx] = Load.getValue(1);

      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
    }

    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
    SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);

    if (LN->isIndexed()) {
      SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
      return DAG.getMergeValues(RetOps, dl);
    }

    SDValue RetOps[] = { Value, TF };
    return DAG.getMergeValues(RetOps, dl);
  }

  assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
  assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");

  // To lower v4i1 from a byte array, we load the byte elements of the
  // vector and then reuse the BUILD_VECTOR logic.
  SDValue VectElmts[4], VectElmtChains[4];
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    VectElmts[i] = DAG.getExtLoad(
        ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
        LN->getPointerInfo().getWithOffset(i), MVT::i8,
        /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());

    VectElmtChains[i] = VectElmts[i].getValue(1);
  }

  LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
  SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);

  SDValue RVals[] = { Value, LoadChain };
  return DAG.getMergeValues(RVals, dl);
}
Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32), Value); MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SDValue Ops[] = {StoreChain, DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32), Value, FIdx}; SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other); StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, VTs, Ops, MVT::v4i32, PtrInfo); // Move data into the byte array. SDValue Loads[4], LoadChains[4]; for (unsigned i = 0; i < 4; ++i) { unsigned Offset = 4*i; SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset)); LoadChains[i] = Loads[i].getValue(1); } StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); SDValue Stores[4]; for (unsigned i = 0; i < 4; ++i) { SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); Stores[i] = DAG.getTruncStore( StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i), MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(), SN->getAAInfo()); } StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); return StoreChain; } SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); if (Op.getValueType() == MVT::v4i32) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. SDValue RHSSwap = // = vrlw RHS, 16 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); // Shrinkify inputs to v8i16. LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); // Low parts multiplied together, generating 32-bit results (we ignore the // top parts). SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, LHS, RHS, DAG, dl, MVT::v4i32); SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); // Shift the high parts up 16 bits. HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, Neg16, DAG, dl); return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); } else if (Op.getValueType() == MVT::v8i16) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, LHS, RHS, Zero, DAG, dl); } else if (Op.getValueType() == MVT::v16i8) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); bool isLittleEndian = Subtarget.isLittleEndian(); // Multiply the even 8-bit parts, producing 16-bit sums. SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, LHS, RHS, DAG, dl, MVT::v8i16); EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); // Multiply the odd 8-bit parts, producing 16-bit sums. 
SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, LHS, RHS, DAG, dl, MVT::v8i16); OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); // Merge the results together. Because vmuleub and vmuloub are // instructions with a big-endian bias, we must reverse the // element numbering and reverse the meaning of "odd" and "even" // when generating little endian code. int Ops[16]; for (unsigned i = 0; i != 8; ++i) { if (isLittleEndian) { Ops[i*2 ] = 2*i; Ops[i*2+1] = 2*i+16; } else { Ops[i*2 ] = 2*i+1; Ops[i*2+1] = 2*i+1+16; } } if (isLittleEndian) return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops); else return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); } else { llvm_unreachable("Unknown mul to lower!"); } } /// LowerOperation - Provide custom lowering hooks for some operations. /// SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Wasn't expecting to be able to lower this!"); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::VAARG: return LowerVAARG(Op, DAG); case ISD::VACOPY: return LowerVACOPY(Op, DAG); case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::GET_DYNAMIC_AREA_OFFSET: return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG); case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG); case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op)); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); // Lower 64-bit shifts. case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); // Vector-related lowering. case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); // For counter-based loop handling. case ISD::INTRINSIC_W_CHAIN: return SDValue(); // Frame & Return address. 
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); } } void PPCTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { SDLoc dl(N); switch (N->getOpcode()) { default: llvm_unreachable("Do not know how to custom type legalize this operation!"); case ISD::READCYCLECOUNTER: { SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0)); Results.push_back(RTB); Results.push_back(RTB.getValue(1)); Results.push_back(RTB.getValue(2)); break; } case ISD::INTRINSIC_W_CHAIN: { if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != Intrinsic::ppc_is_decremented_ctr_nonzero) break; assert(N->getValueType(0) == MVT::i1 && "Unexpected result type for CTR decrement intrinsic"); EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), N->getValueType(0)); SDVTList VTs = DAG.getVTList(SVT, MVT::Other); SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), N->getOperand(1)); Results.push_back(NewInt); Results.push_back(NewInt.getValue(1)); break; } case ISD::VAARG: { if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64()) return; EVT VT = N->getValueType(0); if (VT == MVT::i64) { SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG); Results.push_back(NewNode); Results.push_back(NewNode.getValue(1)); } return; } case ISD::FP_ROUND_INREG: { assert(N->getValueType(0) == MVT::ppcf128); assert(N->getOperand(0).getValueType() == MVT::ppcf128); SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, N->getOperand(0), DAG.getIntPtrConstant(0, dl)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, N->getOperand(0), DAG.getIntPtrConstant(1, dl)); // Add the two halves of the long double in round-to-zero mode. SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); // We know the low half is about to be thrown away, so just use something // convenient. Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, FPreg, FPreg)); return; } case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: // LowerFP_TO_INT() can only handle f32 and f64. if (N->getOperand(0).getValueType() == MVT::ppcf128) return; Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); return; } } //===----------------------------------------------------------------------===// // Other Lowering Code //===----------------------------------------------------------------------===// static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); Function *Func = Intrinsic::getDeclaration(M, Id); return Builder.CreateCall(Func, {}); } // The mappings for emitLeading/TrailingFence are taken from // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const { if (Ord == AtomicOrdering::SequentiallyConsistent) return callIntrinsic(Builder, Intrinsic::ppc_sync); if (isReleaseOrStronger(Ord)) return callIntrinsic(Builder, Intrinsic::ppc_lwsync); return nullptr; } Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const { if (IsLoad && isAcquireOrStronger(Ord)) return callIntrinsic(Builder, Intrinsic::ppc_lwsync); // FIXME: this is too conservative, a dependent branch + isync is enough.
// See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification. return nullptr; } MachineBasicBlock * PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, unsigned AtomicSize, unsigned BinOpcode) const { // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const TargetInstrInfo *TII = Subtarget.getInstrInfo(); auto LoadMnemonic = PPC::LDARX; auto StoreMnemonic = PPC::STDCX; switch (AtomicSize) { default: llvm_unreachable("Unexpected size of atomic entity"); case 1: LoadMnemonic = PPC::LBARX; StoreMnemonic = PPC::STBCX; assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); break; case 2: LoadMnemonic = PPC::LHARX; StoreMnemonic = PPC::STHCX; assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); break; case 4: LoadMnemonic = PPC::LWARX; StoreMnemonic = PPC::STWCX; break; case 8: LoadMnemonic = PPC::LDARX; StoreMnemonic = PPC::STDCX; break; } const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); MachineFunction::iterator It = ++BB->getIterator(); unsigned dest = MI.getOperand(0).getReg(); unsigned ptrA = MI.getOperand(1).getReg(); unsigned ptrB = MI.getOperand(2).getReg(); unsigned incr = MI.getOperand(3).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loopMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned TmpReg = (!BinOpcode) ? incr : RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass); // thisMBB: // ... // fallthrough --> loopMBB BB->addSuccessor(loopMBB); // loopMBB: // l[wd]arx dest, ptr // add r0, dest, incr // st[wd]cx. r0, ptr // bne- loopMBB // fallthrough --> exitMBB BB = loopMBB; BuildMI(BB, dl, TII->get(LoadMnemonic), dest) .addReg(ptrA).addReg(ptrB); if (BinOpcode) BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(TmpReg).addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); BB->addSuccessor(loopMBB); BB->addSuccessor(exitMBB); // exitMBB: // ... BB = exitMBB; return BB; } MachineBasicBlock * PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, bool is8bit, // operation unsigned BinOpcode) const { // If we support part-word atomic mnemonics, just use them if (Subtarget.hasPartwordAtomics()) return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode); // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const TargetInstrInfo *TII = Subtarget.getInstrInfo(); // In 64 bit mode we have to use 64 bits for addresses, even though the // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address // registers without caring whether they're 32 or 64, but here we're // doing actual arithmetic on the addresses. bool is64bit = Subtarget.isPPC64(); unsigned ZeroReg = is64bit ? 
PPC::ZERO8 : PPC::ZERO; const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); MachineFunction::iterator It = ++BB->getIterator(); unsigned dest = MI.getOperand(0).getReg(); unsigned ptrA = MI.getOperand(1).getReg(); unsigned ptrB = MI.getOperand(2).getReg(); unsigned incr = MI.getOperand(3).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loopMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; unsigned PtrReg = RegInfo.createVirtualRegister(RC); unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); unsigned ShiftReg = RegInfo.createVirtualRegister(RC); unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); unsigned MaskReg = RegInfo.createVirtualRegister(RC); unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC); unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); unsigned Ptr1Reg; unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC); // thisMBB: // ... // fallthrough --> loopMBB BB->addSuccessor(loopMBB); // The 4-byte load must be aligned, while a char or short may be // anywhere in the word. Hence all this nasty bookkeeping code. // add ptr1, ptrA, ptrB [copy if ptrA==0] // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] // xori shift, shift1, 24 [16] // rlwinm ptr, ptr1, 0, 0, 29 // slw incr2, incr, shift // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] // slw mask, mask2, shift // loopMBB: // lwarx tmpDest, ptr // add tmp, tmpDest, incr2 // andc tmp2, tmpDest, mask // and tmp3, tmp, mask // or tmp4, tmp3, tmp2 // stwcx. tmp4, ptr // bne- loopMBB // fallthrough --> exitMBB // srw dest, tmpDest, shift if (ptrA != ZeroReg) { Ptr1Reg = RegInfo.createVirtualRegister(RC); BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) .addReg(ptrA).addReg(ptrB); } else { Ptr1Reg = ptrB; } BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); if (is64bit) BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) .addReg(Ptr1Reg).addImm(0).addImm(61); else BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg) .addReg(incr).addReg(ShiftReg); if (is8bit) BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); else { BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535); } BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) .addReg(Mask2Reg).addReg(ShiftReg); BB = loopMBB; BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) .addReg(ZeroReg).addReg(PtrReg); if (BinOpcode) BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) .addReg(Incr2Reg).addReg(TmpDestReg); BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg) .addReg(TmpDestReg).addReg(MaskReg); BuildMI(BB, dl, TII->get(is64bit ? 
PPC::AND8 : PPC::AND), Tmp3Reg) .addReg(TmpReg).addReg(MaskReg); BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) .addReg(Tmp3Reg).addReg(Tmp2Reg); BuildMI(BB, dl, TII->get(PPC::STWCX)) .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); BB->addSuccessor(loopMBB); BB->addSuccessor(exitMBB); // exitMBB: // ... BB = exitMBB; BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg) .addReg(ShiftReg); return BB; } llvm::MachineBasicBlock * PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); const BasicBlock *BB = MBB->getBasicBlock(); MachineFunction::iterator I = ++MBB->getIterator(); // Memory Reference MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin(); MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end(); unsigned DstReg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); assert(RC->hasType(MVT::i32) && "Invalid destination!"); unsigned mainDstReg = MRI.createVirtualRegister(RC); unsigned restoreDstReg = MRI.createVirtualRegister(RC); MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); // For v = setjmp(buf), we generate // // thisMBB: // SjLjSetup mainMBB // bl mainMBB // v_restore = 1 // b sinkMBB // // mainMBB: // buf[LabelOffset] = LR // v_main = 0 // // sinkMBB: // v = phi(main, restore) // MachineBasicBlock *thisMBB = MBB; MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); MF->insert(I, mainMBB); MF->insert(I, sinkMBB); MachineInstrBuilder MIB; // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), MBB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); // Note that the structure of the jmp_buf used here is not compatible // with that used by libc, and is not designed to be. Specifically, it // stores only those 'reserved' registers that LLVM does not otherwise // understand how to spill. Also, by convention, by the time this // intrinsic is called, Clang has already stored the frame address in the // first slot of the buffer and stack address in the third. Following the // X86 target code, we'll store the jump address in the second slot. We also // need to save the TOC pointer (R2) to handle jumps between shared // libraries, and that will be stored in the fourth slot. The thread // identifier (R13) is not affected. // thisMBB: const int64_t LabelOffset = 1 * PVT.getStoreSize(); const int64_t TOCOffset = 3 * PVT.getStoreSize(); const int64_t BPOffset = 4 * PVT.getStoreSize(); // Prepare IP either in reg. const TargetRegisterClass *PtrRC = getRegClassFor(PVT); unsigned LabelReg = MRI.createVirtualRegister(PtrRC); unsigned BufReg = MI.getOperand(1).getReg(); if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) { setUsesTOCBasePtr(*MBB->getParent()); MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) .addReg(PPC::X2) .addImm(TOCOffset) .addReg(BufReg); MIB.setMemRefs(MMOBegin, MMOEnd); } // Naked functions never have a base pointer, and so we use r1. For all // other functions, this decision must be delayed until during PEI. 
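// A naked function has no prologue and never materializes a separate base
// pointer, so storing the stack pointer keeps the buffer slot meaningful for
// the longjmp side.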
unsigned BaseReg; if (MF->getFunction()->hasFnAttribute(Attribute::Naked)) BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1; else BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP; MIB = BuildMI(*thisMBB, MI, DL, TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW)) .addReg(BaseReg) .addImm(BPOffset) .addReg(BufReg); MIB.setMemRefs(MMOBegin, MMOEnd); // Setup MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); MIB.addRegMask(TRI->getNoPreservedMask()); BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) .addMBB(mainMBB); MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); thisMBB->addSuccessor(mainMBB, BranchProbability::getZero()); thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne()); // mainMBB: // mainDstReg = 0 MIB = BuildMI(mainMBB, DL, TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg); // Store IP if (Subtarget.isPPC64()) { MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) .addReg(LabelReg) .addImm(LabelOffset) .addReg(BufReg); } else { MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) .addReg(LabelReg) .addImm(LabelOffset) .addReg(BufReg); } MIB.setMemRefs(MMOBegin, MMOEnd); BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); mainMBB->addSuccessor(sinkMBB); // sinkMBB: BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(PPC::PHI), DstReg) .addReg(mainDstReg).addMBB(mainMBB) .addReg(restoreDstReg).addMBB(thisMBB); MI.eraseFromParent(); return sinkMBB; } MachineBasicBlock * PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); // Memory Reference MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin(); MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end(); MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); const TargetRegisterClass *RC = (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; unsigned Tmp = MRI.createVirtualRegister(RC); // Since FP is only updated here but NOT referenced, it's treated as GPR. unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; unsigned BP = (PVT == MVT::i64) ? PPC::X30 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29 : PPC::R30); MachineInstrBuilder MIB; const int64_t LabelOffset = 1 * PVT.getStoreSize(); const int64_t SPOffset = 2 * PVT.getStoreSize(); const int64_t TOCOffset = 3 * PVT.getStoreSize(); const int64_t BPOffset = 4 * PVT.getStoreSize(); unsigned BufReg = MI.getOperand(0).getReg(); // Reload FP (the jumped-to function may not have had a // frame pointer, and if so, then its r31 will be restored // as necessary). 
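// The loads below mirror the slot layout used by emitEHSjLjSetJmp: with
// 64-bit pointers the frame pointer is reloaded from offset 0, the target IP
// from 8, the stack pointer from 16, and the TOC and base pointers from 24
// and 32; the 32-bit layout is identical with a 4-byte scale.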
if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) .addImm(0) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) .addImm(0) .addReg(BufReg); } MIB.setMemRefs(MMOBegin, MMOEnd); // Reload IP if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) .addImm(LabelOffset) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) .addImm(LabelOffset) .addReg(BufReg); } MIB.setMemRefs(MMOBegin, MMOEnd); // Reload SP if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) .addImm(SPOffset) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) .addImm(SPOffset) .addReg(BufReg); } MIB.setMemRefs(MMOBegin, MMOEnd); // Reload BP if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP) .addImm(BPOffset) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP) .addImm(BPOffset) .addReg(BufReg); } MIB.setMemRefs(MMOBegin, MMOEnd); // Reload TOC if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) { setUsesTOCBasePtr(*MBB->getParent()); MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) .addImm(TOCOffset) .addReg(BufReg); MIB.setMemRefs(MMOBegin, MMOEnd); } // Jump BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR)); MI.eraseFromParent(); return MBB; } MachineBasicBlock * PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { if (MI.getOpcode() == TargetOpcode::STACKMAP || MI.getOpcode() == TargetOpcode::PATCHPOINT) { if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() && MI.getOpcode() == TargetOpcode::PATCHPOINT) { // Call lowering should have added an r2 operand to indicate a dependence // on the TOC base pointer value. It can't however, because there is no // way to mark the dependence as implicit there, and so the stackmap code // will confuse it with a regular operand. Instead, add the dependence // here. setUsesTOCBasePtr(*BB->getParent()); MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true)); } return emitPatchPoint(MI, BB); } if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 || MI.getOpcode() == PPC::EH_SjLj_SetJmp64) { return emitEHSjLjSetJmp(MI, BB); } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 || MI.getOpcode() == PPC::EH_SjLj_LongJmp64) { return emitEHSjLjLongJmp(MI, BB); } const TargetInstrInfo *TII = Subtarget.getInstrInfo(); // To "insert" these instructions we actually have to insert their // control-flow patterns. 
const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = ++BB->getIterator(); MachineFunction *F = BB->getParent(); if (Subtarget.hasISEL() && (MI.getOpcode() == PPC::SELECT_CC_I4 || MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) { SmallVector<MachineOperand, 2> Cond; if (MI.getOpcode() == PPC::SELECT_CC_I4 || MI.getOpcode() == PPC::SELECT_CC_I8) Cond.push_back(MI.getOperand(4)); else Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); Cond.push_back(MI.getOperand(1)); DebugLoc dl = MI.getDebugLoc(); TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond, MI.getOperand(2).getReg(), MI.getOperand(3).getReg()); } else if (MI.getOpcode() == PPC::SELECT_CC_I4 || MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_CC_F4 || MI.getOpcode() == PPC::SELECT_CC_F8 || MI.getOpcode() == PPC::SELECT_CC_QFRC || MI.getOpcode() == PPC::SELECT_CC_QSRC || MI.getOpcode() == PPC::SELECT_CC_QBRC || MI.getOpcode() == PPC::SELECT_CC_VRRC || MI.getOpcode() == PPC::SELECT_CC_VSFRC || MI.getOpcode() == PPC::SELECT_CC_VSSRC || MI.getOpcode() == PPC::SELECT_CC_VSRC || MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 || MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 || MI.getOpcode() == PPC::SELECT_QFRC || MI.getOpcode() == PPC::SELECT_QSRC || MI.getOpcode() == PPC::SELECT_QBRC || MI.getOpcode() == PPC::SELECT_VRRC || MI.getOpcode() == PPC::SELECT_VSFRC || MI.getOpcode() == PPC::SELECT_VSSRC || MI.getOpcode() == PPC::SELECT_VSRC) { // The incoming instruction knows the destination vreg to set, the // condition code register to branch on, the true/false values to // select between, and a branch opcode to use. // thisMBB: // ... // TrueVal = ... // cmpTY ccX, r1, r2 // bCC copy1MBB // fallthrough --> copy0MBB MachineBasicBlock *thisMBB = BB; MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); DebugLoc dl = MI.getDebugLoc(); F->insert(It, copy0MBB); F->insert(It, sinkMBB); // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(BB); // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 || MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 || MI.getOpcode() == PPC::SELECT_QFRC || MI.getOpcode() == PPC::SELECT_QSRC || MI.getOpcode() == PPC::SELECT_QBRC || MI.getOpcode() == PPC::SELECT_VRRC || MI.getOpcode() == PPC::SELECT_VSFRC || MI.getOpcode() == PPC::SELECT_VSSRC || MI.getOpcode() == PPC::SELECT_VSRC) { BuildMI(BB, dl, TII->get(PPC::BC)) .addReg(MI.getOperand(1).getReg()) .addMBB(sinkMBB); } else { unsigned SelectPred = MI.getOperand(4).getImm(); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(SelectPred) .addReg(MI.getOperand(1).getReg()) .addMBB(sinkMBB); } // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB BB = copy0MBB; // Update machine-CFG edges BB->addSuccessor(sinkMBB); // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ...
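// Note that the BC/BCC above branches to sinkMBB when the condition holds,
// so the PHI built below takes the TrueValue (operand 2 of the pseudo) along
// the thisMBB edge; a false condition falls through to copy0MBB, which
// supplies the FalseValue (operand 3).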
BB = sinkMBB; BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg()) .addReg(MI.getOperand(3).getReg()) .addMBB(copy0MBB) .addReg(MI.getOperand(2).getReg()) .addMBB(thisMBB); } else if (MI.getOpcode() == PPC::ReadTB) { // To read the 64-bit time-base register on a 32-bit target, we read the // two halves. Should the counter have wrapped while it was being read, we // need to try again. // ... // readLoop: // mfspr Rx,TBU # load from TBU // mfspr Ry,TB # load from TB // mfspr Rz,TBU # load from TBU // cmpw crX,Rx,Rz # check if 'old'='new' // bne readLoop # branch if they're not equal // ... MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); DebugLoc dl = MI.getDebugLoc(); F->insert(It, readMBB); F->insert(It, sinkMBB); // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(readMBB); BB = readMBB; MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); unsigned LoReg = MI.getOperand(0).getReg(); unsigned HiReg = MI.getOperand(1).getReg(); BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269); BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268); BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269); unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg) .addReg(HiReg).addReg(ReadAgainReg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB); BB->addSuccessor(readMBB); BB->addSuccessor(sinkMBB); } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::AND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::OR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); else if (MI.getOpcode() == 
PPC::ATOMIC_LOAD_NAND_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, 0); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, 0); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32) BB = EmitAtomicBinary(MI, BB, 4, 0); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64) BB = EmitAtomicBinary(MI, BB, 8, 0); else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 || (Subtarget.hasPartwordAtomics() && MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) || (Subtarget.hasPartwordAtomics() && MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) { bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; auto LoadMnemonic = PPC::LDARX; auto StoreMnemonic = PPC::STDCX; switch (MI.getOpcode()) { default: llvm_unreachable("Compare and swap of unknown size"); case PPC::ATOMIC_CMP_SWAP_I8: LoadMnemonic = PPC::LBARX; StoreMnemonic = PPC::STBCX; assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); break; case PPC::ATOMIC_CMP_SWAP_I16: LoadMnemonic = PPC::LHARX; StoreMnemonic = PPC::STHCX; assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); break; case PPC::ATOMIC_CMP_SWAP_I32: LoadMnemonic = PPC::LWARX; StoreMnemonic = PPC::STWCX; break; case PPC::ATOMIC_CMP_SWAP_I64: LoadMnemonic = PPC::LDARX; StoreMnemonic = PPC::STDCX; break; } unsigned dest = MI.getOperand(0).getReg(); unsigned ptrA = MI.getOperand(1).getReg(); unsigned ptrB = MI.getOperand(2).getReg(); unsigned oldval = MI.getOperand(3).getReg(); unsigned newval = MI.getOperand(4).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loop1MBB); F->insert(It, loop2MBB); F->insert(It, midMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); // thisMBB: // ... // fallthrough --> loopMBB BB->addSuccessor(loop1MBB); // loop1MBB: // l[bhwd]arx dest, ptr // cmp[wd] dest, oldval // bne- midMBB // loop2MBB: // st[bhwd]cx. newval, ptr // bne- loopMBB // b exitBB // midMBB: // st[bhwd]cx. dest, ptr // exitBB: BB = loop1MBB; BuildMI(BB, dl, TII->get(LoadMnemonic), dest) .addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(is64bit ? 
PPC::CMPD : PPC::CMPW), PPC::CR0) .addReg(oldval).addReg(dest); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); BB->addSuccessor(loop2MBB); BB->addSuccessor(midMBB); BB = loop2MBB; BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(newval).addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); BB->addSuccessor(loop1MBB); BB->addSuccessor(exitMBB); BB = midMBB; BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(dest).addReg(ptrA).addReg(ptrB); BB->addSuccessor(exitMBB); // exitMBB: // ... BB = exitMBB; } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { // We must use 64-bit registers for addresses when targeting 64-bit, // since we're actually doing arithmetic on them. Other registers // can be 32-bit. bool is64bit = Subtarget.isPPC64(); bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; unsigned dest = MI.getOperand(0).getReg(); unsigned ptrA = MI.getOperand(1).getReg(); unsigned ptrB = MI.getOperand(2).getReg(); unsigned oldval = MI.getOperand(3).getReg(); unsigned newval = MI.getOperand(4).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loop1MBB); F->insert(It, loop2MBB); F->insert(It, midMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; unsigned PtrReg = RegInfo.createVirtualRegister(RC); unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); unsigned ShiftReg = RegInfo.createVirtualRegister(RC); unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC); unsigned MaskReg = RegInfo.createVirtualRegister(RC); unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); unsigned Ptr1Reg; unsigned TmpReg = RegInfo.createVirtualRegister(RC); unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; // thisMBB: // ... // fallthrough --> loopMBB BB->addSuccessor(loop1MBB); // The 4-byte load must be aligned, while a char or short may be // anywhere in the word. Hence all this nasty bookkeeping code. // add ptr1, ptrA, ptrB [copy if ptrA==0] // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] // xori shift, shift1, 24 [16] // rlwinm ptr, ptr1, 0, 0, 29 // slw newval2, newval, shift // slw oldval2, oldval,shift // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] // slw mask, mask2, shift // and newval3, newval2, mask // and oldval3, oldval2, mask // loop1MBB: // lwarx tmpDest, ptr // and tmp, tmpDest, mask // cmpw tmp, oldval3 // bne- midMBB // loop2MBB: // andc tmp2, tmpDest, mask // or tmp4, tmp2, newval3 // stwcx. tmp4, ptr // bne- loop1MBB // b exitBB // midMBB: // stwcx. 
tmpDest, ptr // exitBB: // srw dest, tmpDest, shift if (ptrA != ZeroReg) { Ptr1Reg = RegInfo.createVirtualRegister(RC); BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) .addReg(ptrA).addReg(ptrB); } else { Ptr1Reg = ptrB; } BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); if (is64bit) BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) .addReg(Ptr1Reg).addImm(0).addImm(61); else BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) .addReg(newval).addReg(ShiftReg); BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) .addReg(oldval).addReg(ShiftReg); if (is8bit) BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); else { BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) .addReg(Mask3Reg).addImm(65535); } BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) .addReg(Mask2Reg).addReg(ShiftReg); BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) .addReg(NewVal2Reg).addReg(MaskReg); BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) .addReg(OldVal2Reg).addReg(MaskReg); BB = loop1MBB; BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) .addReg(ZeroReg).addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::AND),TmpReg) .addReg(TmpDestReg).addReg(MaskReg); BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) .addReg(TmpReg).addReg(OldVal3Reg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); BB->addSuccessor(loop2MBB); BB->addSuccessor(midMBB); BB = loop2MBB; BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg) .addReg(TmpDestReg).addReg(MaskReg); BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg) .addReg(Tmp2Reg).addReg(NewVal3Reg); BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg) .addReg(ZeroReg).addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); BB->addSuccessor(loop1MBB); BB->addSuccessor(exitMBB); BB = midMBB; BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg) .addReg(ZeroReg).addReg(PtrReg); BB->addSuccessor(exitMBB); // exitMBB: // ... BB = exitMBB; BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) .addReg(ShiftReg); } else if (MI.getOpcode() == PPC::FADDrtz) { // This pseudo performs an FADD with rounding mode temporarily forced // to round-to-zero. We emit this via custom inserter since the FPSCR // is not modeled at the SelectionDAG level. unsigned Dest = MI.getOperand(0).getReg(); unsigned Src1 = MI.getOperand(1).getReg(); unsigned Src2 = MI.getOperand(2).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); // Save FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); // Set rounding mode to round-to-zero. BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31); BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30); // Perform addition. BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); // Restore FPSCR value. 
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg); } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT || MI.getOpcode() == PPC::ANDIo_1_GT_BIT || MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 || MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) { unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 || MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) ? PPC::ANDIo8 : PPC::ANDIo; bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT || MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8); MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass); DebugLoc dl = MI.getDebugLoc(); BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) .addReg(MI.getOperand(1).getReg()) .addImm(1); BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg()) .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); } else if (MI.getOpcode() == PPC::TCHECK_RET) { DebugLoc Dl = MI.getDebugLoc(); MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg); return BB; } else { llvm_unreachable("Unexpected instr type to insert"); } MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; } //===----------------------------------------------------------------------===// // Target Optimization Hooks //===----------------------------------------------------------------------===// static std::string getRecipOp(const char *Base, EVT VT) { std::string RecipOp(Base); if (VT.getScalarType() == MVT::f64) RecipOp += "d"; else RecipOp += "f"; if (VT.isVector()) RecipOp = "vec-" + RecipOp; return RecipOp; } SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps, bool &UseOneConstNR) const { EVT VT = Operand.getValueType(); if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || (VT == MVT::v4f32 && Subtarget.hasAltivec()) || (VT == MVT::v2f64 && Subtarget.hasVSX()) || (VT == MVT::v4f32 && Subtarget.hasQPX()) || (VT == MVT::v4f64 && Subtarget.hasQPX())) { TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; std::string RecipOp = getRecipOp("sqrt", VT); if (!Recips.isEnabled(RecipOp)) return SDValue(); RefinementSteps = Recips.getRefinementSteps(RecipOp); UseOneConstNR = true; return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); } return SDValue(); } SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps) const { EVT VT = Operand.getValueType(); if ((VT == MVT::f32 && Subtarget.hasFRES()) || (VT == MVT::f64 && Subtarget.hasFRE()) || (VT == MVT::v4f32 && Subtarget.hasAltivec()) || (VT == MVT::v2f64 && Subtarget.hasVSX()) || (VT == MVT::v4f32 && Subtarget.hasQPX()) || (VT == MVT::v4f64 && Subtarget.hasQPX())) { TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; std::string RecipOp = getRecipOp("div", VT); if (!Recips.isEnabled(RecipOp)) return SDValue(); RefinementSteps = Recips.getRefinementSteps(RecipOp); return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); } return SDValue(); } unsigned PPCTargetLowering::combineRepeatedFPDivisors() const { // Note: This functionality is used only when unsafe-fp-math is enabled, and // on cores with reciprocal estimates (which are used when unsafe-fp-math is // enabled for division), this functionality is redundant with the default // combiner logic (once the division -> reciprocal/multiply transformation // has 
taken place). As a result, this matters more for older cores than for // newer ones. // Combine multiple FDIVs with the same divisor into multiple FMULs by the // reciprocal if there are two or more FDIVs (for embedded cores with only // one FP pipeline) or three or more FDIVs (for generic OOO cores). switch (Subtarget.getDarwinDirective()) { default: return 3; case PPC::DIR_440: case PPC::DIR_A2: case PPC::DIR_E500mc: case PPC::DIR_E5500: return 2; } } // isConsecutiveLSLoc needs to work even if all adds have not yet been // collapsed, and so we need to look through chains of them. static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base, int64_t& Offset, SelectionDAG &DAG) { if (DAG.isBaseWithConstantOffset(Loc)) { Base = Loc.getOperand(0); Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue(); // The base might itself be a base plus an offset, and if so, accumulate // that as well. getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG); } } static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG) { if (VT.getSizeInBits() / 8 != Bytes) return false; SDValue BaseLoc = Base->getBasePtr(); if (Loc.getOpcode() == ISD::FrameIndex) { if (BaseLoc.getOpcode() != ISD::FrameIndex) return false; const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); int FS = MFI->getObjectSize(FI); int BFS = MFI->getObjectSize(BFI); if (FS != BFS || FS != (int)Bytes) return false; return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); } SDValue Base1 = Loc, Base2 = BaseLoc; int64_t Offset1 = 0, Offset2 = 0; getBaseWithConstantOffset(Loc, Base1, Offset1, DAG); getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG); if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes)) return true; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const GlobalValue *GV1 = nullptr; const GlobalValue *GV2 = nullptr; Offset1 = 0; Offset2 = 0; bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); if (isGA1 && isGA2 && GV1 == GV2) return Offset1 == (Offset2 + Dist*Bytes); return false; } // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does // not enforce equality of the chain operands.
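// For example, with Bytes == 16 and Dist == 1, a location that decomposes
// (via getBaseWithConstantOffset) to the same base as Base's pointer plus an
// extra 16 bytes is consecutive, even when the two nodes' chains differ.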
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG) { if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) { EVT VT = LS->getMemoryVT(); SDValue Loc = LS->getBasePtr(); return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG); } if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { EVT VT; switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { default: return false; case Intrinsic::ppc_qpx_qvlfd: case Intrinsic::ppc_qpx_qvlfda: VT = MVT::v4f64; break; case Intrinsic::ppc_qpx_qvlfs: case Intrinsic::ppc_qpx_qvlfsa: VT = MVT::v4f32; break; case Intrinsic::ppc_qpx_qvlfcd: case Intrinsic::ppc_qpx_qvlfcda: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvlfcs: case Intrinsic::ppc_qpx_qvlfcsa: VT = MVT::v2f32; break; case Intrinsic::ppc_qpx_qvlfiwa: case Intrinsic::ppc_qpx_qvlfiwz: case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: case Intrinsic::ppc_vsx_lxvw4x: VT = MVT::v4i32; break; case Intrinsic::ppc_vsx_lxvd2x: VT = MVT::v2f64; break; case Intrinsic::ppc_altivec_lvebx: VT = MVT::i8; break; case Intrinsic::ppc_altivec_lvehx: VT = MVT::i16; break; case Intrinsic::ppc_altivec_lvewx: VT = MVT::i32; break; } return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG); } if (N->getOpcode() == ISD::INTRINSIC_VOID) { EVT VT; switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { default: return false; case Intrinsic::ppc_qpx_qvstfd: case Intrinsic::ppc_qpx_qvstfda: VT = MVT::v4f64; break; case Intrinsic::ppc_qpx_qvstfs: case Intrinsic::ppc_qpx_qvstfsa: VT = MVT::v4f32; break; case Intrinsic::ppc_qpx_qvstfcd: case Intrinsic::ppc_qpx_qvstfcda: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvstfcs: case Intrinsic::ppc_qpx_qvstfcsa: VT = MVT::v2f32; break; case Intrinsic::ppc_qpx_qvstfiw: case Intrinsic::ppc_qpx_qvstfiwa: case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: case Intrinsic::ppc_vsx_stxvw4x: VT = MVT::v4i32; break; case Intrinsic::ppc_vsx_stxvd2x: VT = MVT::v2f64; break; case Intrinsic::ppc_altivec_stvebx: VT = MVT::i8; break; case Intrinsic::ppc_altivec_stvehx: VT = MVT::i16; break; case Intrinsic::ppc_altivec_stvewx: VT = MVT::i32; break; } return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG); } return false; } // Return true if there is a nearby consecutive load to the one provided // (regardless of alignment). We search up and down the chain, looking through // token factors and other loads (but nothing else). As a result, a true return // value indicates that it is safe to create a new consecutive load adjacent to // the load provided. static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { SDValue Chain = LD->getChain(); EVT VT = LD->getMemoryVT(); SmallSet<SDNode *, 16> LoadRoots; SmallVector<SDNode *, 8> Queue(1, Chain.getNode()); SmallSet<SDNode *, 16> Visited; // First, search up the chain, branching to follow all token-factor operands. // If we find a consecutive load, then we're done, otherwise, record all // nodes just above the top-level loads and token factors.
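// For example, if LD's chain is a TokenFactor merging two earlier loads,
// both loads are tested for adjacency and their own chains are queued; any
// node that is neither a memory operation nor a TokenFactor terminates the
// upward walk and is recorded in LoadRoots as a starting point for the
// downward phase.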
while (!Queue.empty()) { SDNode *ChainNext = Queue.pop_back_val(); if (!Visited.insert(ChainNext).second) continue; if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) { if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) return true; if (!Visited.count(ChainLD->getChain().getNode())) Queue.push_back(ChainLD->getChain().getNode()); } else if (ChainNext->getOpcode() == ISD::TokenFactor) { for (const SDUse &O : ChainNext->ops()) if (!Visited.count(O.getNode())) Queue.push_back(O.getNode()); } else LoadRoots.insert(ChainNext); } // Second, search down the chain, starting from the top-level nodes recorded // in the first phase. These top-level nodes are the nodes just above all // loads and token factors. Starting with their uses, recursively look through // all loads (just the chain uses) and token factors to find a consecutive // load. Visited.clear(); Queue.clear(); for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(), IE = LoadRoots.end(); I != IE; ++I) { Queue.push_back(*I); while (!Queue.empty()) { SDNode *LoadRoot = Queue.pop_back_val(); if (!Visited.insert(LoadRoot).second) continue; if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot)) if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) return true; for (SDNode::use_iterator UI = LoadRoot->use_begin(), UE = LoadRoot->use_end(); UI != UE; ++UI) if (((isa<MemSDNode>(*UI) && cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) || UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI)) Queue.push_back(*UI); } } return false; } SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits"); // If we're tracking CR bits, we need to be careful that we don't have: // trunc(binary-ops(zext(x), zext(y))) // or // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...) // such that we're unnecessarily moving things into GPRs when it would be // better to keep them in CR bits. // Note that trunc here can be an actual i1 trunc, or can be the effective // truncation that comes from a setcc or select_cc. if (N->getOpcode() == ISD::TRUNCATE && N->getValueType(0) != MVT::i1) return SDValue(); if (N->getOperand(0).getValueType() != MVT::i32 && N->getOperand(0).getValueType() != MVT::i64) return SDValue(); if (N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) { // If we're looking at a comparison, then we need to make sure that the // high bits (all except for the first) don't affect the result. ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand( N->getOpcode() == ISD::SETCC ? 2 : 4))->get(); unsigned OpBits = N->getOperand(0).getValueSizeInBits(); if (ISD::isSignedIntSetCC(CC)) { if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits || DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits) return SDValue(); } else if (ISD::isUnsignedIntSetCC(CC)) { if (!DAG.MaskedValueIsZero(N->getOperand(0), APInt::getHighBitsSet(OpBits, OpBits-1)) || !DAG.MaskedValueIsZero(N->getOperand(1), APInt::getHighBitsSet(OpBits, OpBits-1))) return SDValue(); } else { // This is neither a signed nor an unsigned comparison, just make sure // that the high bits are equal. APInt Op1Zero, Op1One; APInt Op2Zero, Op2One; DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One); DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One); // We don't really care about what is known about the first bit (if // anything), so clear it in all masks prior to comparing them.
Op1Zero.clearBit(0); Op1One.clearBit(0); Op2Zero.clearBit(0); Op2One.clearBit(0); if (Op1Zero != Op2Zero || Op1One != Op2One) return SDValue(); } } // We now know that the higher-order bits are irrelevant; we just need to // make sure that all of the intermediate operations are bit operations, and // all inputs are extensions. if (N->getOperand(0).getOpcode() != ISD::AND && N->getOperand(0).getOpcode() != ISD::OR && N->getOperand(0).getOpcode() != ISD::XOR && N->getOperand(0).getOpcode() != ISD::SELECT && N->getOperand(0).getOpcode() != ISD::SELECT_CC && N->getOperand(0).getOpcode() != ISD::TRUNCATE && N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND && N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && N->getOperand(0).getOpcode() != ISD::ANY_EXTEND) return SDValue(); if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) && N->getOperand(1).getOpcode() != ISD::AND && N->getOperand(1).getOpcode() != ISD::OR && N->getOperand(1).getOpcode() != ISD::XOR && N->getOperand(1).getOpcode() != ISD::SELECT && N->getOperand(1).getOpcode() != ISD::SELECT_CC && N->getOperand(1).getOpcode() != ISD::TRUNCATE && N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND && N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && N->getOperand(1).getOpcode() != ISD::ANY_EXTEND) return SDValue(); SmallVector<SDValue, 4> Inputs; SmallVector<SDValue, 8> BinOps, PromOps; SmallPtrSet<SDNode *, 16> Visited; for (unsigned i = 0; i < 2; ++i) { if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND || N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND || N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) && N->getOperand(i).getOperand(0).getValueType() == MVT::i1) || isa<ConstantSDNode>(N->getOperand(i))) Inputs.push_back(N->getOperand(i)); else BinOps.push_back(N->getOperand(i)); if (N->getOpcode() == ISD::TRUNCATE) break; } // Visit all inputs, collect all binary operations (and, or, xor and // select) that are all fed by extensions. while (!BinOps.empty()) { SDValue BinOp = BinOps.back(); BinOps.pop_back(); if (!Visited.insert(BinOp.getNode()).second) continue; PromOps.push_back(BinOp); for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { // The condition of the select is not promoted. if (BinOp.getOpcode() == ISD::SELECT && i == 0) continue; if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) continue; if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || isa<ConstantSDNode>(BinOp.getOperand(i))) { Inputs.push_back(BinOp.getOperand(i)); } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || BinOp.getOperand(i).getOpcode() == ISD::OR || BinOp.getOperand(i).getOpcode() == ISD::XOR || BinOp.getOperand(i).getOpcode() == ISD::SELECT || BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC || BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) { BinOps.push_back(BinOp.getOperand(i)); } else { // We have an input that is not an extension or another binary // operation; we'll abort this transformation. return SDValue(); } } } // Make sure that this is a self-contained cluster of operations (which // is not quite the same thing as saying that everything has only one // use).
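// In other words, every non-constant input and intermediate operation may
// be used only by N or by other nodes already in the cluster; any outside
// use would still require the original i32/i64 value and would defeat the
// promotion.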
for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { if (isa(Inputs[i])) continue; for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), UE = Inputs[i].getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User != N && !Visited.count(User)) return SDValue(); // Make sure that we're not going to promote the non-output-value // operand(s) or SELECT or SELECT_CC. // FIXME: Although we could sometimes handle this, and it does occur in // practice that one of the condition inputs to the select is also one of // the outputs, we currently can't deal with this. if (User->getOpcode() == ISD::SELECT) { if (User->getOperand(0) == Inputs[i]) return SDValue(); } else if (User->getOpcode() == ISD::SELECT_CC) { if (User->getOperand(0) == Inputs[i] || User->getOperand(1) == Inputs[i]) return SDValue(); } } } for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), UE = PromOps[i].getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User != N && !Visited.count(User)) return SDValue(); // Make sure that we're not going to promote the non-output-value // operand(s) or SELECT or SELECT_CC. // FIXME: Although we could sometimes handle this, and it does occur in // practice that one of the condition inputs to the select is also one of // the outputs, we currently can't deal with this. if (User->getOpcode() == ISD::SELECT) { if (User->getOperand(0) == PromOps[i]) return SDValue(); } else if (User->getOpcode() == ISD::SELECT_CC) { if (User->getOperand(0) == PromOps[i] || User->getOperand(1) == PromOps[i]) return SDValue(); } } } // Replace all inputs with the extension operand. for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { // Constants may have users outside the cluster of to-be-promoted nodes, // and so we need to replace those as we do the promotions. if (isa(Inputs[i])) continue; else DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); } std::list PromOpHandles; for (auto &PromOp : PromOps) PromOpHandles.emplace_back(PromOp); // Replace all operations (these are all the same, but have a different // (i1) return type). DAG.getNode will validate that the types of // a binary operator match, so go through the list in reverse so that // we've likely promoted both operands first. Any intermediate truncations or // extensions disappear. while (!PromOpHandles.empty()) { SDValue PromOp = PromOpHandles.back().getValue(); PromOpHandles.pop_back(); if (PromOp.getOpcode() == ISD::TRUNCATE || PromOp.getOpcode() == ISD::SIGN_EXTEND || PromOp.getOpcode() == ISD::ZERO_EXTEND || PromOp.getOpcode() == ISD::ANY_EXTEND) { if (!isa(PromOp.getOperand(0)) && PromOp.getOperand(0).getValueType() != MVT::i1) { // The operand is not yet ready (see comment below). 
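// [Editor's aside -- illustrative sketch, not part of the original patch.]
// The promotion loop around this point pops nodes off the back of the
// handle list and, when a node's operands have not been rewritten yet,
// re-queues it at the *front* so it is retried after everything else. A
// standalone model of that deferral pattern (hypothetical data, plain C++):
#if 0
#include <cassert>
#include <deque>
#include <vector>

int main() {
  // ready[i]: node i's operands are already promoted. Node 2 depends on
  // node 0, so it must be deferred until node 0 has been processed.
  std::vector<bool> ready = {true, true, false};
  std::deque<int> Work = {0, 1, 2}; // popped from the back: 2 first
  std::vector<int> Order;
  while (!Work.empty()) {
    int N = Work.back();
    Work.pop_back();
    if (!ready[N]) {      // operands not ready:
      Work.push_front(N); // retry after the rest of the list
      continue;
    }
    Order.push_back(N);
    if (N == 0)
      ready[2] = true;    // promoting node 0 unblocks node 2
  }
  assert((Order == std::vector<int>{1, 0, 2})); // 2 was deferred, not lost
  return 0;
}
#endif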
PromOpHandles.emplace_front(PromOp); continue; } SDValue RepValue = PromOp.getOperand(0); if (isa(RepValue)) RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue); DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue); continue; } unsigned C; switch (PromOp.getOpcode()) { default: C = 0; break; case ISD::SELECT: C = 1; break; case ISD::SELECT_CC: C = 2; break; } if ((!isa(PromOp.getOperand(C)) && PromOp.getOperand(C).getValueType() != MVT::i1) || (!isa(PromOp.getOperand(C+1)) && PromOp.getOperand(C+1).getValueType() != MVT::i1)) { // The to-be-promoted operands of this node have not yet been // promoted (this should be rare because we're going through the // list backward, but if one of the operands has several users in // this cluster of to-be-promoted nodes, it is possible). PromOpHandles.emplace_front(PromOp); continue; } SmallVector Ops(PromOp.getNode()->op_begin(), PromOp.getNode()->op_end()); // If there are any constant inputs, make sure they're replaced now. for (unsigned i = 0; i < 2; ++i) if (isa(Ops[C+i])) Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]); DAG.ReplaceAllUsesOfValueWith(PromOp, DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops)); } // Now we're left with the initial truncation itself. if (N->getOpcode() == ISD::TRUNCATE) return N->getOperand(0); // Otherwise, this is a comparison. The operands to be compared have just // changed type (to i1), but everything else is the same. return SDValue(N, 0); } SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); // If we're tracking CR bits, we need to be careful that we don't have: // zext(binary-ops(trunc(x), trunc(y))) // or // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...) // such that we're unnecessarily moving things into CR bits that can more // efficiently stay in GPRs. Note that if we're not certain that the high // bits are set as required by the final extension, we still may need to do // some masking to get the proper behavior. // This same functionality is important on PPC64 when dealing with // 32-to-64-bit extensions; these occur often when 32-bit values are used as // the return values of functions. Because it is so similar, it is handled // here as well. if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) return SDValue(); if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) || (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64()))) return SDValue(); if (N->getOperand(0).getOpcode() != ISD::AND && N->getOperand(0).getOpcode() != ISD::OR && N->getOperand(0).getOpcode() != ISD::XOR && N->getOperand(0).getOpcode() != ISD::SELECT && N->getOperand(0).getOpcode() != ISD::SELECT_CC) return SDValue(); SmallVector Inputs; SmallVector BinOps(1, N->getOperand(0)), PromOps; SmallPtrSet Visited; // Visit all inputs, collect all binary operations (and, or, xor and // select) that are all fed by truncations. while (!BinOps.empty()) { SDValue BinOp = BinOps.back(); BinOps.pop_back(); if (!Visited.insert(BinOp.getNode()).second) continue; PromOps.push_back(BinOp); for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { // The condition of the select is not promoted. 
if (BinOp.getOpcode() == ISD::SELECT && i == 0) continue; if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) continue; if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || isa(BinOp.getOperand(i))) { Inputs.push_back(BinOp.getOperand(i)); } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || BinOp.getOperand(i).getOpcode() == ISD::OR || BinOp.getOperand(i).getOpcode() == ISD::XOR || BinOp.getOperand(i).getOpcode() == ISD::SELECT || BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) { BinOps.push_back(BinOp.getOperand(i)); } else { // We have an input that is not a truncation or another binary // operation; we'll abort this transformation. return SDValue(); } } } // The operands of a select that must be truncated when the select is // promoted because the operand is actually part of the to-be-promoted set. DenseMap SelectTruncOp[2]; // Make sure that this is a self-contained cluster of operations (which // is not quite the same thing as saying that everything has only one // use). for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { if (isa(Inputs[i])) continue; for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), UE = Inputs[i].getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User != N && !Visited.count(User)) return SDValue(); // If we're going to promote the non-output-value operand(s) or SELECT or // SELECT_CC, record them for truncation. if (User->getOpcode() == ISD::SELECT) { if (User->getOperand(0) == Inputs[i]) SelectTruncOp[0].insert(std::make_pair(User, User->getOperand(0).getValueType())); } else if (User->getOpcode() == ISD::SELECT_CC) { if (User->getOperand(0) == Inputs[i]) SelectTruncOp[0].insert(std::make_pair(User, User->getOperand(0).getValueType())); if (User->getOperand(1) == Inputs[i]) SelectTruncOp[1].insert(std::make_pair(User, User->getOperand(1).getValueType())); } } } for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), UE = PromOps[i].getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User != N && !Visited.count(User)) return SDValue(); // If we're going to promote the non-output-value operand(s) or SELECT or // SELECT_CC, record them for truncation. if (User->getOpcode() == ISD::SELECT) { if (User->getOperand(0) == PromOps[i]) SelectTruncOp[0].insert(std::make_pair(User, User->getOperand(0).getValueType())); } else if (User->getOpcode() == ISD::SELECT_CC) { if (User->getOperand(0) == PromOps[i]) SelectTruncOp[0].insert(std::make_pair(User, User->getOperand(0).getValueType())); if (User->getOperand(1) == PromOps[i]) SelectTruncOp[1].insert(std::make_pair(User, User->getOperand(1).getValueType())); } } } unsigned PromBits = N->getOperand(0).getValueSizeInBits(); bool ReallyNeedsExt = false; if (N->getOpcode() != ISD::ANY_EXTEND) { // If all of the inputs are not already sign/zero extended, then // we'll still need to do that at the end. 
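// [Editor's aside -- illustrative sketch, not part of the original patch.]
// The walk above collects AND/OR/XOR (and selects) fed only by truncations.
// Recomputing those bitwise operations in the wider type is sound because
// they commute with truncation: truncating a wide AND/OR/XOR gives the same
// bits as the narrow operation on truncated inputs. A standalone check:
#if 0
#include <cassert>
#include <cstdint>

int main() {
  uint64_t X = 0x1234567890ABCDEFULL, Y = 0x0F0F0F0F0F0F0F0FULL;
  assert((uint32_t)(X & Y) == ((uint32_t)X & (uint32_t)Y));
  assert((uint32_t)(X | Y) == ((uint32_t)X | (uint32_t)Y));
  assert((uint32_t)(X ^ Y) == ((uint32_t)X ^ (uint32_t)Y));
  return 0;
}
#endif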
for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { if (isa(Inputs[i])) continue; unsigned OpBits = Inputs[i].getOperand(0).getValueSizeInBits(); assert(PromBits < OpBits && "Truncation not to a smaller bit count?"); if ((N->getOpcode() == ISD::ZERO_EXTEND && !DAG.MaskedValueIsZero(Inputs[i].getOperand(0), APInt::getHighBitsSet(OpBits, OpBits-PromBits))) || (N->getOpcode() == ISD::SIGN_EXTEND && DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) < (OpBits-(PromBits-1)))) { ReallyNeedsExt = true; break; } } } // Replace all inputs, either with the truncation operand, or a // truncation or extension to the final output type. for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { // Constant inputs need to be replaced with the to-be-promoted nodes that // use them because they might have users outside of the cluster of // promoted nodes. if (isa(Inputs[i])) continue; SDValue InSrc = Inputs[i].getOperand(0); if (Inputs[i].getValueType() == N->getValueType(0)) DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc); else if (N->getOpcode() == ISD::SIGN_EXTEND) DAG.ReplaceAllUsesOfValueWith(Inputs[i], DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0))); else if (N->getOpcode() == ISD::ZERO_EXTEND) DAG.ReplaceAllUsesOfValueWith(Inputs[i], DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0))); else DAG.ReplaceAllUsesOfValueWith(Inputs[i], DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0))); } std::list PromOpHandles; for (auto &PromOp : PromOps) PromOpHandles.emplace_back(PromOp); // Replace all operations (these are all the same, but have a different // (promoted) return type). DAG.getNode will validate that the types of // a binary operator match, so go through the list in reverse so that // we've likely promoted both operands first. while (!PromOpHandles.empty()) { SDValue PromOp = PromOpHandles.back().getValue(); PromOpHandles.pop_back(); unsigned C; switch (PromOp.getOpcode()) { default: C = 0; break; case ISD::SELECT: C = 1; break; case ISD::SELECT_CC: C = 2; break; } if ((!isa(PromOp.getOperand(C)) && PromOp.getOperand(C).getValueType() != N->getValueType(0)) || (!isa(PromOp.getOperand(C+1)) && PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) { // The to-be-promoted operands of this node have not yet been // promoted (this should be rare because we're going through the // list backward, but if one of the operands has several users in // this cluster of to-be-promoted nodes, it is possible). PromOpHandles.emplace_front(PromOp); continue; } // For SELECT and SELECT_CC nodes, we do a similar check for any // to-be-promoted comparison inputs. if (PromOp.getOpcode() == ISD::SELECT || PromOp.getOpcode() == ISD::SELECT_CC) { if ((SelectTruncOp[0].count(PromOp.getNode()) && PromOp.getOperand(0).getValueType() != N->getValueType(0)) || (SelectTruncOp[1].count(PromOp.getNode()) && PromOp.getOperand(1).getValueType() != N->getValueType(0))) { PromOpHandles.emplace_front(PromOp); continue; } } SmallVector Ops(PromOp.getNode()->op_begin(), PromOp.getNode()->op_end()); // If this node has constant inputs, then they'll need to be promoted here. 
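// [Editor's aside -- illustrative sketch, not part of the original patch.]
// Standalone model of the ReallyNeedsExt test above and of the final
// re-extension emitted further below: a zero-extend is redundant when the
// bits above PromBits are already zero; when still needed, the zero-extend
// is an AND with the low-bits mask and the sign-extend is a SHL/SRA pair.
// Helper names are hypothetical; assumes PromBits < 64 and arithmetic
// right shift of signed values (true on the targets this code serves).
#if 0
#include <cassert>
#include <cstdint>

static bool zextRedundant(uint64_t V, unsigned PromBits) {
  return (V & (~0ULL << PromBits)) == 0; // MaskedValueIsZero on high bits
}
static uint64_t zextLow(uint64_t X, unsigned PromBits) {
  return X & (~0ULL >> (64 - PromBits)); // AND with getLowBitsSet(...)
}
static int64_t sextLow(uint64_t X, unsigned PromBits) {
  unsigned Sh = 64 - PromBits;           // the ShiftCst used below
  return (int64_t)(X << Sh) >> Sh;       // SHL then SRA
}

int main() {
  assert(zextRedundant(0x00000000DEADBEEFULL, 32));
  assert(!zextRedundant(0x100000000ULL, 32));
  assert(zextLow(~0ULL, 1) == 1);        // i1 true zero-extends to 1
  assert(sextLow(1, 1) == -1);           // i1 true sign-extends to all ones
  assert(sextLow(0x80000000ULL, 32) == (int64_t)0xFFFFFFFF80000000LL);
  return 0;
}
#endif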
for (unsigned i = 0; i < 2; ++i) { if (!isa(Ops[C+i])) continue; if (Ops[C+i].getValueType() == N->getValueType(0)) continue; if (N->getOpcode() == ISD::SIGN_EXTEND) Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); else if (N->getOpcode() == ISD::ZERO_EXTEND) Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); else Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); } // If we've promoted the comparison inputs of a SELECT or SELECT_CC, // truncate them again to the original value type. if (PromOp.getOpcode() == ISD::SELECT || PromOp.getOpcode() == ISD::SELECT_CC) { auto SI0 = SelectTruncOp[0].find(PromOp.getNode()); if (SI0 != SelectTruncOp[0].end()) Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]); auto SI1 = SelectTruncOp[1].find(PromOp.getNode()); if (SI1 != SelectTruncOp[1].end()) Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]); } DAG.ReplaceAllUsesOfValueWith(PromOp, DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops)); } // Now we're left with the initial extension itself. if (!ReallyNeedsExt) return N->getOperand(0); // To zero extend, just mask off everything except for the first bit (in the // i1 case). if (N->getOpcode() == ISD::ZERO_EXTEND) return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0), DAG.getConstant(APInt::getLowBitsSet( N->getValueSizeInBits(0), PromBits), dl, N->getValueType(0))); assert(N->getOpcode() == ISD::SIGN_EXTEND && "Invalid extension type"); EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout()); SDValue ShiftCst = DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy); return DAG.getNode( ISD::SRA, dl, N->getValueType(0), DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst), ShiftCst); } SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const { assert(N->getOpcode() == ISD::BUILD_VECTOR && "Should be called with a BUILD_VECTOR node"); SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); if (N->getValueType(0) != MVT::v2f64 || !Subtarget.hasVSX()) return SDValue(); // Looking for: // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1)) if (N->getOperand(0).getOpcode() != ISD::SINT_TO_FP && N->getOperand(0).getOpcode() != ISD::UINT_TO_FP) return SDValue(); if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP && N->getOperand(1).getOpcode() != ISD::UINT_TO_FP) return SDValue(); if (N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode()) return SDValue(); SDValue Ext1 = N->getOperand(0).getOperand(0); SDValue Ext2 = N->getOperand(1).getOperand(0); if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT || Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); ConstantSDNode *Ext1Op = dyn_cast(Ext1.getOperand(1)); ConstantSDNode *Ext2Op = dyn_cast(Ext2.getOperand(1)); if (!Ext1Op || !Ext2Op) return SDValue(); if (Ext1.getValueType() != MVT::i32 || Ext2.getValueType() != MVT::i32) if (Ext1.getOperand(0) != Ext2.getOperand(0)) return SDValue(); int FirstElem = Ext1Op->getZExtValue(); int SecondElem = Ext2Op->getZExtValue(); int SubvecIdx; if (FirstElem == 0 && SecondElem == 1) SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0; else if (FirstElem == 2 && SecondElem == 3) SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1; else return SDValue(); SDValue SrcVec = Ext1.getOperand(0); auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ? 
                    PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
  return DAG.getNode(NodeType, dl, MVT::v2f64, SrcVec,
                     DAG.getIntPtrConstant(SubvecIdx, dl));
}

SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

-  if (!Subtarget.has64BitSupport())
+  if (useSoftFloat() || !Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // Don't handle ppc_fp128 here or i1 conversions.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();
  if (Op.getOperand(0).getValueType() == MVT::i1)
    return SDValue();

  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (Op.getOperand(0).getValueType() == MVT::i32)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  // If we're converting from a float to an int and back to a float again,
  // then we don't need the store/load pair at all.
  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
       Subtarget.hasFPCVT()) ||
      (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
    SDValue Src = Op.getOperand(0).getOperand(0);
    if (Src.getValueType() == MVT::f32) {
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
      DCI.AddToWorklist(Src.getNode());
    } else if (Src.getValueType() != MVT::f64) {
      // Make sure that we don't pick up a ppc_fp128 source value.
      return SDValue();
    }

    unsigned FCTOp =
      Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ;

    SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);

    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
      DCI.AddToWorklist(FP.getNode());
    }

    return FP;
  }

  return SDValue();
}

// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
// builtins) into loads with swaps.
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX load");
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    MMO = LD->getMemOperand();
    // If the MMO suggests this isn't a load of a full vector, leave
    // things alone.  For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (MMO->getSize() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    // Similarly to the store case below, Intrin->getBasePtr() doesn't get
    // us what we want.
Get operand 2 instead. Base = Intrin->getOperand(2); MMO = Intrin->getMemOperand(); break; } } MVT VecTy = N->getValueType(0).getSimpleVT(); SDValue LoadOps[] = { Chain, Base }; SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl, DAG.getVTList(MVT::v2f64, MVT::Other), LoadOps, MVT::v2f64, MMO); DCI.AddToWorklist(Load.getNode()); Chain = Load.getValue(1); SDValue Swap = DAG.getNode( PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load); DCI.AddToWorklist(Swap.getNode()); // Add a bitcast if the resulting load type doesn't match v2f64. if (VecTy != MVT::v2f64) { SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap); DCI.AddToWorklist(N.getNode()); // Package {bitcast value, swap's chain} to match Load's shape. return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other), N, Swap.getValue(1)); } return Swap; } // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for // builtins) into stores with swaps. SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); SDValue Chain; SDValue Base; unsigned SrcOpnd; MachineMemOperand *MMO; switch (N->getOpcode()) { default: llvm_unreachable("Unexpected opcode for little endian VSX store"); case ISD::STORE: { StoreSDNode *ST = cast(N); Chain = ST->getChain(); Base = ST->getBasePtr(); MMO = ST->getMemOperand(); SrcOpnd = 1; // If the MMO suggests this isn't a store of a full vector, leave // things alone. For a built-in, we have to make the change for // correctness, so if there is a size problem that will be a bug. if (MMO->getSize() < 16) return SDValue(); break; } case ISD::INTRINSIC_VOID: { MemIntrinsicSDNode *Intrin = cast(N); Chain = Intrin->getChain(); // Intrin->getBasePtr() oddly does not get what we want. Base = Intrin->getOperand(3); MMO = Intrin->getMemOperand(); SrcOpnd = 2; break; } } SDValue Src = N->getOperand(SrcOpnd); MVT VecTy = Src.getValueType().getSimpleVT(); // All stores are done as v2f64 and possible bit cast. if (VecTy != MVT::v2f64) { Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src); DCI.AddToWorklist(Src.getNode()); } SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src); DCI.AddToWorklist(Swap.getNode()); Chain = Swap.getValue(1); SDValue StoreOps[] = { Chain, Swap, Base }; SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl, DAG.getVTList(MVT::Other), StoreOps, VecTy, MMO); DCI.AddToWorklist(Store.getNode()); return Store; } SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); switch (N->getOpcode()) { default: break; case PPCISD::SHL: if (isNullConstant(N->getOperand(0))) // 0 << V -> 0. return N->getOperand(0); break; case PPCISD::SRL: if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0. return N->getOperand(0); break; case PPCISD::SRA: if (ConstantSDNode *C = dyn_cast(N->getOperand(0))) { if (C->isNullValue() || // 0 >>s V -> 0. C->isAllOnesValue()) // -1 >>s V -> -1. return N->getOperand(0); } break; case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: return DAGCombineExtBoolTrunc(N, DCI); case ISD::TRUNCATE: case ISD::SETCC: case ISD::SELECT_CC: return DAGCombineTruncBoolExt(N, DCI); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return combineFPToIntToFP(N, DCI); case ISD::STORE: { // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). 
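// [Editor's aside -- illustrative sketch, not part of the original patch;
// the STORE combine whose comment appears just above continues below.]
// Byte-order model of expandVSXLoadForLE/expandVSXStoreForLE above: lxvd2x
// and stxvd2x move two doublewords in big-endian element order, so on a
// little-endian target the combines pair them with an xxswapd, which swaps
// the two 64-bit halves of the vector. Simplified to a plain array:
#if 0
#include <cassert>
#include <cstdint>

int main() {
  uint64_t Mem[2] = {0x1111111111111111ULL, 0x2222222222222222ULL};
  // lxvd2x as seen through LE lane numbering: elements arrive swapped.
  uint64_t Reg[2] = {Mem[1], Mem[0]};
  // xxswapd restores the expected element order.
  uint64_t Vec[2] = {Reg[1], Reg[0]};
  assert(Vec[0] == Mem[0] && Vec[1] == Mem[1]);
  return 0;
}
#endif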
    if (Subtarget.hasSTFIWX() &&
        !cast<StoreSDNode>(N)->isTruncatingStore() &&
        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
        N->getOperand(1).getValueType() == MVT::i32 &&
        N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
      SDValue Val = N->getOperand(1).getOperand(0);
      if (Val.getValueType() == MVT::f32) {
        Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
        DCI.AddToWorklist(Val.getNode());
      }
      Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
      DCI.AddToWorklist(Val.getNode());

      SDValue Ops[] = {
        N->getOperand(0), Val, N->getOperand(2),
        DAG.getValueType(N->getOperand(1).getValueType())
      };

      Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops,
              cast<StoreSDNode>(N)->getMemoryVT(),
              cast<StoreSDNode>(N)->getMemOperand());
      DCI.AddToWorklist(Val.getNode());
      return Val;
    }

    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
    if (cast<StoreSDNode>(N)->isUnindexed() &&
        N->getOperand(1).getOpcode() == ISD::BSWAP &&
        N->getOperand(1).getNode()->hasOneUse() &&
        (N->getOperand(1).getValueType() == MVT::i32 ||
         N->getOperand(1).getValueType() == MVT::i16 ||
         (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
          N->getOperand(1).getValueType() == MVT::i64))) {
      SDValue BSwapOp = N->getOperand(1).getOperand(0);
      // Do an any-extend to 32-bits if this is a half-word input.
      if (BSwapOp.getValueType() == MVT::i16)
        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);

      SDValue Ops[] = {
        N->getOperand(0), BSwapOp, N->getOperand(2),
        DAG.getValueType(N->getOperand(1).getValueType())
      };
      return
        DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
                                Ops, cast<StoreSDNode>(N)->getMemoryVT(),
                                cast<StoreSDNode>(N)->getMemOperand());
    }

    // For little endian, VSX stores require generating xxswapd/stxvd2x.
    EVT VT = N->getOperand(1).getValueType();
    if (VT.isSimple()) {
      MVT StoreVT = VT.getSimpleVT();
      if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
          (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
           StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
        return expandVSXStoreForLE(N, DCI);
    }
    break;
  }
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT VT = LD->getValueType(0);

    // For little endian, VSX loads require generating lxvd2x/xxswapd.
    if (VT.isSimple()) {
      MVT LoadVT = VT.getSimpleVT();
      if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
          (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
           LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
        return expandVSXLoadForLE(N, DCI);
    }

    // We sometimes end up with a 64-bit integer load, from which we extract
    // two single-precision floating-point numbers. This happens with
    // std::complex<float>, and other similar structures, because of the way
    // we canonicalize structure copies. However, if we lack direct moves,
    // then the final bitcasts from the extracted integer values to the
    // floating-point numbers turn into store/load pairs. Even with direct
    // moves, just loading the two floating-point numbers is likely better.
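// [Editor's aside -- illustrative sketch, not part of the original patch;
// the two-float-load rewrite described just above follows below.]
// The STORE (BSWAP) combine above rests on this identity: a byte-reversed
// store (sthbrx/stwbrx/stdbrx) writes the same bytes as a normal store of
// bswap(x), whatever the host byte order. Portable check of the 32-bit case:
#if 0
#include <cassert>
#include <cstdint>
#include <cstring>

static uint32_t bswap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0xFF00u) | ((V << 8) & 0xFF0000u) | (V << 24);
}

int main() {
  uint32_t V = 0x01020304u;
  unsigned char Norm[4], Swapped[4], Reversed[4];
  std::memcpy(Norm, &V, 4);            // plain stw
  uint32_t S = bswap32(V);
  std::memcpy(Swapped, &S, 4);         // stw (bswap V)
  for (int i = 0; i < 4; ++i)          // stwbrx: same bytes, reversed order
    Reversed[i] = Norm[3 - i];
  assert(std::memcmp(Swapped, Reversed, 4) == 0);
  return 0;
}
#endif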
auto ReplaceTwoFloatLoad = [&]() { if (VT != MVT::i64) return false; if (LD->getExtensionType() != ISD::NON_EXTLOAD || LD->isVolatile()) return false; // We're looking for a sequence like this: // t13: i64,ch = load t0, t6, undef:i64 // t16: i64 = srl t13, Constant:i32<32> // t17: i32 = truncate t16 // t18: f32 = bitcast t17 // t19: i32 = truncate t13 // t20: f32 = bitcast t19 if (!LD->hasNUsesOfValue(2, 0)) return false; auto UI = LD->use_begin(); while (UI.getUse().getResNo() != 0) ++UI; SDNode *Trunc = *UI++; while (UI.getUse().getResNo() != 0) ++UI; SDNode *RightShift = *UI; if (Trunc->getOpcode() != ISD::TRUNCATE) std::swap(Trunc, RightShift); if (Trunc->getOpcode() != ISD::TRUNCATE || Trunc->getValueType(0) != MVT::i32 || !Trunc->hasOneUse()) return false; if (RightShift->getOpcode() != ISD::SRL || !isa(RightShift->getOperand(1)) || RightShift->getConstantOperandVal(1) != 32 || !RightShift->hasOneUse()) return false; SDNode *Trunc2 = *RightShift->use_begin(); if (Trunc2->getOpcode() != ISD::TRUNCATE || Trunc2->getValueType(0) != MVT::i32 || !Trunc2->hasOneUse()) return false; SDNode *Bitcast = *Trunc->use_begin(); SDNode *Bitcast2 = *Trunc2->use_begin(); if (Bitcast->getOpcode() != ISD::BITCAST || Bitcast->getValueType(0) != MVT::f32) return false; if (Bitcast2->getOpcode() != ISD::BITCAST || Bitcast2->getValueType(0) != MVT::f32) return false; if (Subtarget.isLittleEndian()) std::swap(Bitcast, Bitcast2); // Bitcast has the second float (in memory-layout order) and Bitcast2 // has the first one. SDValue BasePtr = LD->getBasePtr(); if (LD->isIndexed()) { assert(LD->getAddressingMode() == ISD::PRE_INC && "Non-pre-inc AM on PPC?"); BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, LD->getOffset()); } auto MMOFlags = LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile; SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr, LD->getPointerInfo(), LD->getAlignment(), MMOFlags, LD->getAAInfo()); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(4, dl)); SDValue FloatLoad2 = DAG.getLoad( MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr, LD->getPointerInfo().getWithOffset(4), MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo()); if (LD->isIndexed()) { // Note that DAGCombine should re-form any pre-increment load(s) from // what is produced here if that makes sense. DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr); } DCI.CombineTo(Bitcast2, FloatLoad); DCI.CombineTo(Bitcast, FloatLoad2); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1), SDValue(FloatLoad2.getNode(), 1)); return true; }; if (ReplaceTwoFloatLoad()) return SDValue(N, 0); EVT MemVT = LD->getMemoryVT(); Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext()); unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy); if (LD->isUnindexed() && VT.isVector() && ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) && // P8 and later hardware should just use LOAD. !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v4f32)) || (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) && LD->getAlignment() >= ScalarABIAlignment)) && LD->getAlignment() < ABIAlignment) { // This is a type-legal unaligned Altivec or QPX load. 
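// [Editor's aside -- illustrative sketch, not part of the original patch.]
// What ReplaceTwoFloatLoad above rewrites, modeled in memory terms: the
// matched pattern extracts the two halves of an i64 load (srl/truncate)
// and bitcasts each to f32; the replacement simply loads the two floats
// directly at offsets 0 and 4 (the combine swaps the two results on
// little-endian targets, as the std::swap above does):
#if 0
#include <cassert>
#include <cstring>

int main() {
  float Pair[2] = {1.5f, -2.25f};
  unsigned char Mem[8];
  std::memcpy(Mem, Pair, 8);       // the i64 load reads these 8 bytes

  float F0, F1;
  std::memcpy(&F0, Mem + 0, 4);    // first f32 load
  std::memcpy(&F1, Mem + 4, 4);    // second f32 load at offset 4
  assert(F0 == 1.5f && F1 == -2.25f);
  return 0;
}
#endif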
SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); bool isLittleEndian = Subtarget.isLittleEndian(); // This implements the loading of unaligned vectors as described in // the venerable Apple Velocity Engine overview. Specifically: // https://developer.apple.com/hardwaredrivers/ve/alignment.html // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html // // The general idea is to expand a sequence of one or more unaligned // loads into an alignment-based permutation-control instruction (lvsl // or lvsr), a series of regular vector loads (which always truncate // their input address to an aligned address), and a series of // permutations. The results of these permutations are the requested // loaded values. The trick is that the last "extra" load is not taken // from the address you might suspect (sizeof(vector) bytes after the // last requested load), but rather sizeof(vector) - 1 bytes after the // last requested vector. The point of this is to avoid a page fault if // the base address happened to be aligned. This works because if the // base address is aligned, then adding less than a full vector length // will cause the last vector in the sequence to be (re)loaded. // Otherwise, the next vector will be fetched as you might suspect was // necessary. // We might be able to reuse the permutation generation from // a different base address offset from this one by an aligned amount. // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this // optimization later. Intrinsic::ID Intr, IntrLD, IntrPerm; MVT PermCntlTy, PermTy, LDTy; if (Subtarget.hasAltivec()) { Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr : Intrinsic::ppc_altivec_lvsl; IntrLD = Intrinsic::ppc_altivec_lvx; IntrPerm = Intrinsic::ppc_altivec_vperm; PermCntlTy = MVT::v16i8; PermTy = MVT::v4i32; LDTy = MVT::v4i32; } else { Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld : Intrinsic::ppc_qpx_qvlpcls; IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd : Intrinsic::ppc_qpx_qvlfs; IntrPerm = Intrinsic::ppc_qpx_qvfperm; PermCntlTy = MVT::v4f64; PermTy = MVT::v4f64; LDTy = MemVT.getSimpleVT(); } SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy); // Create the new MMO for the new base load. It is like the original MMO, // but represents an area in memory almost twice the vector size centered // on the original address. If the address is unaligned, we might start // reading up to (sizeof(vector)-1) bytes below the address of the // original unaligned load. MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *BaseMMO = MF.getMachineMemOperand(LD->getMemOperand(), -(long)MemVT.getStoreSize()+1, 2*MemVT.getStoreSize()-1); // Create the new base load. SDValue LDXIntID = DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout())); SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr }; SDValue BaseLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, DAG.getVTList(PermTy, MVT::Other), BaseLoadOps, LDTy, BaseMMO); // Note that the value of IncOffset (which is provided to the next // load's pointer info offset value, and thus used to calculate the // alignment), and the value of IncValue (which is actually used to // increment the pointer value) are different! This is because we // require the next load to appear to be aligned, even though it // is actually offset from the base pointer by a lesser amount. 
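// [Editor's aside -- illustrative sketch, not part of the original patch.]
// Standalone model of the expansion described above: lvx truncates its
// address to a 16-byte boundary, so an unaligned 16-byte load becomes two
// aligned loads plus a byte permute selecting 16 consecutive bytes starting
// at (addr & 15). The extra load uses addr + 15 rather than addr + 16 so an
// already-aligned base never touches the next (possibly unmapped) block.
#if 0
#include <cassert>
#include <cstring>

int main() {
  alignas(16) unsigned char Buf[48];
  for (int i = 0; i < 48; ++i) Buf[i] = (unsigned char)i;

  unsigned Addr = 5; // unaligned offset into Buf
  const unsigned char *Base  = Buf + (Addr & ~15u);        // lvx(addr)
  const unsigned char *Extra = Buf + ((Addr + 15) & ~15u); // lvx(addr+15)
  unsigned Sh = Addr & 15u;                                // lvsl control

  unsigned char Out[16];
  for (unsigned i = 0; i < 16; ++i)                        // vperm
    Out[i] = (Sh + i < 16) ? Base[Sh + i] : Extra[Sh + i - 16];
  assert(std::memcmp(Out, Buf + Addr, 16) == 0);

  // If Addr were already 16-byte aligned, Addr + 15 stays inside the same
  // 16-byte block, so Base == Extra and no extra memory is touched.
  return 0;
}
#endif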
int IncOffset = VT.getSizeInBits() / 8; int IncValue = IncOffset; // Walk (both up and down) the chain looking for another load at the real // (aligned) offset (the alignment of the other load does not matter in // this case). If found, then do not use the offset reduction trick, as // that will prevent the loads from being later combined (as they would // otherwise be duplicates). if (!findConsecutiveLoad(LD, DAG)) --IncValue; SDValue Increment = DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout())); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); MachineMemOperand *ExtraMMO = MF.getMachineMemOperand(LD->getMemOperand(), 1, 2*MemVT.getStoreSize()-1); SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr }; SDValue ExtraLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, DAG.getVTList(PermTy, MVT::Other), ExtraLoadOps, LDTy, ExtraMMO); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, BaseLoad.getValue(1), ExtraLoad.getValue(1)); // Because vperm has a big-endian bias, we must reverse the order // of the input vectors and complement the permute control vector // when generating little endian code. We have already handled the // latter by using lvsr instead of lvsl, so just reverse BaseLoad // and ExtraLoad here. SDValue Perm; if (isLittleEndian) Perm = BuildIntrinsicOp(IntrPerm, ExtraLoad, BaseLoad, PermCntl, DAG, dl); else Perm = BuildIntrinsicOp(IntrPerm, BaseLoad, ExtraLoad, PermCntl, DAG, dl); if (VT != PermTy) Perm = Subtarget.hasAltivec() ? DAG.getNode(ISD::BITCAST, dl, VT, Perm) : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX DAG.getTargetConstant(1, dl, MVT::i64)); // second argument is 1 because this rounding // is always exact. // The output of the permutation is our loaded result, the TokenFactor is // our new chain. DCI.CombineTo(N, Perm, TF); return SDValue(N, 0); } } break; case ISD::INTRINSIC_WO_CHAIN: { bool isLittleEndian = Subtarget.isLittleEndian(); unsigned IID = cast(N->getOperand(0))->getZExtValue(); Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr : Intrinsic::ppc_altivec_lvsl); if ((IID == Intr || IID == Intrinsic::ppc_qpx_qvlpcld || IID == Intrinsic::ppc_qpx_qvlpcls) && N->getOperand(1)->getOpcode() == ISD::ADD) { SDValue Add = N->getOperand(1); int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ? 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */; if (DAG.MaskedValueIsZero( Add->getOperand(1), APInt::getAllOnesValue(Bits /* alignment */) .zext( Add.getValueType().getScalarType().getSizeInBits()))) { SDNode *BasePtr = Add->getOperand(0).getNode(); for (SDNode::use_iterator UI = BasePtr->use_begin(), UE = BasePtr->use_end(); UI != UE; ++UI) { if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && cast(UI->getOperand(0))->getZExtValue() == IID) { // We've found another LVSL/LVSR, and this address is an aligned // multiple of that one. The results will be the same, so use the // one we've just found instead. 
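// [Editor's aside -- illustrative sketch, not part of the original patch;
// the early return this combine performs follows below.]
// Why an existing lvsl/lvsr result can be reused: the permute control
// depends only on addr & 15 (addr & 31 for the QPX variants), so adding any
// multiple of the alignment -- which is what the MaskedValueIsZero test
// above proves about the other address -- cannot change it.
#if 0
#include <cassert>
#include <cstdint>

int main() {
  uint64_t Addr = 0x1003;
  for (uint64_t K = 0; K < 8; ++K)                 // any multiple of 16...
    assert(((Addr + 16 * K) & 15) == (Addr & 15)); // ...same permute control
  return 0;
}
#endif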
return SDValue(*UI, 0); } } } if (isa(Add->getOperand(1))) { SDNode *BasePtr = Add->getOperand(0).getNode(); for (SDNode::use_iterator UI = BasePtr->use_begin(), UE = BasePtr->use_end(); UI != UE; ++UI) { if (UI->getOpcode() == ISD::ADD && isa(UI->getOperand(1)) && (cast(Add->getOperand(1))->getZExtValue() - cast(UI->getOperand(1))->getZExtValue()) % (1ULL << Bits) == 0) { SDNode *OtherAdd = *UI; for (SDNode::use_iterator VI = OtherAdd->use_begin(), VE = OtherAdd->use_end(); VI != VE; ++VI) { if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && cast(VI->getOperand(0))->getZExtValue() == IID) { return SDValue(*VI, 0); } } } } } } } break; case ISD::INTRINSIC_W_CHAIN: { // For little endian, VSX loads require generating lxvd2x/xxswapd. if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) { switch (cast(N->getOperand(1))->getZExtValue()) { default: break; case Intrinsic::ppc_vsx_lxvw4x: case Intrinsic::ppc_vsx_lxvd2x: return expandVSXLoadForLE(N, DCI); } } break; } case ISD::INTRINSIC_VOID: { // For little endian, VSX stores require generating xxswapd/stxvd2x. if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) { switch (cast(N->getOperand(1))->getZExtValue()) { default: break; case Intrinsic::ppc_vsx_stxvw4x: case Intrinsic::ppc_vsx_stxvd2x: return expandVSXStoreForLE(N, DCI); } } break; } case ISD::BSWAP: // Turn BSWAP (LOAD) -> lhbrx/lwbrx. if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && N->getOperand(0).hasOneUse() && (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 || (Subtarget.hasLDBRX() && Subtarget.isPPC64() && N->getValueType(0) == MVT::i64))) { SDValue Load = N->getOperand(0); LoadSDNode *LD = cast(Load); // Create the byte-swapping load. SDValue Ops[] = { LD->getChain(), // Chain LD->getBasePtr(), // Ptr DAG.getValueType(N->getValueType(0)) // VT }; SDValue BSLoad = DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, DAG.getVTList(N->getValueType(0) == MVT::i64 ? MVT::i64 : MVT::i32, MVT::Other), Ops, LD->getMemoryVT(), LD->getMemOperand()); // If this is an i16 load, insert the truncate. SDValue ResVal = BSLoad; if (N->getValueType(0) == MVT::i16) ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad); // First, combine the bswap away. This makes the value produced by the // load dead. DCI.CombineTo(N, ResVal); // Next, combine the load away, we give it a bogus result value but a real // chain result. The result value is dead because the bswap is dead. DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); // Return N so it doesn't get rechecked! return SDValue(N, 0); } break; case PPCISD::VCMP: { // If a VCMPo node already exists with exactly the same operands as this // node, use its result instead of this node (VCMPo computes both a CR6 and // a normal output). // if (!N->getOperand(0).hasOneUse() && !N->getOperand(1).hasOneUse() && !N->getOperand(2).hasOneUse()) { // Scan all of the users of the LHS, looking for VCMPo's that match. SDNode *VCMPoNode = nullptr; SDNode *LHSN = N->getOperand(0).getNode(); for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); UI != E; ++UI) if (UI->getOpcode() == PPCISD::VCMPo && UI->getOperand(1) == N->getOperand(1) && UI->getOperand(2) == N->getOperand(2) && UI->getOperand(0) == N->getOperand(0)) { VCMPoNode = *UI; break; } // If there is no VCMPo node, or if the flag value has a single use, don't // transform this. if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1)) break; // Look at the (necessarily single) use of the flag value. If it has a // chain, this transformation is more complex. 
Note that multiple things // could use the value result, which we should ignore. SDNode *FlagUser = nullptr; for (SDNode::use_iterator UI = VCMPoNode->use_begin(); FlagUser == nullptr; ++UI) { assert(UI != VCMPoNode->use_end() && "Didn't find user!"); SDNode *User = *UI; for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { if (User->getOperand(i) == SDValue(VCMPoNode, 1)) { FlagUser = User; break; } } } // If the user is a MFOCRF instruction, we know this is safe. // Otherwise we give up for right now. if (FlagUser->getOpcode() == PPCISD::MFOCRF) return SDValue(VCMPoNode, 0); } break; } case ISD::BRCOND: { SDValue Cond = N->getOperand(1); SDValue Target = N->getOperand(2); if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN && cast(Cond.getOperand(1))->getZExtValue() == Intrinsic::ppc_is_decremented_ctr_nonzero) { // We now need to make the intrinsic dead (it cannot be instruction // selected). DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0)); assert(Cond.getNode()->hasOneUse() && "Counter decrement has more than one use"); return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other, N->getOperand(0), Target); } } break; case ISD::BR_CC: { // If this is a branch on an altivec predicate comparison, lower this so // that we don't have to do a MFOCRF: instead, branch directly on CR6. This // lowering is done pre-legalize, because the legalizer lowers the predicate // compare down to code that is difficult to reassemble. ISD::CondCode CC = cast(N->getOperand(1))->get(); SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); // Sometimes the promoted value of the intrinsic is ANDed by some non-zero // value. If so, pass-through the AND to get to the intrinsic. if (LHS.getOpcode() == ISD::AND && LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN && cast(LHS.getOperand(0).getOperand(1))->getZExtValue() == Intrinsic::ppc_is_decremented_ctr_nonzero && isa(LHS.getOperand(1)) && !isNullConstant(LHS.getOperand(1))) LHS = LHS.getOperand(0); if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && cast(LHS.getOperand(1))->getZExtValue() == Intrinsic::ppc_is_decremented_ctr_nonzero && isa(RHS)) { assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Counter decrement comparison is not EQ or NE"); unsigned Val = cast(RHS)->getZExtValue(); bool isBDNZ = (CC == ISD::SETEQ && Val) || (CC == ISD::SETNE && !Val); // We now need to make the intrinsic dead (it cannot be instruction // selected). DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0)); assert(LHS.getNode()->hasOneUse() && "Counter decrement has more than one use"); return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other, N->getOperand(0), N->getOperand(4)); } int CompareOpc; bool isDot; if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && isa(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) { assert(isDot && "Can't compare against a vector result!"); // If this is a comparison against something other than 0/1, then we know // that the condition is never/always true. unsigned Val = cast(RHS)->getZExtValue(); if (Val != 0 && Val != 1) { if (CC == ISD::SETEQ) // Cond never true, remove branch. return N->getOperand(0); // Always !=, turn it into an unconditional branch. return DAG.getNode(ISD::BR, dl, MVT::Other, N->getOperand(0), N->getOperand(4)); } bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); // Create the PPCISD altivec 'dot' comparison node. 
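// [Editor's aside -- illustrative sketch, not part of the original patch;
// the construction of the 'dot' comparison node follows below.]
// Truth table behind BranchOnWhenPredTrue above: the branch tests the
// predicate result against Val, so "== 1" and "!= 0" both mean "branch when
// the predicate is true", while "== 0" and "!= 1" mean the opposite.
#if 0
#include <cassert>

int main() {
  enum CondCode { SETEQ, SETNE };
  auto BranchOnTrue = [](CondCode CC, unsigned Val) {
    return bool((CC == SETEQ) ^ (Val == 0));
  };
  assert(BranchOnTrue(SETEQ, 1) == true);   // pred == 1 -> branch on true
  assert(BranchOnTrue(SETNE, 0) == true);   // pred != 0 -> branch on true
  assert(BranchOnTrue(SETEQ, 0) == false);  // pred == 0 -> branch on false
  assert(BranchOnTrue(SETNE, 1) == false);  // pred != 1 -> branch on false
  return 0;
}
#endif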
SDValue Ops[] = { LHS.getOperand(2), // LHS of compare LHS.getOperand(3), // RHS of compare DAG.getConstant(CompareOpc, dl, MVT::i32) }; EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); // Unpack the result based on how the target uses it. PPC::Predicate CompOpc; switch (cast(LHS.getOperand(1))->getZExtValue()) { default: // Can't happen, don't crash on invalid number though. case 0: // Branch on the value of the EQ bit of CR6. CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; break; case 1: // Branch on the inverted value of the EQ bit of CR6. CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ; break; case 2: // Branch on the value of the LT bit of CR6. CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE; break; case 3: // Branch on the inverted value of the LT bit of CR6. CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT; break; } return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0), DAG.getConstant(CompOpc, dl, MVT::i32), DAG.getRegister(PPC::CR6, MVT::i32), N->getOperand(4), CompNode.getValue(1)); } break; } case ISD::BUILD_VECTOR: return DAGCombineBuildVector(N, DCI); } return SDValue(); } SDValue PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector *Created) const { // fold (sdiv X, pow2) EVT VT = N->getValueType(0); if (VT == MVT::i64 && !Subtarget.isPPC64()) return SDValue(); if ((VT != MVT::i32 && VT != MVT::i64) || !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2())) return SDValue(); SDLoc DL(N); SDValue N0 = N->getOperand(0); bool IsNegPow2 = (-Divisor).isPowerOf2(); unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros(); SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT); SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt); if (Created) Created->push_back(Op.getNode()); if (IsNegPow2) { Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op); if (Created) Created->push_back(Op.getNode()); } return Op; } //===----------------------------------------------------------------------===// // Inline Assembly Support //===----------------------------------------------------------------------===// void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); switch (Op.getOpcode()) { default: break; case PPCISD::LBRX: { // lhbrx is known to have the top bits cleared out. if (cast(Op.getOperand(2))->getVT() == MVT::i16) KnownZero = 0xFFFF0000; break; } case ISD::INTRINSIC_WO_CHAIN: { switch (cast(Op.getOperand(0))->getZExtValue()) { default: break; case Intrinsic::ppc_altivec_vcmpbfp_p: case Intrinsic::ppc_altivec_vcmpeqfp_p: case Intrinsic::ppc_altivec_vcmpequb_p: case Intrinsic::ppc_altivec_vcmpequh_p: case Intrinsic::ppc_altivec_vcmpequw_p: case Intrinsic::ppc_altivec_vcmpequd_p: case Intrinsic::ppc_altivec_vcmpgefp_p: case Intrinsic::ppc_altivec_vcmpgtfp_p: case Intrinsic::ppc_altivec_vcmpgtsb_p: case Intrinsic::ppc_altivec_vcmpgtsh_p: case Intrinsic::ppc_altivec_vcmpgtsw_p: case Intrinsic::ppc_altivec_vcmpgtsd_p: case Intrinsic::ppc_altivec_vcmpgtub_p: case Intrinsic::ppc_altivec_vcmpgtuh_p: case Intrinsic::ppc_altivec_vcmpgtuw_p: case Intrinsic::ppc_altivec_vcmpgtud_p: KnownZero = ~1U; // All bits but the low one are known to be zero. 
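// [Editor's aside -- illustrative sketch, not part of the original patch.]
// Standalone model of the SRA_ADDZE pattern emitted by BuildSDIVPow2 above:
// srawi/sradi shift right arithmetically and record in CA whether a
// negative dividend had any one bits shifted out; addze then adds that
// carry, turning the round-toward-negative-infinity shift into the
// round-toward-zero division C requires.
#if 0
#include <cassert>
#include <cstdint>

static int64_t sdivPow2(int64_t X, unsigned Lg2) {
  int64_t Shifted = X >> Lg2; // arithmetic shift (sradi)
  int64_t Carry =             // CA: negative dividend with remainder bits
      (X < 0 && (X & ((1LL << Lg2) - 1)) != 0) ? 1 : 0;
  return Shifted + Carry;     // addze
}

int main() {
  for (int64_t X : {-9LL, -8LL, -1LL, 0LL, 1LL, 9LL})
    assert(sdivPow2(X, 3) == X / 8);
  return 0;
}
#endif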
break; } } } } unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { switch (Subtarget.getDarwinDirective()) { default: break; case PPC::DIR_970: case PPC::DIR_PWR4: case PPC::DIR_PWR5: case PPC::DIR_PWR5X: case PPC::DIR_PWR6: case PPC::DIR_PWR6X: case PPC::DIR_PWR7: case PPC::DIR_PWR8: case PPC::DIR_PWR9: { if (!ML) break; const PPCInstrInfo *TII = Subtarget.getInstrInfo(); // For small loops (between 5 and 8 instructions), align to a 32-byte // boundary so that the entire loop fits in one instruction-cache line. uint64_t LoopSize = 0; for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I) for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) { LoopSize += TII->GetInstSizeInBytes(*J); if (LoopSize > 32) break; } if (LoopSize > 16 && LoopSize <= 32) return 5; break; } } return TargetLowering::getPrefLoopAlignment(ML); } /// getConstraintType - Given a constraint, return the type of /// constraint it is for this target. PPCTargetLowering::ConstraintType PPCTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: break; case 'b': case 'r': case 'f': case 'd': case 'v': case 'y': return C_RegisterClass; case 'Z': // FIXME: While Z does indicate a memory constraint, it specifically // indicates an r+r address (used in conjunction with the 'y' modifier // in the replacement string). Currently, we're forcing the base // register to be r0 in the asm printer (which is interpreted as zero) // and forming the complete address in the second register. This is // suboptimal. return C_Memory; } } else if (Constraint == "wc") { // individual CR bits. return C_RegisterClass; } else if (Constraint == "wa" || Constraint == "wd" || Constraint == "wf" || Constraint == "ws") { return C_RegisterClass; // VSX registers. } return TargetLowering::getConstraintType(Constraint); } /// Examine constraint type and operand type and determine a weight value. /// This object must already have been set up with the operand type /// and the current alternative constraint selected. TargetLowering::ConstraintWeight PPCTargetLowering::getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const { ConstraintWeight weight = CW_Invalid; Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. if (StringRef(constraint) == "wc" && type->isIntegerTy(1)) return CW_Register; // an individual CR bit. 
else if ((StringRef(constraint) == "wa" || StringRef(constraint) == "wd" || StringRef(constraint) == "wf") && type->isVectorTy()) return CW_Register; else if (StringRef(constraint) == "ws" && type->isDoubleTy()) return CW_Register; switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); break; case 'b': if (type->isIntegerTy()) weight = CW_Register; break; case 'f': if (type->isFloatTy()) weight = CW_Register; break; case 'd': if (type->isDoubleTy()) weight = CW_Register; break; case 'v': if (type->isVectorTy()) weight = CW_Register; break; case 'y': weight = CW_Register; break; case 'Z': weight = CW_Memory; break; } return weight; } std::pair PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC RS6000 Constraint Letters switch (Constraint[0]) { case 'b': // R1-R31 if (VT == MVT::i64 && Subtarget.isPPC64()) return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); case 'r': // R0-R31 if (VT == MVT::i64 && Subtarget.isPPC64()) return std::make_pair(0U, &PPC::G8RCRegClass); return std::make_pair(0U, &PPC::GPRCRegClass); // 'd' and 'f' constraints are both defined to be "the floating point // registers", where one is for 32-bit and the other for 64-bit. We don't // really care overly much here so just give them all the same reg classes. case 'd': case 'f': if (VT == MVT::f32 || VT == MVT::i32) return std::make_pair(0U, &PPC::F4RCRegClass); if (VT == MVT::f64 || VT == MVT::i64) return std::make_pair(0U, &PPC::F8RCRegClass); if (VT == MVT::v4f64 && Subtarget.hasQPX()) return std::make_pair(0U, &PPC::QFRCRegClass); if (VT == MVT::v4f32 && Subtarget.hasQPX()) return std::make_pair(0U, &PPC::QSRCRegClass); break; case 'v': if (VT == MVT::v4f64 && Subtarget.hasQPX()) return std::make_pair(0U, &PPC::QFRCRegClass); if (VT == MVT::v4f32 && Subtarget.hasQPX()) return std::make_pair(0U, &PPC::QSRCRegClass); if (Subtarget.hasAltivec()) return std::make_pair(0U, &PPC::VRRCRegClass); case 'y': // crrc return std::make_pair(0U, &PPC::CRRCRegClass); } } else if (Constraint == "wc" && Subtarget.useCRBits()) { // An individual CR bit. return std::make_pair(0U, &PPC::CRBITRCRegClass); } else if ((Constraint == "wa" || Constraint == "wd" || Constraint == "wf") && Subtarget.hasVSX()) { return std::make_pair(0U, &PPC::VSRCRegClass); } else if (Constraint == "ws" && Subtarget.hasVSX()) { if (VT == MVT::f32 && Subtarget.hasP8Vector()) return std::make_pair(0U, &PPC::VSSRCRegClass); else return std::make_pair(0U, &PPC::VSFRCRegClass); } std::pair R = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers // (which we call X[0-9]+). If a 64-bit value has been requested, and a // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent // register. // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use // the AsmName field from *RegisterInfo.td, then this would not be necessary. if (R.first && VT == MVT::i64 && Subtarget.isPPC64() && PPC::GPRCRegClass.contains(R.first)) return std::make_pair(TRI->getMatchingSuperReg(R.first, PPC::sub_32, &PPC::G8RCRegClass), &PPC::G8RCRegClass); // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same. 
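// [Editor's aside -- illustrative sketch, not part of the original patch;
// the "cc" aliasing noted above is handled right below.]
// Standalone versions of a few of the immediate tests behind the 'I'..'P'
// inline-asm constraint letters handled in LowerAsmOperandForConstraint
// below (isInt<16>, isShiftedUInt<16, 16>, and friends):
#if 0
#include <cassert>
#include <cstdint>

static bool isInt16(int64_t V) { return V >= -32768 && V <= 32767; }
static bool isShiftedUInt16_16(int64_t V) { // only bits 16..31 may be set
  return (V & ~0xFFFF0000LL) == 0;
}

int main() {
  assert(isInt16(-32768));                 // 'I': signed 16-bit constant
  assert(isShiftedUInt16_16(0x00010000));  // 'J': high-order 16 bits only
  assert(!isShiftedUInt16_16(0x00000001));
  assert(!isInt16(-(-32768LL)));           // 'P': negation must fit in i16
  return 0;
}
#endif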
  if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
    R.first = PPC::CR0;
    R.second = &PPC::CRRCRegClass;
  }

  return R;
}

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     std::string &Constraint,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;

  // Only support length 1 constraints.
  if (Constraint.length() > 1) return;

  char Letter = Constraint[0];
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
    if (!CST) return; // Must be an immediate to match.
    SDLoc dl(Op);
    int64_t Value = CST->getSExtValue();
    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
                         // numbers are printed as such.
    switch (Letter) {
    default: llvm_unreachable("Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
      if (isInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
      if (isShiftedUInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
      if (isShiftedInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      if (isUInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'M':  // "M" is a constant that is greater than 31.
      if (Value > 31)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      if (Value > 0 && isPowerOf2_64(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'O':  // "O" is the constant zero.
      if (Value == 0)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      if (isInt<16>(-Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    }
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS) const {
  // PPC does not allow r+i addressing modes for vectors!
  if (Ty->isVectorTy() && AM.BaseOffs != 0)
    return false;

  // PPC allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // PPC only supports r+r addressing.
  switch (AM.Scale) {
    case 0:  // "r+i" or just "i", depending on HasBaseReg.
      break;
    case 1:
      if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
        return false;
      // Otherwise we have r+r or r+i.
      break;
    case 2:
      if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r or 2*r+i is not allowed.
        return false;
      // Allow 2*r as r+r.
      break;
    default:
      // No other scales are supported.
return false; } return true; } SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setReturnAddressIsTaken(true); if (verifyReturnAddressArgumentIsConstant(Op, DAG)) return SDValue(); SDLoc dl(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); // Make sure the function does not optimize away the store of the RA to // the stack. PPCFunctionInfo *FuncInfo = MF.getInfo(); FuncInfo->setLRStoreRequired(); bool isPPC64 = Subtarget.isPPC64(); auto PtrVT = getPointerTy(MF.getDataLayout()); if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl, isPPC64 ? MVT::i64 : MVT::i32); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), MachinePointerInfo()); } // Just load the return address off the stack. SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, MachinePointerInfo()); } SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT PtrVT = getPointerTy(MF.getDataLayout()); bool isPPC64 = PtrVT == MVT::i64; // Naked functions never have a frame pointer, and so we use r1. For all // other functions, this decision must be delayed until during PEI. unsigned FrameReg; if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) FrameReg = isPPC64 ? PPC::X1 : PPC::R1; else FrameReg = isPPC64 ? PPC::FP8 : PPC::FP; SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT); while (Depth--) FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), FrameAddr, MachinePointerInfo()); return FrameAddr; } // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const { bool isPPC64 = Subtarget.isPPC64(); bool isDarwinABI = Subtarget.isDarwinABI(); if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) || (!isPPC64 && VT != MVT::i32)) report_fatal_error("Invalid register global variable type"); bool is64Bit = isPPC64 && VT == MVT::i64; unsigned Reg = StringSwitch(RegName) .Case("r1", is64Bit ? PPC::X1 : PPC::R1) .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2) .Case("r13", (!isPPC64 && isDarwinABI) ? 0 : (is64Bit ? PPC::X13 : PPC::R13)) .Default(0); if (Reg) return Reg; report_fatal_error("Invalid register name global variable"); } bool PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The PowerPC target isn't yet aware of offsets. 
return false; } bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { switch (Intrinsic) { case Intrinsic::ppc_qpx_qvlfd: case Intrinsic::ppc_qpx_qvlfs: case Intrinsic::ppc_qpx_qvlfcd: case Intrinsic::ppc_qpx_qvlfcs: case Intrinsic::ppc_qpx_qvlfiwa: case Intrinsic::ppc_qpx_qvlfiwz: case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: case Intrinsic::ppc_altivec_lvebx: case Intrinsic::ppc_altivec_lvehx: case Intrinsic::ppc_altivec_lvewx: case Intrinsic::ppc_vsx_lxvd2x: case Intrinsic::ppc_vsx_lxvw4x: { EVT VT; switch (Intrinsic) { case Intrinsic::ppc_altivec_lvebx: VT = MVT::i8; break; case Intrinsic::ppc_altivec_lvehx: VT = MVT::i16; break; case Intrinsic::ppc_altivec_lvewx: VT = MVT::i32; break; case Intrinsic::ppc_vsx_lxvd2x: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvlfd: VT = MVT::v4f64; break; case Intrinsic::ppc_qpx_qvlfs: VT = MVT::v4f32; break; case Intrinsic::ppc_qpx_qvlfcd: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvlfcs: VT = MVT::v2f32; break; default: VT = MVT::v4i32; break; } Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = VT; Info.ptrVal = I.getArgOperand(0); Info.offset = -VT.getStoreSize()+1; Info.size = 2*VT.getStoreSize()-1; Info.align = 1; Info.vol = false; Info.readMem = true; Info.writeMem = false; return true; } case Intrinsic::ppc_qpx_qvlfda: case Intrinsic::ppc_qpx_qvlfsa: case Intrinsic::ppc_qpx_qvlfcda: case Intrinsic::ppc_qpx_qvlfcsa: case Intrinsic::ppc_qpx_qvlfiwaa: case Intrinsic::ppc_qpx_qvlfiwza: { EVT VT; switch (Intrinsic) { case Intrinsic::ppc_qpx_qvlfda: VT = MVT::v4f64; break; case Intrinsic::ppc_qpx_qvlfsa: VT = MVT::v4f32; break; case Intrinsic::ppc_qpx_qvlfcda: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvlfcsa: VT = MVT::v2f32; break; default: VT = MVT::v4i32; break; } Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = VT; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.size = VT.getStoreSize(); Info.align = 1; Info.vol = false; Info.readMem = true; Info.writeMem = false; return true; } case Intrinsic::ppc_qpx_qvstfd: case Intrinsic::ppc_qpx_qvstfs: case Intrinsic::ppc_qpx_qvstfcd: case Intrinsic::ppc_qpx_qvstfcs: case Intrinsic::ppc_qpx_qvstfiw: case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: case Intrinsic::ppc_altivec_stvebx: case Intrinsic::ppc_altivec_stvehx: case Intrinsic::ppc_altivec_stvewx: case Intrinsic::ppc_vsx_stxvd2x: case Intrinsic::ppc_vsx_stxvw4x: { EVT VT; switch (Intrinsic) { case Intrinsic::ppc_altivec_stvebx: VT = MVT::i8; break; case Intrinsic::ppc_altivec_stvehx: VT = MVT::i16; break; case Intrinsic::ppc_altivec_stvewx: VT = MVT::i32; break; case Intrinsic::ppc_vsx_stxvd2x: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvstfd: VT = MVT::v4f64; break; case Intrinsic::ppc_qpx_qvstfs: VT = MVT::v4f32; break; case Intrinsic::ppc_qpx_qvstfcd: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvstfcs: VT = MVT::v2f32; break; default: VT = MVT::v4i32; break; } Info.opc = ISD::INTRINSIC_VOID; Info.memVT = VT; Info.ptrVal = I.getArgOperand(1); Info.offset = -VT.getStoreSize()+1; Info.size = 2*VT.getStoreSize()-1; Info.align = 1; Info.vol = false; Info.readMem = false; Info.writeMem = true; return true; } case Intrinsic::ppc_qpx_qvstfda: case Intrinsic::ppc_qpx_qvstfsa: case Intrinsic::ppc_qpx_qvstfcda: case Intrinsic::ppc_qpx_qvstfcsa: case Intrinsic::ppc_qpx_qvstfiwa: { EVT VT; switch (Intrinsic) { case Intrinsic::ppc_qpx_qvstfda: VT = MVT::v4f64; break; case Intrinsic::ppc_qpx_qvstfsa: VT = MVT::v4f32; break; case 
Intrinsic::ppc_qpx_qvstfcda: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvstfcsa: VT = MVT::v2f32; break; default: VT = MVT::v4i32; break; } Info.opc = ISD::INTRINSIC_VOID; Info.memVT = VT; Info.ptrVal = I.getArgOperand(1); Info.offset = 0; Info.size = VT.getStoreSize(); Info.align = 1; Info.vol = false; Info.readMem = false; Info.writeMem = true; return true; } default: break; } return false; } /// getOptimalMemOpType - Returns the target specific optimal type for load /// and store operations as a result of memset, memcpy, and memmove /// lowering. If DstAlign is zero that means it's safe to destination /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, /// probably because the source does not need to be loaded. If 'IsMemset' is /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy /// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { if (getTargetMachine().getOptLevel() != CodeGenOpt::None) { const Function *F = MF.getFunction(); // When expanding a memset, require at least two QPX instructions to cover // the cost of loading the value to be stored from the constant pool. if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) && (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) && !F->hasFnAttribute(Attribute::NoImplicitFloat)) { return MVT::v4f64; } // We should use Altivec/VSX loads and stores when available. For unaligned // addresses, unaligned VSX loads are only fast starting with the P8. if (Subtarget.hasAltivec() && Size >= 16 && (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) || ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector()))) return MVT::v4i32; } if (Subtarget.isPPC64()) { return MVT::i64; } return MVT::i32; } /// \brief Returns true if it is beneficial to convert a load of a constant /// to just the constant itself. bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); return !(BitSize == 0 || BitSize > 64); } bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); return NumBits1 == 64 && NumBits2 == 32; } bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { if (!VT1.isInteger() || !VT2.isInteger()) return false; unsigned NumBits1 = VT1.getSizeInBits(); unsigned NumBits2 = VT2.getSizeInBits(); return NumBits1 == 64 && NumBits2 == 32; } bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { // Generally speaking, zexts are not free, but they are free when they can be // folded with other operations. if (LoadSDNode *LD = dyn_cast(Val)) { EVT MemVT = LD->getMemoryVT(); if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 || (Subtarget.isPPC64() && MemVT == MVT::i32)) && (LD->getExtensionType() == ISD::NON_EXTLOAD || LD->getExtensionType() == ISD::ZEXTLOAD)) return true; } // FIXME: Add other cases... 
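The negative offset and oversized size recorded above for the vector load and store intrinsics encode that lvx-class instructions ignore the low bits of the address. An editor's sketch of the arithmetic, assuming VT = v4i32 (16-byte store size):

// lvx clears the low four address bits, so a conservative range for the
// bytes touched is [Ptr - 15, Ptr + 15]: offset -15 with size 31, exactly
// the Info.offset/Info.size values computed above.
constexpr int StoreSize = 16;
constexpr int Offset    = -StoreSize + 1;     // -15
constexpr int Size      = 2 * StoreSize - 1;  // 31
static_assert(Offset == -15 && Size == 31, "conservative lvx window");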
// - 32-bit shifts with a zext to i64 // - zext after ctlz, bswap, etc. // - zext after and by a constant mask return TargetLowering::isZExtFree(Val, VT2); } bool PPCTargetLowering::isFPExtFree(EVT VT) const { assert(VT.isFloatingPoint()); return true; } bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const { return isInt<16>(Imm) || isUInt<16>(Imm); } bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const { return isInt<16>(Imm) || isUInt<16>(Imm); } bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, unsigned, bool *Fast) const { if (DisablePPCUnaligned) return false; // PowerPC supports unaligned memory access for simple non-vector types. // Although accessing unaligned addresses is not as efficient as accessing // aligned addresses, it is generally more efficient than manual expansion, // and generally only traps for software emulation when crossing page // boundaries. if (!VT.isSimple()) return false; if (VT.getSimpleVT().isVector()) { if (Subtarget.hasVSX()) { if (VT != MVT::v2f64 && VT != MVT::v2i64 && VT != MVT::v4f32 && VT != MVT::v4i32) return false; } else { return false; } } if (VT == MVT::ppcf128) return false; if (Fast) *Fast = true; return true; } bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { VT = VT.getScalarType(); if (!VT.isSimple()) return false; switch (VT.getSimpleVT().SimpleTy) { case MVT::f32: case MVT::f64: return true; default: break; } return false; } const MCPhysReg * PPCTargetLowering::getScratchRegisters(CallingConv::ID) const { // LR is a callee-save register, but we must treat it as clobbered by any call // site. Hence we include LR in the scratch registers, which are in turn added // as implicit-defs for stackmaps and patchpoints. The same reasoning applies // to CTR, which is used by any indirect call. static const MCPhysReg ScratchRegs[] = { PPC::X12, PPC::LR8, PPC::CTR8, 0 }; return ScratchRegs; } unsigned PPCTargetLowering::getExceptionPointerRegister( const Constant *PersonalityFn) const { return Subtarget.isPPC64() ? PPC::X3 : PPC::R3; } unsigned PPCTargetLowering::getExceptionSelectorRegister( const Constant *PersonalityFn) const { return Subtarget.isPPC64() ? PPC::X4 : PPC::R4; } bool PPCTargetLowering::shouldExpandBuildVectorWithShuffles( EVT VT , unsigned DefinedValues) const { if (VT == MVT::v2i64) return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves if (Subtarget.hasVSX() || Subtarget.hasQPX()) return true; return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); } Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { if (DisableILPPref || Subtarget.enableMachineScheduler()) return TargetLowering::getSchedulingPreference(N); return Sched::ILP; } // Create a fast isel object. 
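isLegalICmpImmediate above accepts either range because the compare instructions come in signed (cmpwi/cmpdi) and unsigned (cmplwi/cmpldi) forms, each taking a 16-bit immediate. A standalone restatement (editor's sketch, not LLVM API):

#include <cstdint>
bool legalICmpImm(int64_t Imm) {
  bool SignedFits   = Imm >= -32768 && Imm <= 32767;  // isInt<16>(Imm)
  bool UnsignedFits = Imm >= 0 && Imm <= 65535;       // isUInt<16>(Imm)
  return SignedFits || UnsignedFits;
}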
FastISel * PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const { return PPC::createFastISel(FuncInfo, LibInfo); } void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { if (Subtarget.isDarwinABI()) return; if (!Subtarget.isPPC64()) return; // Update IsSplitCSR in PPCFunctionInfo PPCFunctionInfo *PFI = Entry->getParent()->getInfo(); PFI->setIsSplitCSR(true); } void PPCTargetLowering::insertCopiesSplitCSR( MachineBasicBlock *Entry, const SmallVectorImpl &Exits) const { const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); if (!IStart) return; const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); MachineBasicBlock::iterator MBBI = Entry->begin(); for (const MCPhysReg *I = IStart; *I; ++I) { const TargetRegisterClass *RC = nullptr; if (PPC::G8RCRegClass.contains(*I)) RC = &PPC::G8RCRegClass; else if (PPC::F8RCRegClass.contains(*I)) RC = &PPC::F8RCRegClass; else if (PPC::CRRCRegClass.contains(*I)) RC = &PPC::CRRCRegClass; else if (PPC::VRRCRegClass.contains(*I)) RC = &PPC::VRRCRegClass; else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); unsigned NewVR = MRI->createVirtualRegister(RC); // Create copy from CSR to a virtual register. // FIXME: this currently does not emit CFI pseudo-instructions, it works // fine for CXX_FAST_TLS since the C++-style TLS access functions should be // nounwind. If we want to generalize this later, we may need to emit // CFI pseudo-instructions. assert(Entry->getParent()->getFunction()->hasFnAttribute( Attribute::NoUnwind) && "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(*I); // Insert the copy-back instructions right before the terminator for (auto *Exit : Exits) BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), TII->get(TargetOpcode::COPY), *I) .addReg(NewVR); } } // Override to enable LOAD_STACK_GUARD lowering on Linux. bool PPCTargetLowering::useLoadStackGuardNode() const { if (!Subtarget.isTargetLinux()) return TargetLowering::useLoadStackGuardNode(); return true; } // Override to disable global variable loading on Linux. void PPCTargetLowering::insertSSPDeclarations(Module &M) const { if (!Subtarget.isTargetLinux()) return TargetLowering::insertSSPDeclarations(M); } Index: head/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp =================================================================== --- head/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp (revision 309148) +++ head/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp (revision 309149) @@ -1,252 +1,252 @@ //===-- PowerPCSubtarget.cpp - PPC Subtarget Information ------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the PPC specific subclass of TargetSubtargetInfo. 
// //===----------------------------------------------------------------------===// #include "PPCSubtarget.h" #include "PPC.h" #include "PPCRegisterInfo.h" #include "PPCTargetMachine.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetMachine.h" #include using namespace llvm; #define DEBUG_TYPE "ppc-subtarget" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR #include "PPCGenSubtargetInfo.inc" static cl::opt UseSubRegLiveness("ppc-track-subreg-liveness", cl::desc("Enable subregister liveness tracking for PPC"), cl::Hidden); static cl::opt QPXStackUnaligned("qpx-stack-unaligned", cl::desc("Even when QPX is enabled the stack is not 32-byte aligned"), cl::Hidden); PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { initializeEnvironment(); initSubtargetFeatures(CPU, FS); return *this; } PPCSubtarget::PPCSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const PPCTargetMachine &TM) : PPCGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT), IsPPC64(TargetTriple.getArch() == Triple::ppc64 || TargetTriple.getArch() == Triple::ppc64le), TM(TM), FrameLowering(initializeSubtargetDependencies(CPU, FS)), InstrInfo(*this), TLInfo(TM, *this) {} void PPCSubtarget::initializeEnvironment() { StackAlignment = 16; DarwinDirective = PPC::DIR_NONE; HasMFOCRF = false; Has64BitSupport = false; Use64BitRegs = false; UseCRBits = false; - UseSoftFloat = false; + HasHardFloat = false; HasAltivec = false; HasSPE = false; HasQPX = false; HasVSX = false; HasP8Vector = false; HasP8Altivec = false; HasP8Crypto = false; HasP9Vector = false; HasP9Altivec = false; HasFCPSGN = false; HasFSQRT = false; HasFRE = false; HasFRES = false; HasFRSQRTE = false; HasFRSQRTES = false; HasRecipPrec = false; HasSTFIWX = false; HasLFIWAX = false; HasFPRND = false; HasFPCVT = false; HasISEL = false; HasBPERMD = false; HasExtDiv = false; HasCMPB = false; HasLDBRX = false; IsBookE = false; HasOnlyMSYNC = false; IsPPC4xx = false; IsPPC6xx = false; IsE500 = false; FeatureMFTB = false; DeprecatedDST = false; HasLazyResolverStubs = false; HasICBT = false; HasInvariantFunctionDescriptors = false; HasPartwordAtomics = false; HasDirectMove = false; IsQPXStackUnaligned = false; HasHTM = false; HasFusion = false; HasFloat128 = false; IsISA3_0 = false; UseLongCalls = false; HasPOPCNTD = POPCNTD_Unavailable; } void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { // Determine default and user specified characteristics std::string CPUName = CPU; if (CPUName.empty() || CPU == "generic") { // If cross-compiling with -march=ppc64le without -mcpu if (TargetTriple.getArch() == Triple::ppc64le) CPUName = "ppc64le"; else CPUName = "generic"; } // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUName); // Parse features string. ParseSubtargetFeatures(CPUName, FS); // If the user requested use of 64-bit regs, but the cpu selected doesn't // support it, ignore. if (IsPPC64 && has64BitSupport()) Use64BitRegs = true; // Set up darwin-specific properties. if (isDarwin()) HasLazyResolverStubs = true; // QPX requires a 32-byte aligned stack. 
Note that we need to do this if // we're compiling for a BG/Q system regardless of whether or not QPX // is enabled because external functions will assume this alignment. IsQPXStackUnaligned = QPXStackUnaligned; StackAlignment = getPlatformStackAlignment(); // Determine endianness. // FIXME: Part of the TargetMachine. IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le); } /// Return true if accesses to the specified global have to go through a dyld /// lazy resolution stub. This means that an extra load is required to get the /// address of the global. bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV) const { if (!HasLazyResolverStubs) return false; if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) return true; // 32 bit macho has no relocation for a-b if a is undefined, even if b is in // the section that is being relocated. This means we have to use o load even // for GVs that are known to be local to the dso. if (GV->isDeclarationForLinker() || GV->hasCommonLinkage()) return true; return false; } // Embedded cores need aggressive scheduling (and some others also benefit). static bool needsAggressiveScheduling(unsigned Directive) { switch (Directive) { default: return false; case PPC::DIR_440: case PPC::DIR_A2: case PPC::DIR_E500mc: case PPC::DIR_E5500: case PPC::DIR_PWR7: case PPC::DIR_PWR8: // FIXME: Same as P8 until POWER9 scheduling info is available case PPC::DIR_PWR9: return true; } } bool PPCSubtarget::enableMachineScheduler() const { // Enable MI scheduling for the embedded cores. // FIXME: Enable this for all cores (some additional modeling // may be necessary). return needsAggressiveScheduling(DarwinDirective); } // This overrides the PostRAScheduler bit in the SchedModel for each CPU. bool PPCSubtarget::enablePostRAScheduler() const { return true; } PPCGenSubtargetInfo::AntiDepBreakMode PPCSubtarget::getAntiDepBreakMode() const { return TargetSubtargetInfo::ANTIDEP_ALL; } void PPCSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const { CriticalPathRCs.clear(); CriticalPathRCs.push_back(isPPC64() ? &PPC::G8RCRegClass : &PPC::GPRCRegClass); } void PPCSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const { if (needsAggressiveScheduling(DarwinDirective)) { Policy.OnlyTopDown = false; Policy.OnlyBottomUp = false; } // Spilling is generally expensive on all PPC cores, so always enable // register-pressure tracking. Policy.ShouldTrackPressure = true; } bool PPCSubtarget::useAA() const { // Use AA during code generation for the embedded cores. return needsAggressiveScheduling(DarwinDirective); } bool PPCSubtarget::enableSubRegLiveness() const { return UseSubRegLiveness; } unsigned char PPCSubtarget::classifyGlobalReference( const GlobalValue *GV) const { // Note that currently we don't generate non-pic references. // If a caller wants that, this will have to be updated. // Large code model always uses the TOC even for local symbols. if (TM.getCodeModel() == CodeModel::Large) return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG; unsigned char flags = PPCII::MO_PIC_FLAG; // Only if the relocation mode is PIC do we have to worry about // interposition. In all other cases we can use a slightly looser standard to // decide how to access the symbol. if (TM.getRelocationModel() == Reloc::PIC_) { // If it's local, or it's non-default, it can't be interposed. 
if (!GV->hasLocalLinkage() && GV->hasDefaultVisibility()) { flags |= PPCII::MO_NLP_FLAG; } return flags; } if (GV->isStrongDefinitionForLinker()) return flags; return flags | PPCII::MO_NLP_FLAG; } bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); } bool PPCSubtarget::isPPC64() const { return TM.isPPC64(); } Index: head/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h =================================================================== --- head/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h (revision 309148) +++ head/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h (revision 309149) @@ -1,319 +1,319 @@ //===-- PPCSubtarget.h - Define Subtarget for the PPC ----------*- C++ -*--===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file declares the PowerPC specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_POWERPC_PPCSUBTARGET_H #define LLVM_LIB_TARGET_POWERPC_PPCSUBTARGET_H #include "PPCFrameLowering.h" #include "PPCISelLowering.h" #include "PPCInstrInfo.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetSubtargetInfo.h" #include #define GET_SUBTARGETINFO_HEADER #include "PPCGenSubtargetInfo.inc" // GCC #defines PPC on Linux but we use it as our namespace name #undef PPC namespace llvm { class StringRef; namespace PPC { // -m directive values. enum { DIR_NONE, DIR_32, DIR_440, DIR_601, DIR_602, DIR_603, DIR_7400, DIR_750, DIR_970, DIR_A2, DIR_E500mc, DIR_E5500, DIR_PWR3, DIR_PWR4, DIR_PWR5, DIR_PWR5X, DIR_PWR6, DIR_PWR6X, DIR_PWR7, DIR_PWR8, DIR_PWR9, DIR_64 }; } class GlobalValue; class TargetMachine; class PPCSubtarget : public PPCGenSubtargetInfo { public: enum POPCNTDKind { POPCNTD_Unavailable, POPCNTD_Slow, POPCNTD_Fast }; protected: /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned StackAlignment; /// Selected instruction itineraries (one entry per itinerary class.) InstrItineraryData InstrItins; /// Which cpu directive was used. 
unsigned DarwinDirective; /// Used by the ISel to turn in optimizations for POWER4-derived architectures bool HasMFOCRF; bool Has64BitSupport; bool Use64BitRegs; bool UseCRBits; - bool UseSoftFloat; + bool HasHardFloat; bool IsPPC64; bool HasAltivec; bool HasSPE; bool HasQPX; bool HasVSX; bool HasP8Vector; bool HasP8Altivec; bool HasP8Crypto; bool HasP9Vector; bool HasP9Altivec; bool HasFCPSGN; bool HasFSQRT; bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES; bool HasRecipPrec; bool HasSTFIWX; bool HasLFIWAX; bool HasFPRND; bool HasFPCVT; bool HasISEL; bool HasBPERMD; bool HasExtDiv; bool HasCMPB; bool HasLDBRX; bool IsBookE; bool HasOnlyMSYNC; bool IsE500; bool IsPPC4xx; bool IsPPC6xx; bool FeatureMFTB; bool DeprecatedDST; bool HasLazyResolverStubs; bool IsLittleEndian; bool HasICBT; bool HasInvariantFunctionDescriptors; bool HasPartwordAtomics; bool HasDirectMove; bool HasHTM; bool HasFusion; bool HasFloat128; bool IsISA3_0; bool UseLongCalls; POPCNTDKind HasPOPCNTD; /// When targeting QPX running a stock PPC64 Linux kernel where the stack /// alignment has not been changed, we need to keep the 16-byte alignment /// of the stack. bool IsQPXStackUnaligned; const PPCTargetMachine &TM; PPCFrameLowering FrameLowering; PPCInstrInfo InstrInfo; PPCTargetLowering TLInfo; SelectionDAGTargetInfo TSInfo; public: /// This constructor initializes the data members to match that /// of the specified triple. /// PPCSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const PPCTargetMachine &TM); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every /// function for this subtarget. unsigned getStackAlignment() const { return StackAlignment; } /// getDarwinDirective - Returns the -m directive specified for the cpu. /// unsigned getDarwinDirective() const { return DarwinDirective; } /// getInstrItins - Return the instruction itineraries based on subtarget /// selection. const InstrItineraryData *getInstrItineraryData() const override { return &InstrItins; } const PPCFrameLowering *getFrameLowering() const override { return &FrameLowering; } const PPCInstrInfo *getInstrInfo() const override { return &InstrInfo; } const PPCTargetLowering *getTargetLowering() const override { return &TLInfo; } const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { return &TSInfo; } const PPCRegisterInfo *getRegisterInfo() const override { return &getInstrInfo()->getRegisterInfo(); } const PPCTargetMachine &getTargetMachine() const { return TM; } /// initializeSubtargetDependencies - Initializes using a CPU and feature string /// so that we can use initializer lists for subtarget initialization. PPCSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); private: void initializeEnvironment(); void initSubtargetFeatures(StringRef CPU, StringRef FS); public: /// isPPC64 - Return true if we are generating code for 64-bit pointer mode. /// bool isPPC64() const; /// has64BitSupport - Return true if the selected CPU supports 64-bit /// instructions, regardless of whether we are in 32-bit or 64-bit mode. bool has64BitSupport() const { return Has64BitSupport; } // useSoftFloat - Return true if soft-float option is turned on. 
- bool useSoftFloat() const { return UseSoftFloat; } + bool useSoftFloat() const { return !HasHardFloat; } /// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit /// registers in 32-bit mode when possible. This can only true if /// has64BitSupport() returns true. bool use64BitRegs() const { return Use64BitRegs; } /// useCRBits - Return true if we should store and manipulate i1 values in /// the individual condition register bits. bool useCRBits() const { return UseCRBits; } /// hasLazyResolverStub - Return true if accesses to the specified global have /// to go through a dyld lazy resolution stub. This means that an extra load /// is required to get the address of the global. bool hasLazyResolverStub(const GlobalValue *GV) const; // isLittleEndian - True if generating little-endian code bool isLittleEndian() const { return IsLittleEndian; } // Specific obvious features. bool hasFCPSGN() const { return HasFCPSGN; } bool hasFSQRT() const { return HasFSQRT; } bool hasFRE() const { return HasFRE; } bool hasFRES() const { return HasFRES; } bool hasFRSQRTE() const { return HasFRSQRTE; } bool hasFRSQRTES() const { return HasFRSQRTES; } bool hasRecipPrec() const { return HasRecipPrec; } bool hasSTFIWX() const { return HasSTFIWX; } bool hasLFIWAX() const { return HasLFIWAX; } bool hasFPRND() const { return HasFPRND; } bool hasFPCVT() const { return HasFPCVT; } bool hasAltivec() const { return HasAltivec; } bool hasSPE() const { return HasSPE; } bool hasQPX() const { return HasQPX; } bool hasVSX() const { return HasVSX; } bool hasP8Vector() const { return HasP8Vector; } bool hasP8Altivec() const { return HasP8Altivec; } bool hasP8Crypto() const { return HasP8Crypto; } bool hasP9Vector() const { return HasP9Vector; } bool hasP9Altivec() const { return HasP9Altivec; } bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } bool hasBPERMD() const { return HasBPERMD; } bool hasExtDiv() const { return HasExtDiv; } bool hasCMPB() const { return HasCMPB; } bool hasLDBRX() const { return HasLDBRX; } bool isBookE() const { return IsBookE; } bool hasOnlyMSYNC() const { return HasOnlyMSYNC; } bool isPPC4xx() const { return IsPPC4xx; } bool isPPC6xx() const { return IsPPC6xx; } bool isE500() const { return IsE500; } bool isFeatureMFTB() const { return FeatureMFTB; } bool isDeprecatedDST() const { return DeprecatedDST; } bool hasICBT() const { return HasICBT; } bool hasInvariantFunctionDescriptors() const { return HasInvariantFunctionDescriptors; } bool hasPartwordAtomics() const { return HasPartwordAtomics; } bool hasDirectMove() const { return HasDirectMove; } bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; } unsigned getPlatformStackAlignment() const { if ((hasQPX() || isBGQ()) && !isQPXStackUnaligned()) return 32; return 16; } bool hasHTM() const { return HasHTM; } bool hasFusion() const { return HasFusion; } bool hasFloat128() const { return HasFloat128; } bool isISA3_0() const { return IsISA3_0; } bool useLongCalls() const { return UseLongCalls; } POPCNTDKind hasPOPCNTD() const { return HasPOPCNTD; } const Triple &getTargetTriple() const { return TargetTriple; } /// isDarwin - True if this is any darwin platform. bool isDarwin() const { return TargetTriple.isMacOSX(); } /// isBGQ - True if this is a BG/Q platform. 
bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; } bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool isDarwinABI() const { return isTargetMachO() || isDarwin(); } bool isSVR4ABI() const { return !isDarwinABI(); } bool isELFv2ABI() const; bool enableEarlyIfConversion() const override { return hasISEL(); } // Scheduling customization. bool enableMachineScheduler() const override; // This overrides the PostRAScheduler bit in the SchedModel for each CPU. bool enablePostRAScheduler() const override; AntiDepBreakMode getAntiDepBreakMode() const override; void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override; void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override; bool useAA() const override; bool enableSubRegLiveness() const override; /// classifyGlobalReference - Classify a global variable reference for the /// current subtarget accourding to how we should reference it. unsigned char classifyGlobalReference(const GlobalValue *GV) const; }; } // End llvm namespace #endif Index: head/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp =================================================================== --- head/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp (revision 309148) +++ head/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp (revision 309149) @@ -1,442 +1,442 @@ //===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // Top-level implementation for the PowerPC target. 
// //===----------------------------------------------------------------------===// #include "PPCTargetMachine.h" #include "PPC.h" #include "PPCTargetObjectFile.h" #include "PPCTargetTransformInfo.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Function.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Scalar.h" using namespace llvm; static cl:: opt DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden, cl::desc("Disable CTR loops for PPC")); static cl:: opt DisablePreIncPrep("disable-ppc-preinc-prep", cl::Hidden, cl::desc("Disable PPC loop preinc prep")); static cl::opt VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early", cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early")); static cl:: opt DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden, cl::desc("Disable VSX Swap Removal for PPC")); static cl:: opt DisableQPXLoadSplat("disable-ppc-qpx-load-splat", cl::Hidden, cl::desc("Disable QPX load splat simplification")); static cl:: opt DisableMIPeephole("disable-ppc-peephole", cl::Hidden, cl::desc("Disable machine peepholes for PPC")); static cl::opt EnableGEPOpt("ppc-gep-opt", cl::Hidden, cl::desc("Enable optimizations on complex GEPs"), cl::init(true)); static cl::opt EnablePrefetch("enable-ppc-prefetching", cl::desc("disable software prefetching on PPC"), cl::init(false), cl::Hidden); static cl::opt EnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps", cl::desc("Add extra TOC register dependencies"), cl::init(true), cl::Hidden); static cl::opt EnableMachineCombinerPass("ppc-machine-combiner", cl::desc("Enable the machine combiner pass"), cl::init(true), cl::Hidden); extern "C" void LLVMInitializePowerPCTarget() { // Register the targets RegisterTargetMachine A(ThePPC32Target); RegisterTargetMachine B(ThePPC64Target); RegisterTargetMachine C(ThePPC64LETarget); PassRegistry &PR = *PassRegistry::getPassRegistry(); initializePPCBoolRetToIntPass(PR); } /// Return the datalayout string of a subtarget. static std::string getDataLayoutString(const Triple &T) { bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le; std::string Ret; // Most PPC* platforms are big endian, PPC64LE is little endian. if (T.getArch() == Triple::ppc64le) Ret = "e"; else Ret = "E"; Ret += DataLayout::getManglingComponent(T); // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit // pointers. if (!is64Bit || T.getOS() == Triple::Lv2) Ret += "-p:32:32"; // Note, the alignment values for f64 and i64 on ppc64 in Darwin // documentation are wrong; these are correct (i.e. "what gcc does"). if (is64Bit || !T.isOSDarwin()) Ret += "-i64:64"; else Ret += "-f64:32:64"; // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones. 
if (is64Bit) Ret += "-n32:64"; else Ret += "-n32"; return Ret; } static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL, const Triple &TT) { std::string FullFS = FS; // Make sure 64-bit features are available when CPUname is generic if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) { if (!FullFS.empty()) FullFS = "+64bit," + FullFS; else FullFS = "+64bit"; } if (OL >= CodeGenOpt::Default) { if (!FullFS.empty()) FullFS = "+crbits," + FullFS; else FullFS = "+crbits"; } if (OL != CodeGenOpt::None) { if (!FullFS.empty()) FullFS = "+invariant-function-descriptors," + FullFS; else FullFS = "+invariant-function-descriptors"; } return FullFS; } static std::unique_ptr createTLOF(const Triple &TT) { // If it isn't a Mach-O file then it's going to be a linux ELF // object file. if (TT.isOSDarwin()) return make_unique(); return make_unique(); } static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT, const TargetOptions &Options) { if (Options.MCOptions.getABIName().startswith("elfv1")) return PPCTargetMachine::PPC_ABI_ELFv1; else if (Options.MCOptions.getABIName().startswith("elfv2")) return PPCTargetMachine::PPC_ABI_ELFv2; assert(Options.MCOptions.getABIName().empty() && "Unknown target-abi option!"); if (!TT.isMacOSX()) { switch (TT.getArch()) { case Triple::ppc64le: return PPCTargetMachine::PPC_ABI_ELFv2; case Triple::ppc64: return PPCTargetMachine::PPC_ABI_ELFv1; default: // Fallthrough. ; } } return PPCTargetMachine::PPC_ABI_UNKNOWN; } static Reloc::Model getEffectiveRelocModel(const Triple &TT, Optional RM) { if (!RM.hasValue()) { if (TT.isOSDarwin()) return Reloc::DynamicNoPIC; return Reloc::Static; } return *RM; } // The FeatureString here is a little subtle. We are modifying the feature // string with what are (currently) non-function specific overrides as it goes // into the LLVMTargetMachine constructor and then using the stored value in the // Subtarget constructor below it. PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional RM, CodeModel::Model CM, CodeGenOpt::Level OL) : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU, computeFSAdditions(FS, OL, TT), Options, getEffectiveRelocModel(TT, RM), CM, OL), TLOF(createTLOF(getTargetTriple())), TargetABI(computeTargetABI(TT, Options)), Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) { // For the estimates, convergence is quadratic, so we essentially double the // number of digits correct after every iteration. For both FRE and FRSQRTE, // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(), // this is 2^-14. IEEE float has 23 digits and double has 52 digits. unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 
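computeFSAdditions above prepends each default, so the most recently added feature ends up first in the final string. A standalone sketch (editor's example; prependFeature is an assumed helper, not LLVM API):

#include <string>
static std::string prependFeature(std::string FS, const std::string &F) {
  return FS.empty() ? F : F + "," + FS;
}
// For a ppc64le triple at -O2, starting from an empty feature string:
//   ""  ->  "+64bit"
//       ->  "+crbits,+64bit"
//       ->  "+invariant-function-descriptors,+crbits,+64bit"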
1 : 3, RefinementSteps64 = RefinementSteps + 1; this->Options.Reciprocals.setDefaults("sqrtf", true, RefinementSteps); this->Options.Reciprocals.setDefaults("vec-sqrtf", true, RefinementSteps); this->Options.Reciprocals.setDefaults("divf", true, RefinementSteps); this->Options.Reciprocals.setDefaults("vec-divf", true, RefinementSteps); this->Options.Reciprocals.setDefaults("sqrtd", true, RefinementSteps64); this->Options.Reciprocals.setDefaults("vec-sqrtd", true, RefinementSteps64); this->Options.Reciprocals.setDefaults("divd", true, RefinementSteps64); this->Options.Reciprocals.setDefaults("vec-divd", true, RefinementSteps64); initAsmInfo(); } PPCTargetMachine::~PPCTargetMachine() {} void PPC32TargetMachine::anchor() { } PPC32TargetMachine::PPC32TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional RM, CodeModel::Model CM, CodeGenOpt::Level OL) : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} void PPC64TargetMachine::anchor() { } PPC64TargetMachine::PPC64TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional RM, CodeModel::Model CM, CodeGenOpt::Level OL) : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} const PPCSubtarget * PPCTargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); Attribute FSAttr = F.getFnAttribute("target-features"); std::string CPU = !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString().str() : TargetCPU; std::string FS = !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString().str() : TargetFS; // FIXME: This is related to the code below to reset the target options, // we need to know whether or not the soft float flag is set on the // function before we can generate a subtarget. We also need to use // it as a key for the subtarget since that can be the only difference // between two functions. bool SoftFloat = F.getFnAttribute("use-soft-float").getValueAsString() == "true"; // If the soft float attribute is set on the function turn on the soft float // subtarget feature. if (SoftFloat) - FS += FS.empty() ? "+soft-float" : ",+soft-float"; + FS += FS.empty() ? "-hard-float" : ",-hard-float"; auto &I = SubtargetMap[CPU + FS]; if (!I) { // This needs to be done before we create a new subtarget since any // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. resetTargetOptions(F); I = llvm::make_unique( TargetTriple, CPU, // FIXME: It would be good to have the subtarget additions here // not necessary. Anything that turns them on/off (overrides) ends // up being put at the end of the feature string, but the defaults // shouldn't require adding them. Fixing this means pulling Feature64Bit // out of most of the target cpus in the .td file and making it set only // as part of initialization via the TargetTriple. computeFSAdditions(FS, getOptLevel(), getTargetTriple()), *this); } return I.get(); } //===----------------------------------------------------------------------===// // Pass Pipeline Configuration //===----------------------------------------------------------------------===// namespace { /// PPC Code Generator Pass Configuration Options. 
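Because the per-function subtarget cache in getSubtargetImpl is keyed on CPU + FS, two functions that differ only in the "use-soft-float" attribute map to different subtargets. An editor's sketch with assumed CPU and feature values:

#include <string>
std::string subtargetKey(bool SoftFloat) {
  std::string CPU = "ppc64", FS = "+64bit";  // assumed inputs
  if (SoftFloat)
    FS += FS.empty() ? "-hard-float" : ",-hard-float";
  return CPU + FS;  // "ppc64+64bit,-hard-float" when SoftFloat is set
}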
class PPCPassConfig : public TargetPassConfig {
public:
  PPCPassConfig(PPCTargetMachine *TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {}

  PPCTargetMachine &getPPCTargetMachine() const {
    return getTM<PPCTargetMachine>();
  }

  void addIRPasses() override;
  bool addPreISel() override;
  bool addILPOpts() override;
  bool addInstSelector() override;
  void addMachineSSAOptimization() override;
  void addPreRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};
} // namespace

TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new PPCPassConfig(this, PM);
}

void PPCPassConfig::addIRPasses() {
  if (TM->getOptLevel() != CodeGenOpt::None)
    addPass(createPPCBoolRetToIntPass());
  addPass(createAtomicExpandPass(&getPPCTargetMachine()));

  // For the BG/Q (or if explicitly requested), add explicit data prefetch
  // intrinsics.
  bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ &&
                        getOptLevel() != CodeGenOpt::None;
  if (EnablePrefetch.getNumOccurrences() > 0)
    UsePrefetching = EnablePrefetch;
  if (UsePrefetching)
    addPass(createLoopDataPrefetchPass());

  if (TM->getOptLevel() >= CodeGenOpt::Default && EnableGEPOpt) {
    // Call SeparateConstOffsetFromGEP pass to extract constants within indices
    // and lower a GEP with multiple indices to either arithmetic operations or
    // multiple GEPs with single index.
    addPass(createSeparateConstOffsetFromGEPPass(TM, true));
    // Call EarlyCSE pass to find and remove subexpressions in the lowered
    // result.
    addPass(createEarlyCSEPass());
    // Do loop invariant code motion in case part of the lowered result is
    // invariant.
    addPass(createLICMPass());
  }

  TargetPassConfig::addIRPasses();
}

bool PPCPassConfig::addPreISel() {
  if (!DisablePreIncPrep && getOptLevel() != CodeGenOpt::None)
    addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine()));

  if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
    addPass(createPPCCTRLoops(getPPCTargetMachine()));

  return false;
}

bool PPCPassConfig::addILPOpts() {
  addPass(&EarlyIfConverterID);

  if (EnableMachineCombinerPass)
    addPass(&MachineCombinerID);

  return true;
}

bool PPCPassConfig::addInstSelector() {
  // Install an instruction selector.
  addPass(createPPCISelDag(getPPCTargetMachine()));

#ifndef NDEBUG
  if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
    addPass(createPPCCTRLoopsVerify());
#endif

  addPass(createPPCVSXCopyPass());
  return false;
}

void PPCPassConfig::addMachineSSAOptimization() {
  TargetPassConfig::addMachineSSAOptimization();
  // For little endian, remove where possible the vector swap instructions
  // introduced at code generation to normalize vector element order.
  if (TM->getTargetTriple().getArch() == Triple::ppc64le &&
      !DisableVSXSwapRemoval)
    addPass(createPPCVSXSwapRemovalPass());
  // Target-specific peephole cleanups performed after instruction
  // selection.
  if (!DisableMIPeephole) {
    addPass(createPPCMIPeepholePass());
    addPass(&DeadMachineInstructionElimID);
  }
}

void PPCPassConfig::addPreRegAlloc() {
  if (getOptLevel() != CodeGenOpt::None) {
    initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
    insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
               &PPCVSXFMAMutateID);
  }

  // FIXME: We probably don't need to run these for -fPIE.
  if (getPPCTargetMachine().isPositionIndependent()) {
    // FIXME: LiveVariables should not be necessary here!
    // PPCTLSDynamicCallPass uses LiveIntervals, which previously depended on
    // LiveVariables.
This (unnecessary) dependency has been removed now, // however a stage-2 clang build fails without LiveVariables computed here. addPass(&LiveVariablesID, false); addPass(createPPCTLSDynamicCallPass()); } if (EnableExtraTOCRegDeps) addPass(createPPCTOCRegDepsPass()); } void PPCPassConfig::addPreSched2() { if (getOptLevel() != CodeGenOpt::None) { addPass(&IfConverterID); // This optimization must happen after anything that might do store-to-load // forwarding. Here we're after RA (and, thus, when spills are inserted) // but before post-RA scheduling. if (!DisableQPXLoadSplat) addPass(createPPCQPXLoadSplatPass()); } } void PPCPassConfig::addPreEmitPass() { if (getOptLevel() != CodeGenOpt::None) addPass(createPPCEarlyReturnPass(), false); // Must run branch selection immediately preceding the asm printer. addPass(createPPCBranchSelectionPass(), false); } TargetIRAnalysis PPCTargetMachine::getTargetIRAnalysis() { return TargetIRAnalysis([this](const Function &F) { return TargetTransformInfo(PPCTTIImpl(this, F)); }); } Index: head/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- head/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp (revision 309148) +++ head/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp (revision 309149) @@ -1,8048 +1,8060 @@ //===---- TargetInfo.cpp - Encapsulate target details -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // These classes wrap the information about a call or function // definition used to handle ABI compliancy. // //===----------------------------------------------------------------------===// #include "TargetInfo.h" #include "ABIInfo.h" #include "CGCXXABI.h" #include "CGValue.h" #include "CodeGenFunction.h" #include "clang/AST/RecordLayout.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/CodeGen/SwiftCallingConv.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Type.h" #include "llvm/Support/raw_ostream.h" #include // std::sort using namespace clang; using namespace CodeGen; static void AssignToArrayRange(CodeGen::CGBuilderTy &Builder, llvm::Value *Array, llvm::Value *Value, unsigned FirstIndex, unsigned LastIndex) { // Alternatively, we could emit this as a loop in the source. for (unsigned I = FirstIndex; I <= LastIndex; ++I) { llvm::Value *Cell = Builder.CreateConstInBoundsGEP1_32(Builder.getInt8Ty(), Array, I); Builder.CreateAlignedStore(Value, Cell, CharUnits::One()); } } static bool isAggregateTypeForABI(QualType T) { return !CodeGenFunction::hasScalarEvaluationKind(T) || T->isMemberFunctionPointerType(); } ABIArgInfo ABIInfo::getNaturalAlignIndirect(QualType Ty, bool ByRef, bool Realign, llvm::Type *Padding) const { return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty), ByRef, Realign, Padding); } ABIArgInfo ABIInfo::getNaturalAlignIndirectInReg(QualType Ty, bool Realign) const { return ABIArgInfo::getIndirectInReg(getContext().getTypeAlignInChars(Ty), /*ByRef*/ false, Realign); } Address ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { return Address::invalid(); } ABIInfo::~ABIInfo() {} /// Does the given lowering require more than the given number of /// registers when expanded? 
///
/// This is intended to be the basis of a reasonable basic implementation
/// of should{Pass,Return}IndirectlyForSwift.
///
/// For most targets, a limit of four total registers is reasonable; this
/// limits the amount of code required in order to move around the value
/// in case it wasn't produced immediately prior to the call by the caller
/// (or wasn't produced in exactly the right registers) or isn't used
/// immediately within the callee.  But some targets may need to further
/// limit the register count due to an inability to support that many
/// return registers.
static bool occupiesMoreThan(CodeGenTypes &cgt,
                             ArrayRef<llvm::Type*> scalarTypes,
                             unsigned maxAllRegisters) {
  unsigned intCount = 0, fpCount = 0;
  for (llvm::Type *type : scalarTypes) {
    if (type->isPointerTy()) {
      intCount++;
    } else if (auto intTy = dyn_cast<llvm::IntegerType>(type)) {
      auto ptrWidth = cgt.getTarget().getPointerWidth(0);
      intCount += (intTy->getBitWidth() + ptrWidth - 1) / ptrWidth;
    } else {
      assert(type->isVectorTy() || type->isFloatingPointTy());
      fpCount++;
    }
  }

  return (intCount + fpCount > maxAllRegisters);
}

bool SwiftABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize,
                                             llvm::Type *eltTy,
                                             unsigned numElts) const {
  // The default implementation of this assumes that the target guarantees
  // 128-bit SIMD support but nothing more.
  return (vectorSize.getQuantity() > 8 && vectorSize.getQuantity() <= 16);
}

static CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT,
                                              CGCXXABI &CXXABI) {
  const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl());
  if (!RD)
    return CGCXXABI::RAA_Default;
  return CXXABI.getRecordArgABI(RD);
}

static CGCXXABI::RecordArgABI getRecordArgABI(QualType T,
                                              CGCXXABI &CXXABI) {
  const RecordType *RT = T->getAs<RecordType>();
  if (!RT)
    return CGCXXABI::RAA_Default;
  return getRecordArgABI(RT, CXXABI);
}

/// Pass transparent unions as if they were the type of the first element. Sema
/// should ensure that all elements of the union have the same "machine type".
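A worked instance of the counting rule in occupiesMoreThan above, assuming 64-bit pointers (editor's sketch, not part of this change):

bool occupiesExample() {
  unsigned ptrWidth = 64, intBits = 128;                    // assumed target/type
  unsigned intCount = (intBits + ptrWidth - 1) / ptrWidth;  // i128 -> 2 registers
  unsigned fpCount  = 1;                                    // one double
  return (intCount + fpCount) > 4;                          // false: still fits
}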
static QualType useFirstFieldIfTransparentUnion(QualType Ty) { if (const RecordType *UT = Ty->getAsUnionType()) { const RecordDecl *UD = UT->getDecl(); if (UD->hasAttr()) { assert(!UD->field_empty() && "sema created an empty transparent union"); return UD->field_begin()->getType(); } } return Ty; } CGCXXABI &ABIInfo::getCXXABI() const { return CGT.getCXXABI(); } ASTContext &ABIInfo::getContext() const { return CGT.getContext(); } llvm::LLVMContext &ABIInfo::getVMContext() const { return CGT.getLLVMContext(); } const llvm::DataLayout &ABIInfo::getDataLayout() const { return CGT.getDataLayout(); } const TargetInfo &ABIInfo::getTarget() const { return CGT.getTarget(); } bool ABIInfo:: isAndroid() const { return getTarget().getTriple().isAndroid(); } bool ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { return false; } bool ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, uint64_t Members) const { return false; } bool ABIInfo::shouldSignExtUnsignedType(QualType Ty) const { return false; } LLVM_DUMP_METHOD void ABIArgInfo::dump() const { raw_ostream &OS = llvm::errs(); OS << "(ABIArgInfo Kind="; switch (TheKind) { case Direct: OS << "Direct Type="; if (llvm::Type *Ty = getCoerceToType()) Ty->print(OS); else OS << "null"; break; case Extend: OS << "Extend"; break; case Ignore: OS << "Ignore"; break; case InAlloca: OS << "InAlloca Offset=" << getInAllocaFieldIndex(); break; case Indirect: OS << "Indirect Align=" << getIndirectAlign().getQuantity() << " ByVal=" << getIndirectByVal() << " Realign=" << getIndirectRealign(); break; case Expand: OS << "Expand"; break; case CoerceAndExpand: OS << "CoerceAndExpand Type="; getCoerceAndExpandType()->print(OS); break; } OS << ")\n"; } // Dynamically round a pointer up to a multiple of the given alignment. static llvm::Value *emitRoundPointerUpToAlignment(CodeGenFunction &CGF, llvm::Value *Ptr, CharUnits Align) { llvm::Value *PtrAsInt = Ptr; // OverflowArgArea = (OverflowArgArea + Align - 1) & -Align; PtrAsInt = CGF.Builder.CreatePtrToInt(PtrAsInt, CGF.IntPtrTy); PtrAsInt = CGF.Builder.CreateAdd(PtrAsInt, llvm::ConstantInt::get(CGF.IntPtrTy, Align.getQuantity() - 1)); PtrAsInt = CGF.Builder.CreateAnd(PtrAsInt, llvm::ConstantInt::get(CGF.IntPtrTy, -Align.getQuantity())); PtrAsInt = CGF.Builder.CreateIntToPtr(PtrAsInt, Ptr->getType(), Ptr->getName() + ".aligned"); return PtrAsInt; } /// Emit va_arg for a platform using the common void* representation, /// where arguments are simply emitted in an array of slots on the stack. /// /// This version implements the core direct-value passing rules. /// /// \param SlotSize - The size and alignment of a stack slot. /// Each argument will be allocated to a multiple of this number of /// slots, and all the slots will be aligned to this value. /// \param AllowHigherAlign - The slot alignment is not a cap; /// an argument type with an alignment greater than the slot size /// will be emitted on a higher-alignment address, potentially /// leaving one or more empty slots behind as padding. If this /// is false, the returned address might be less-aligned than /// DirectAlign. static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF, Address VAListAddr, llvm::Type *DirectTy, CharUnits DirectSize, CharUnits DirectAlign, CharUnits SlotSize, bool AllowHigherAlign) { // Cast the element type to i8* if necessary. Some platforms define // va_list as a struct containing an i8* instead of just an i8*. 
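The rounding in emitRoundPointerUpToAlignment above is the standard align-up idiom for power-of-two alignments; & -Align and & ~(Align - 1) compute the same mask. A self-contained sketch (editor's example):

#include <cstdint>
uintptr_t alignUp(uintptr_t P, uintptr_t Align) {
  return (P + Align - 1) & ~(Align - 1);  // alignUp(0x1001, 16) == 0x1010
}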
if (VAListAddr.getElementType() != CGF.Int8PtrTy) VAListAddr = CGF.Builder.CreateElementBitCast(VAListAddr, CGF.Int8PtrTy); llvm::Value *Ptr = CGF.Builder.CreateLoad(VAListAddr, "argp.cur"); // If the CC aligns values higher than the slot size, do so if needed. Address Addr = Address::invalid(); if (AllowHigherAlign && DirectAlign > SlotSize) { Addr = Address(emitRoundPointerUpToAlignment(CGF, Ptr, DirectAlign), DirectAlign); } else { Addr = Address(Ptr, SlotSize); } // Advance the pointer past the argument, then store that back. CharUnits FullDirectSize = DirectSize.alignTo(SlotSize); llvm::Value *NextPtr = CGF.Builder.CreateConstInBoundsByteGEP(Addr.getPointer(), FullDirectSize, "argp.next"); CGF.Builder.CreateStore(NextPtr, VAListAddr); // If the argument is smaller than a slot, and this is a big-endian // target, the argument will be right-adjusted in its slot. if (DirectSize < SlotSize && CGF.CGM.getDataLayout().isBigEndian() && !DirectTy->isStructTy()) { Addr = CGF.Builder.CreateConstInBoundsByteGEP(Addr, SlotSize - DirectSize); } Addr = CGF.Builder.CreateElementBitCast(Addr, DirectTy); return Addr; } /// Emit va_arg for a platform using the common void* representation, /// where arguments are simply emitted in an array of slots on the stack. /// /// \param IsIndirect - Values of this type are passed indirectly. /// \param ValueInfo - The size and alignment of this type, generally /// computed with getContext().getTypeInfoInChars(ValueTy). /// \param SlotSizeAndAlign - The size and alignment of a stack slot. /// Each argument will be allocated to a multiple of this number of /// slots, and all the slots will be aligned to this value. /// \param AllowHigherAlign - The slot alignment is not a cap; /// an argument type with an alignment greater than the slot size /// will be emitted on a higher-alignment address, potentially /// leaving one or more empty slots behind as padding. static Address emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType ValueTy, bool IsIndirect, std::pair ValueInfo, CharUnits SlotSizeAndAlign, bool AllowHigherAlign) { // The size and alignment of the value that was passed directly. CharUnits DirectSize, DirectAlign; if (IsIndirect) { DirectSize = CGF.getPointerSize(); DirectAlign = CGF.getPointerAlign(); } else { DirectSize = ValueInfo.first; DirectAlign = ValueInfo.second; } // Cast the address we've calculated to the right type. llvm::Type *DirectTy = CGF.ConvertTypeForMem(ValueTy); if (IsIndirect) DirectTy = DirectTy->getPointerTo(0); Address Addr = emitVoidPtrDirectVAArg(CGF, VAListAddr, DirectTy, DirectSize, DirectAlign, SlotSizeAndAlign, AllowHigherAlign); if (IsIndirect) { Addr = Address(CGF.Builder.CreateLoad(Addr), ValueInfo.second); } return Addr; } static Address emitMergePHI(CodeGenFunction &CGF, Address Addr1, llvm::BasicBlock *Block1, Address Addr2, llvm::BasicBlock *Block2, const llvm::Twine &Name = "") { assert(Addr1.getType() == Addr2.getType()); llvm::PHINode *PHI = CGF.Builder.CreatePHI(Addr1.getType(), 2, Name); PHI->addIncoming(Addr1.getPointer(), Block1); PHI->addIncoming(Addr2.getPointer(), Block2); CharUnits Align = std::min(Addr1.getAlignment(), Addr2.getAlignment()); return Address(PHI, Align); } TargetCodeGenInfo::~TargetCodeGenInfo() { delete Info; } // If someone can figure out a general rule for this, that would be great. // It's probably just doomed to be platform-dependent, though. 
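Two size computations in emitVoidPtrDirectVAArg are worth making concrete: the slot advance rounds the value size up to a whole number of slots, and on big-endian targets a small argument is right-adjusted within its slot. Editor's sketch with assumed sizes:

// An i8 argument in an 8-byte slot: argp.next advances by 8, and on a
// big-endian target the load address is bumped by SlotSize - DirectSize.
unsigned vaArgLoadAdjust(unsigned SlotSize, unsigned DirectSize, bool BE) {
  return (BE && DirectSize < SlotSize) ? SlotSize - DirectSize : 0;
}
// vaArgLoadAdjust(8, 1, true) == 7: read the last byte of the slot.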
unsigned TargetCodeGenInfo::getSizeOfUnwindException() const { // Verified for: // x86-64 FreeBSD, Linux, Darwin // x86-32 FreeBSD, Linux, Darwin // PowerPC Linux, Darwin // ARM Darwin (*not* EABI) // AArch64 Linux return 32; } bool TargetCodeGenInfo::isNoProtoCallVariadic(const CallArgList &args, const FunctionNoProtoType *fnType) const { // The following conventions are known to require this to be false: // x86_stdcall // MIPS // For everything else, we just prefer false unless we opt out. return false; } void TargetCodeGenInfo::getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const { // This assumes the user is passing a library name like "rt" instead of a // filename like "librt.a/so", and that they don't care whether it's static or // dynamic. Opt = "-l"; Opt += Lib; } unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const { return llvm::CallingConv::C; } static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays); /// isEmptyField - Return true iff a the field is "empty", that is it /// is an unnamed bit-field or an (array of) empty record(s). static bool isEmptyField(ASTContext &Context, const FieldDecl *FD, bool AllowArrays) { if (FD->isUnnamedBitfield()) return true; QualType FT = FD->getType(); // Constant arrays of empty records count as empty, strip them off. // Constant arrays of zero length always count as empty. if (AllowArrays) while (const ConstantArrayType *AT = Context.getAsConstantArrayType(FT)) { if (AT->getSize() == 0) return true; FT = AT->getElementType(); } const RecordType *RT = FT->getAs(); if (!RT) return false; // C++ record fields are never empty, at least in the Itanium ABI. // // FIXME: We should use a predicate for whether this behavior is true in the // current ABI. if (isa(RT->getDecl())) return false; return isEmptyRecord(Context, FT, AllowArrays); } /// isEmptyRecord - Return true iff a structure contains only empty /// fields. Note that a structure with a flexible array member is not /// considered empty. static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) { const RecordType *RT = T->getAs(); if (!RT) return false; const RecordDecl *RD = RT->getDecl(); if (RD->hasFlexibleArrayMember()) return false; // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) for (const auto &I : CXXRD->bases()) if (!isEmptyRecord(Context, I.getType(), true)) return false; for (const auto *I : RD->fields()) if (!isEmptyField(Context, I, AllowArrays)) return false; return true; } /// isSingleElementStruct - Determine if a structure is a "single /// element struct", i.e. it has exactly one non-empty field or /// exactly one field which is itself a single element /// struct. Structures with flexible array members are never /// considered single element structs. /// /// \return The field declaration for the single non-empty field, if /// it exists. static const Type *isSingleElementStruct(QualType T, ASTContext &Context) { const RecordType *RT = T->getAs(); if (!RT) return nullptr; const RecordDecl *RD = RT->getDecl(); if (RD->hasFlexibleArrayMember()) return nullptr; const Type *Found = nullptr; // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { for (const auto &I : CXXRD->bases()) { // Ignore empty records. if (isEmptyRecord(Context, I.getType(), true)) continue; // If we already found an element then this isn't a single-element struct. 
      if (Found)
        return nullptr;

      // If this is non-empty and not a single element struct, the composite
      // cannot be a single element struct.
      Found = isSingleElementStruct(I.getType(), Context);
      if (!Found)
        return nullptr;
    }
  }

  // Check for single element.
  for (const auto *FD : RD->fields()) {
    QualType FT = FD->getType();

    // Ignore empty fields.
    if (isEmptyField(Context, FD, true))
      continue;

    // If we already found an element then this isn't a single-element
    // struct.
    if (Found)
      return nullptr;

    // Treat single element arrays as the element.
    while (const ConstantArrayType *AT = Context.getAsConstantArrayType(FT)) {
      if (AT->getSize().getZExtValue() != 1)
        break;
      FT = AT->getElementType();
    }

    if (!isAggregateTypeForABI(FT)) {
      Found = FT.getTypePtr();
    } else {
      Found = isSingleElementStruct(FT, Context);
      if (!Found)
        return nullptr;
    }
  }

  // We don't consider a struct a single-element struct if it has
  // padding beyond the element type.
  if (Found && Context.getTypeSize(Found) != Context.getTypeSize(T))
    return nullptr;

  return Found;
}
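// A few illustrative cases for the predicate above (a sketch):
//   struct A { struct { double d; } inner; };  // single-element: double
//   struct B { int x; struct {} empty; };      // single-element: int (in C)
//   struct C { float f; int i; };              // two elements -> nullptr
//   struct D { double d[1]; };                 // array of one -> double
// A struct whose total size exceeds the found element's size (trailing
// padding) is rejected by the final getTypeSize check.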
namespace {
Address EmitVAArgInstr(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                       const ABIArgInfo &AI) {
  // This default implementation defers to the llvm backend's va_arg
  // instruction. It can handle only passing arguments directly
  // (typically only handled in the backend for primitive types), or
  // aggregates passed indirectly by pointer (NOTE: if the "byval"
  // flag has ABI impact in the callee, this implementation cannot
  // work.)

  // Only a few cases are covered here at the moment -- those needed
  // by the default abi.
  llvm::Value *Val;

  if (AI.isIndirect()) {
    assert(!AI.getPaddingType() &&
           "Unexpected PaddingType seen in arginfo in generic VAArg emitter!");
    assert(
        !AI.getIndirectRealign() &&
        "Unexpected IndirectRealign seen in arginfo in generic VAArg emitter!");

    auto TyInfo = CGF.getContext().getTypeInfoInChars(Ty);
    CharUnits TyAlignForABI = TyInfo.second;

    llvm::Type *BaseTy =
        llvm::PointerType::getUnqual(CGF.ConvertTypeForMem(Ty));
    llvm::Value *Addr =
        CGF.Builder.CreateVAArg(VAListAddr.getPointer(), BaseTy);
    return Address(Addr, TyAlignForABI);
  } else {
    assert((AI.isDirect() || AI.isExtend()) &&
           "Unexpected ArgInfo Kind in generic VAArg emitter!");

    assert(!AI.getInReg() &&
           "Unexpected InReg seen in arginfo in generic VAArg emitter!");
    assert(!AI.getPaddingType() &&
           "Unexpected PaddingType seen in arginfo in generic VAArg emitter!");
    assert(!AI.getDirectOffset() &&
           "Unexpected DirectOffset seen in arginfo in generic VAArg emitter!");
    assert(!AI.getCoerceToType() &&
           "Unexpected CoerceToType seen in arginfo in generic VAArg emitter!");

    Address Temp = CGF.CreateMemTemp(Ty, "varet");
    Val = CGF.Builder.CreateVAArg(VAListAddr.getPointer(), CGF.ConvertType(Ty));
    CGF.Builder.CreateStore(Val, Temp);
    return Temp;
  }
}

/// DefaultABIInfo - The default implementation for ABI specific
/// details. This implementation provides information which results in
/// self-consistent and sensible LLVM IR generation, but does not
/// conform to any particular ABI.
class DefaultABIInfo : public ABIInfo {
public:
  DefaultABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {}

  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyArgumentType(QualType RetTy) const;

  void computeInfo(CGFunctionInfo &FI) const override {
    if (!getCXXABI().classifyReturnType(FI))
      FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
    for (auto &I : FI.arguments())
      I.info = classifyArgumentType(I.type);
  }

  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override {
    return EmitVAArgInstr(CGF, VAListAddr, Ty, classifyArgumentType(Ty));
  }
};

class DefaultTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  DefaultTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
      : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
};

ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const {
  Ty = useFirstFieldIfTransparentUnion(Ty);

  if (isAggregateTypeForABI(Ty)) {
    // Records with non-trivial destructors/copy-constructors should not be
    // passed by value.
    if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
      return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);

    return getNaturalAlignIndirect(Ty);
  }

  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
    Ty = EnumTy->getDecl()->getIntegerType();

  return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend()
                                        : ABIArgInfo::getDirect());
}

ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  if (isAggregateTypeForABI(RetTy))
    return getNaturalAlignIndirect(RetTy);

  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
    RetTy = EnumTy->getDecl()->getIntegerType();

  return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend()
                                           : ABIArgInfo::getDirect());
}
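// Sketch of what this default lowering produces:
//   void f(short);     // promotable integer -> Extend (sign/zero-extended)
//   void f(int);       // not promotable     -> Direct
//   void f(struct S);  // aggregate          -> natural-align Indirect
//   struct S f(void);  // aggregate return   -> natural-align Indirect (sret)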
//===----------------------------------------------------------------------===//
// WebAssembly ABI Implementation
//
// This is a very simple ABI that relies a lot on DefaultABIInfo.
//===----------------------------------------------------------------------===//

class WebAssemblyABIInfo final : public DefaultABIInfo {
public:
  explicit WebAssemblyABIInfo(CodeGen::CodeGenTypes &CGT)
      : DefaultABIInfo(CGT) {}

private:
  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyArgumentType(QualType Ty) const;

  // DefaultABIInfo's classifyReturnType and classifyArgumentType are
  // non-virtual, but computeInfo and EmitVAArg are virtual, so we
  // overload them.
  void computeInfo(CGFunctionInfo &FI) const override {
    if (!getCXXABI().classifyReturnType(FI))
      FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
    for (auto &Arg : FI.arguments())
      Arg.info = classifyArgumentType(Arg.type);
  }

  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override;
};

class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo {
public:
  explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
      : TargetCodeGenInfo(new WebAssemblyABIInfo(CGT)) {}
};

/// \brief Classify argument of given type \p Ty.
ABIArgInfo WebAssemblyABIInfo::classifyArgumentType(QualType Ty) const {
  Ty = useFirstFieldIfTransparentUnion(Ty);

  if (isAggregateTypeForABI(Ty)) {
    // Records with non-trivial destructors/copy-constructors should not be
    // passed by value.
    if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
      return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
    // Ignore empty structs/unions.
    if (isEmptyRecord(getContext(), Ty, true))
      return ABIArgInfo::getIgnore();
    // Lower single-element structs to just pass a regular value. TODO: We
    // could do reasonable-size multiple-element structs too, using
    // getExpand(), though watch out for things like bitfields.
    if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
      return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
  }

  // Otherwise just do the default thing.
  return DefaultABIInfo::classifyArgumentType(Ty);
}

ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const {
  if (isAggregateTypeForABI(RetTy)) {
    // Records with non-trivial destructors/copy-constructors should not be
    // returned by value.
    if (!getRecordArgABI(RetTy, getCXXABI())) {
      // Ignore empty structs/unions.
      if (isEmptyRecord(getContext(), RetTy, true))
        return ABIArgInfo::getIgnore();
      // Lower single-element structs to just return a regular value. TODO: We
      // could do reasonable-size multiple-element structs too, using
      // ABIArgInfo::getDirect().
      if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
        return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
    }
  }

  // Otherwise just do the default thing.
  return DefaultABIInfo::classifyReturnType(RetTy);
}

Address WebAssemblyABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                      QualType Ty) const {
  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect=*/ false,
                          getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(4),
                          /*AllowHigherAlign=*/ true);
}

//===----------------------------------------------------------------------===//
// le32/PNaCl bitcode ABI Implementation
//
// This is a simplified version of the x86_32 ABI.  Arguments and return values
// are always passed on the stack.
//===----------------------------------------------------------------------===//

class PNaClABIInfo : public ABIInfo {
public:
  PNaClABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {}

  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyArgumentType(QualType RetTy) const;

  void computeInfo(CGFunctionInfo &FI) const override;
  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override;
};

class PNaClTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  PNaClTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
      : TargetCodeGenInfo(new PNaClABIInfo(CGT)) {}
};

void PNaClABIInfo::computeInfo(CGFunctionInfo &FI) const {
  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());

  for (auto &I : FI.arguments())
    I.info = classifyArgumentType(I.type);
}

Address PNaClABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                QualType Ty) const {
  // The PNaCL ABI is a bit odd, in that varargs don't use normal
  // function classification. Structs get passed directly for varargs
  // functions, through a rewriting transform in
  // pnacl-llvm/lib/Transforms/NaCl/ExpandVarArgs.cpp, which allows
  // this target to actually support va_arg instructions with an
  // aggregate type, unlike other targets.
  return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect());
}

/// \brief Classify argument of given type \p Ty.
ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty) const {
  if (isAggregateTypeForABI(Ty)) {
    if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
      return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
    return getNaturalAlignIndirect(Ty);
  } else if (const EnumType *EnumTy = Ty->getAs<EnumType>()) {
    // Treat an enum type as its underlying type.
    Ty = EnumTy->getDecl()->getIntegerType();
  } else if (Ty->isFloatingType()) {
    // Floating-point types don't go inreg.
    return ABIArgInfo::getDirect();
  }

  return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend()
                                        : ABIArgInfo::getDirect());
}

ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  // In the PNaCl ABI we always return records/structures on the stack.
  if (isAggregateTypeForABI(RetTy))
    return getNaturalAlignIndirect(RetTy);

  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
    RetTy = EnumTy->getDecl()->getIntegerType();

  return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend()
                                           : ABIArgInfo::getDirect());
}

/// IsX86_MMXType - Return true if this is an MMX type.
bool IsX86_MMXType(llvm::Type *IRType) {
  // Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>.
  return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == 64 &&
    cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy() &&
    IRType->getScalarSizeInBits() != 64;
}

static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
                                          StringRef Constraint,
                                          llvm::Type* Ty) {
  if ((Constraint == "y" || Constraint == "&y") && Ty->isVectorTy()) {
    if (cast<llvm::VectorType>(Ty)->getBitWidth() != 64) {
      // Invalid MMX constraint
      return nullptr;
    }

    return llvm::Type::getX86_MMXTy(CGF.getLLVMContext());
  }

  // No operation needed
  return Ty;
}

/// Returns true if this type can be passed in SSE registers with the
/// X86_VectorCall calling convention. Shared between x86_32 and x86_64.
static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) {
  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
    if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half)
      return true;
  } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
    // vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1 MMX
    // registers specially.
    unsigned VecSize = Context.getTypeSize(VT);
    if (VecSize == 128 || VecSize == 256 || VecSize == 512)
      return true;
  }
  return false;
}

/// Returns true if this aggregate is small enough to be passed in SSE registers
/// in the X86_VectorCall calling convention. Shared between x86_32 and x86_64.
static bool isX86VectorCallAggregateSmallEnough(uint64_t NumMembers) {
  return NumMembers <= 4;
}
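// For example (a sketch; the homogeneous-aggregate walk itself lives in
// ABIInfo::isHomogeneousAggregate):
//   struct HVA2 { __m128 a, b; };           // 2 x 128-bit vector -> SSE regs
//   struct HVA5 { __m128 a, b, c, d, e; };  // 5 members, too many -> memory
// float and double qualify as base types, but half (__fp16) does not.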
//===----------------------------------------------------------------------===//
// X86-32 ABI Implementation
//===----------------------------------------------------------------------===//

/// \brief Similar to llvm::CCState, but for Clang.
struct CCState {
  CCState(unsigned CC) : CC(CC), FreeRegs(0), FreeSSERegs(0) {}

  unsigned CC;
  unsigned FreeRegs;
  unsigned FreeSSERegs;
};

/// X86_32ABIInfo - The X86-32 ABI information.
class X86_32ABIInfo : public SwiftABIInfo {
  enum Class {
    Integer,
    Float
  };

  static const unsigned MinABIStackAlignInBytes = 4;

  bool IsDarwinVectorABI;
  bool IsRetSmallStructInRegABI;
  bool IsWin32StructABI;
  bool IsSoftFloatABI;
  bool IsMCUABI;
  unsigned DefaultNumRegisterParameters;

  static bool isRegisterSize(unsigned Size) {
    return (Size == 8 || Size == 16 || Size == 32 || Size == 64);
  }

  bool isHomogeneousAggregateBaseType(QualType Ty) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorTypeForVectorCall(getContext(), Ty);
  }

  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                         uint64_t NumMembers) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorCallAggregateSmallEnough(NumMembers);
  }

  bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context) const;

  /// getIndirectResult - Give a source type \arg Ty, return a suitable result
  /// such that the argument will be passed in memory.
  ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const;

  ABIArgInfo getIndirectReturnResult(QualType Ty, CCState &State) const;

  /// \brief Return the alignment to use for the given type on the stack.
  unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const;

  Class classify(QualType Ty) const;
  ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
  ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const;

  /// \brief Updates the number of available free registers, returns
  /// true if any registers were allocated.
  bool updateFreeRegs(QualType Ty, CCState &State) const;

  bool shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg,
                                bool &NeedsPadding) const;
  bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const;

  bool canExpandIndirectArgument(QualType Ty) const;

  /// \brief Rewrite the function info so that all memory arguments use
  /// inalloca.
  void rewriteWithInAlloca(CGFunctionInfo &FI) const;

  void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
                           CharUnits &StackOffset, ABIArgInfo &Info,
                           QualType Type) const;

public:

  void computeInfo(CGFunctionInfo &FI) const override;
  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override;

  X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
                bool RetSmallStructInRegABI, bool Win32StructABI,
                unsigned NumRegisterParameters, bool SoftFloatABI)
    : SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI),
      IsRetSmallStructInRegABI(RetSmallStructInRegABI),
      IsWin32StructABI(Win32StructABI),
      IsSoftFloatABI(SoftFloatABI),
      IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
      DefaultNumRegisterParameters(NumRegisterParameters) {}

  bool shouldPassIndirectlyForSwift(CharUnits totalSize,
                                    ArrayRef<llvm::Type *> scalars,
                                    bool asReturnValue) const override {
    // LLVM's x86-32 lowering currently only assigns up to three
    // integer registers and three fp registers.  Oddly, it'll use up to
    // four vector registers for vectors, but those can overlap with the
    // scalar registers.
    return occupiesMoreThan(CGT, scalars, /*total*/ 3);
  }
};

class X86_32TargetCodeGenInfo : public TargetCodeGenInfo {
public:
  X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
                          bool RetSmallStructInRegABI, bool Win32StructABI,
                          unsigned NumRegisterParameters, bool SoftFloatABI)
      : TargetCodeGenInfo(new X86_32ABIInfo(
            CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
            NumRegisterParameters, SoftFloatABI)) {}

  static bool isStructReturnInRegABI(
      const llvm::Triple &Triple, const CodeGenOptions &Opts);

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &CGM) const override;

  int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
    // Darwin uses different dwarf register numbers for EH.
    if (CGM.getTarget().getTriple().isOSDarwin()) return 5;
    return 4;
  }

  bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                               llvm::Value *Address) const override;

  llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
                                  StringRef Constraint,
                                  llvm::Type* Ty) const override {
    return X86AdjustInlineAsmType(CGF, Constraint, Ty);
  }

  void addReturnRegisterOutputs(CodeGenFunction &CGF, LValue ReturnValue,
                                std::string &Constraints,
                                std::vector<llvm::Type *> &ResultRegTypes,
                                std::vector<llvm::Type *> &ResultTruncRegTypes,
                                std::vector<LValue> &ResultRegDests,
                                std::string &AsmString,
                                unsigned NumOutputs) const override;

  llvm::Constant *
  getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override {
    unsigned Sig = (0xeb << 0) |  // jmp rel8
                   (0x06 << 8) |  //           .+0x08
                   ('F' << 16) |
                   ('T' << 24);
    return llvm::ConstantInt::get(CGM.Int32Ty, Sig);
  }

  StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
    return "movl\t%ebp, %ebp"
           "\t\t## marker for objc_retainAutoreleaseReturnValue";
  }
};

}

/// Rewrite input constraint references after adding some output constraints.
/// In the case where there is one output and one input and we add one output,
/// we need to replace all operand references greater than or equal to 1:
///     mov $0, $1
///     mov eax, $1
/// The result will be:
///     mov $0, $2
///     mov eax, $2
static void rewriteInputConstraintReferences(unsigned FirstIn,
                                             unsigned NumNewOuts,
                                             std::string &AsmString) {
  std::string Buf;
  llvm::raw_string_ostream OS(Buf);
  size_t Pos = 0;
  while (Pos < AsmString.size()) {
    size_t DollarStart = AsmString.find('$', Pos);
    if (DollarStart == std::string::npos)
      DollarStart = AsmString.size();
    size_t DollarEnd = AsmString.find_first_not_of('$', DollarStart);
    if (DollarEnd == std::string::npos)
      DollarEnd = AsmString.size();
    OS << StringRef(&AsmString[Pos], DollarEnd - Pos);
    Pos = DollarEnd;
    size_t NumDollars = DollarEnd - DollarStart;
    if (NumDollars % 2 != 0 && Pos < AsmString.size()) {
      // We have an operand reference.
      size_t DigitStart = Pos;
      size_t DigitEnd = AsmString.find_first_not_of("0123456789", DigitStart);
      if (DigitEnd == std::string::npos)
        DigitEnd = AsmString.size();
      StringRef OperandStr(&AsmString[DigitStart], DigitEnd - DigitStart);
      unsigned OperandIndex;
      if (!OperandStr.getAsInteger(10, OperandIndex)) {
        if (OperandIndex >= FirstIn)
          OperandIndex += NumNewOuts;
        OS << OperandIndex;
      } else {
        OS << OperandStr;
      }
      Pos = DigitEnd;
    }
  }
  AsmString = std::move(OS.str());
}

/// Add output constraints for EAX:EDX because they are return registers.
void X86_32TargetCodeGenInfo::addReturnRegisterOutputs(
    CodeGenFunction &CGF, LValue ReturnSlot, std::string &Constraints,
    std::vector<llvm::Type *> &ResultRegTypes,
    std::vector<llvm::Type *> &ResultTruncRegTypes,
    std::vector<LValue> &ResultRegDests, std::string &AsmString,
    unsigned NumOutputs) const {
  uint64_t RetWidth = CGF.getContext().getTypeSize(ReturnSlot.getType());

  // Use the EAX constraint if the width is 32 or smaller and EAX:EDX if it is
  // larger.
  if (!Constraints.empty())
    Constraints += ',';
  if (RetWidth <= 32) {
    Constraints += "={eax}";
    ResultRegTypes.push_back(CGF.Int32Ty);
  } else {
    // Use the 'A' constraint for EAX:EDX.
    Constraints += "=A";
    ResultRegTypes.push_back(CGF.Int64Ty);
  }

  // Truncate EAX or EAX:EDX to an integer of the appropriate size.
  llvm::Type *CoerceTy = llvm::IntegerType::get(CGF.getLLVMContext(), RetWidth);
  ResultTruncRegTypes.push_back(CoerceTy);

  // Coerce the integer by bitcasting the return slot pointer.
  ReturnSlot.setAddress(CGF.Builder.CreateBitCast(ReturnSlot.getAddress(),
                                                  CoerceTy->getPointerTo()));
  ResultRegDests.push_back(ReturnSlot);

  rewriteInputConstraintReferences(NumOutputs, 1, AsmString);
}

/// shouldReturnTypeInRegister - Determine if the given type should be
/// returned in a register (for the Darwin and MCU ABI).
bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty,
                                               ASTContext &Context) const {
  uint64_t Size = Context.getTypeSize(Ty);

  // For i386, type must be register sized.
  // For the MCU ABI, it only needs to be <= 8-byte
  if ((IsMCUABI && Size > 64) || (!IsMCUABI && !isRegisterSize(Size)))
    return false;

  if (Ty->isVectorType()) {
    // 64- and 128- bit vectors inside structures are not returned in
    // registers.
    if (Size == 64 || Size == 128)
      return false;

    return true;
  }

  // If this is a builtin, pointer, enum, complex type, member pointer, or
  // member function pointer it is ok.
  if (Ty->getAs<BuiltinType>() || Ty->hasPointerRepresentation() ||
      Ty->isAnyComplexType() || Ty->isEnumeralType() ||
      Ty->isBlockPointerType() || Ty->isMemberPointerType())
    return true;

  // Arrays are treated like records.
  if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty))
    return shouldReturnTypeInRegister(AT->getElementType(), Context);

  // Otherwise, it must be a record type.
  const RecordType *RT = Ty->getAs<RecordType>();
  if (!RT) return false;

  // FIXME: Traverse bases here too.

  // Structure types are passed in register if all fields would be
  // passed in a register.
  for (const auto *FD : RT->getDecl()->fields()) {
    // Empty fields are ignored.
    if (isEmptyField(Context, FD, true))
      continue;

    // Check fields recursively.
    if (!shouldReturnTypeInRegister(FD->getType(), Context))
      return false;
  }
  return true;
}

static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) {
  // Treat complex types as the element type.
  if (const ComplexType *CTy = Ty->getAs<ComplexType>())
    Ty = CTy->getElementType();

  // Check for a type which we know has a simple scalar argument-passing
  // convention without any padding.  (We're specifically looking for 32
  // and 64-bit integer and integer-equivalents, float, and double.)
  if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() &&
      !Ty->isEnumeralType() && !Ty->isBlockPointerType())
    return false;

  uint64_t Size = Context.getTypeSize(Ty);
  return Size == 32 || Size == 64;
}

/// Test whether an argument type which is to be passed indirectly (on the
/// stack) would have the equivalent layout if it was expanded into separate
/// arguments. If so, we prefer to do the latter to avoid inhibiting
/// optimizations.
bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const {
  // We can only expand structure types.
  const RecordType *RT = Ty->getAs<RecordType>();
  if (!RT)
    return false;
  const RecordDecl *RD = RT->getDecl();
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
    if (!IsWin32StructABI) {
      // On non-Windows, we have to conservatively match our old bitcode
      // prototypes in order to be ABI-compatible at the bitcode level.
      if (!CXXRD->isCLike())
        return false;
    } else {
      // Don't do this for dynamic classes.
      if (CXXRD->isDynamicClass())
        return false;
      // Don't do this if there are any non-empty bases.
      for (const CXXBaseSpecifier &Base : CXXRD->bases()) {
        if (!isEmptyRecord(getContext(), Base.getType(), /*AllowArrays=*/true))
          return false;
      }
    }
  }

  uint64_t Size = 0;

  for (const auto *FD : RD->fields()) {
    // Scalar arguments on the stack get 4 byte alignment on x86. If the
    // argument is smaller than 32-bits, expanding the struct will create
    // alignment padding.
    if (!is32Or64BitBasicType(FD->getType(), getContext()))
      return false;

    // FIXME: Reject bit-fields wholesale; there are two problems, we don't
    // know how to expand them yet, and the predicate for telling if a bitfield
    // still counts as "basic" is more complicated than what we were doing
    // previously.
    if (FD->isBitField())
      return false;

    Size += getContext().getTypeSize(FD->getType());
  }

  // We can do this if there was no alignment padding.
  return Size == getContext().getTypeSize(Ty);
}
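// Expansion examples for the test above (a sketch):
//   struct P { int x; int y; };      // two 32-bit scalars, no padding -> yes
//   struct Q { short s; int i; };    // 16-bit field forces padding    -> no
//   struct R { int x; int b : 3; };  // bit-field                      -> no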
ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy,
                                                  CCState &State) const {
  // If the return value is indirect, then the hidden argument is consuming one
  // integer register.
  if (State.FreeRegs) {
    --State.FreeRegs;
    if (!IsMCUABI)
      return getNaturalAlignIndirectInReg(RetTy);
  }
  return getNaturalAlignIndirect(RetTy, /*ByVal=*/false);
}

ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy,
                                             CCState &State) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  const Type *Base = nullptr;
  uint64_t NumElts = 0;
  if (State.CC == llvm::CallingConv::X86_VectorCall &&
      isHomogeneousAggregate(RetTy, Base, NumElts)) {
    // The LLVM struct type for such an aggregate should lower properly.
    return ABIArgInfo::getDirect();
  }

  if (const VectorType *VT = RetTy->getAs<VectorType>()) {
    // On Darwin, some vectors are returned in registers.
    if (IsDarwinVectorABI) {
      uint64_t Size = getContext().getTypeSize(RetTy);

      // 128-bit vectors are a special case; they are returned in
      // registers and we need to make sure to pick a type the LLVM
      // backend will like.
      if (Size == 128)
        return ABIArgInfo::getDirect(llvm::VectorType::get(
                  llvm::Type::getInt64Ty(getVMContext()), 2));

      // Always return in register if it fits in a general purpose
      // register, or if it is 64 bits and has a single element.
      if ((Size == 8 || Size == 16 || Size == 32) ||
          (Size == 64 && VT->getNumElements() == 1))
        return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
                                                            Size));

      return getIndirectReturnResult(RetTy, State);
    }

    return ABIArgInfo::getDirect();
  }

  if (isAggregateTypeForABI(RetTy)) {
    if (const RecordType *RT = RetTy->getAs<RecordType>()) {
      // Structures with flexible arrays are always indirect.
      if (RT->getDecl()->hasFlexibleArrayMember())
        return getIndirectReturnResult(RetTy, State);
    }

    // If specified, structs and unions are always indirect.
    if (!IsRetSmallStructInRegABI && !RetTy->isAnyComplexType())
      return getIndirectReturnResult(RetTy, State);

    // Ignore empty structs/unions.
    if (isEmptyRecord(getContext(), RetTy, true))
      return ABIArgInfo::getIgnore();

    // Small structures which are register sized are generally returned
    // in a register.
    if (shouldReturnTypeInRegister(RetTy, getContext())) {
      uint64_t Size = getContext().getTypeSize(RetTy);

      // As a special-case, if the struct is a "single-element" struct, and
      // the field is of type "float" or "double", return it in a
      // floating-point register. (MSVC does not apply this special case.)
      // We apply a similar transformation for pointer types to improve the
      // quality of the generated IR.
      if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
        if ((!IsWin32StructABI && SeltTy->isRealFloatingType())
            || SeltTy->hasPointerRepresentation())
          return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));

      // FIXME: We should be able to narrow this integer in cases with dead
      // padding.
      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),Size));
    }

    return getIndirectReturnResult(RetTy, State);
  }

  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
    RetTy = EnumTy->getDecl()->getIntegerType();

  return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend()
                                           : ABIArgInfo::getDirect());
}

static bool isSSEVectorType(ASTContext &Context, QualType Ty) {
  return Ty->getAs<VectorType>() && Context.getTypeSize(Ty) == 128;
}

static bool isRecordWithSSEVectorType(ASTContext &Context, QualType Ty) {
  const RecordType *RT = Ty->getAs<RecordType>();
  if (!RT)
    return 0;
  const RecordDecl *RD = RT->getDecl();

  // If this is a C++ record, check the bases first.
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
    for (const auto &I : CXXRD->bases())
      if (!isRecordWithSSEVectorType(Context, I.getType()))
        return false;

  for (const auto *i : RD->fields()) {
    QualType FT = i->getType();

    if (isSSEVectorType(Context, FT))
      return true;

    if (isRecordWithSSEVectorType(Context, FT))
      return true;
  }

  return false;
}

unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty,
                                                 unsigned Align) const {
  // Otherwise, if the alignment is less than or equal to the minimum ABI
  // alignment, just use the default; the backend will handle this.
  if (Align <= MinABIStackAlignInBytes)
    return 0; // Use default alignment.

  // On non-Darwin, the stack type alignment is always 4.
  if (!IsDarwinVectorABI) {
    // Set explicit alignment, since we may need to realign the top.
    return MinABIStackAlignInBytes;
  }

  // Otherwise, if the type contains an SSE vector type, the alignment is 16.
  if (Align >= 16 && (isSSEVectorType(getContext(), Ty) ||
                      isRecordWithSSEVectorType(getContext(), Ty)))
    return 16;

  return MinABIStackAlignInBytes;
}

ABIArgInfo X86_32ABIInfo::getIndirectResult(QualType Ty, bool ByVal,
                                            CCState &State) const {
  if (!ByVal) {
    if (State.FreeRegs) {
      --State.FreeRegs; // Non-byval indirects just use one pointer.
      if (!IsMCUABI)
        return getNaturalAlignIndirectInReg(Ty);
    }
    return getNaturalAlignIndirect(Ty, false);
  }

  // Compute the byval alignment.
  unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
  unsigned StackAlign = getTypeStackAlignInBytes(Ty, TypeAlign);
  if (StackAlign == 0)
    return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true);

  // If the stack alignment is less than the type alignment, realign the
  // argument.
  bool Realign = TypeAlign > StackAlign;
  return ABIArgInfo::getIndirect(CharUnits::fromQuantity(StackAlign),
                                 /*ByVal=*/true, Realign);
}

X86_32ABIInfo::Class X86_32ABIInfo::classify(QualType Ty) const {
  const Type *T = isSingleElementStruct(Ty, getContext());
  if (!T)
    T = Ty.getTypePtr();

  if (const BuiltinType *BT = T->getAs<BuiltinType>()) {
    BuiltinType::Kind K = BT->getKind();
    if (K == BuiltinType::Float || K == BuiltinType::Double)
      return Float;
  }
  return Integer;
}

bool X86_32ABIInfo::updateFreeRegs(QualType Ty, CCState &State) const {
  if (!IsSoftFloatABI) {
    Class C = classify(Ty);
    if (C == Float)
      return false;
  }

  unsigned Size = getContext().getTypeSize(Ty);
  unsigned SizeInRegs = (Size + 31) / 32;

  if (SizeInRegs == 0)
    return false;

  if (!IsMCUABI) {
    if (SizeInRegs > State.FreeRegs) {
      State.FreeRegs = 0;
      return false;
    }
  } else {
    // The MCU psABI allows passing parameters in-reg even if there are
    // earlier parameters that are passed on the stack. Also,
    // it does not allow passing >8-byte structs in-register,
    // even if there are 3 free registers available.
    if (SizeInRegs > State.FreeRegs || SizeInRegs > 2)
      return false;
  }

  State.FreeRegs -= SizeInRegs;
  return true;
}

bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State,
                                             bool &InReg,
                                             bool &NeedsPadding) const {
  // On Windows, aggregates other than HFAs are never passed in registers, and
  // they do not consume register slots. Homogenous floating-point aggregates
  // (HFAs) have already been dealt with at this point.
  if (IsWin32StructABI && isAggregateTypeForABI(Ty))
    return false;

  NeedsPadding = false;
  InReg = !IsMCUABI;

  if (!updateFreeRegs(Ty, State))
    return false;

  if (IsMCUABI)
    return true;

  if (State.CC == llvm::CallingConv::X86_FastCall ||
      State.CC == llvm::CallingConv::X86_VectorCall) {
    if (getContext().getTypeSize(Ty) <= 32 && State.FreeRegs)
      NeedsPadding = true;

    return false;
  }

  return true;
}

bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const {
  if (!updateFreeRegs(Ty, State))
    return false;

  if (IsMCUABI)
    return false;

  if (State.CC == llvm::CallingConv::X86_FastCall ||
      State.CC == llvm::CallingConv::X86_VectorCall) {
    if (getContext().getTypeSize(Ty) > 32)
      return false;

    return (Ty->isIntegralOrEnumerationType() || Ty->isPointerType() ||
        Ty->isReferenceType());
  }

  return true;
}
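// Register budgets for the helpers above are set up in computeInfo below
// (a sketch): fastcall and vectorcall start with 2 integer register slots
// (ECX and EDX), the MCU psABI with 3, and regparm(N) with N. Under
// fastcall/vectorcall only integral or pointer types of 32 bits or less
// actually land in a register; a 64-bit scalar would burn both slots and
// still be passed on the stack.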
ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
                                               CCState &State) const {
  // FIXME: Set alignment on indirect arguments.

  Ty = useFirstFieldIfTransparentUnion(Ty);

  // Check with the C++ ABI first.
  const RecordType *RT = Ty->getAs<RecordType>();
  if (RT) {
    CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
    if (RAA == CGCXXABI::RAA_Indirect) {
      return getIndirectResult(Ty, false, State);
    } else if (RAA == CGCXXABI::RAA_DirectInMemory) {
      // The field index doesn't matter, we'll fix it up later.
      return ABIArgInfo::getInAlloca(/*FieldIndex=*/0);
    }
  }

  // vectorcall adds the concept of a homogenous vector aggregate, similar
  // to other targets.
  const Type *Base = nullptr;
  uint64_t NumElts = 0;
  if (State.CC == llvm::CallingConv::X86_VectorCall &&
      isHomogeneousAggregate(Ty, Base, NumElts)) {
    if (State.FreeSSERegs >= NumElts) {
      State.FreeSSERegs -= NumElts;
      if (Ty->isBuiltinType() || Ty->isVectorType())
        return ABIArgInfo::getDirect();
      return ABIArgInfo::getExpand();
    }
    return getIndirectResult(Ty, /*ByVal=*/false, State);
  }

  if (isAggregateTypeForABI(Ty)) {
    // Structures with flexible arrays are always indirect.
    // FIXME: This should not be byval!
    if (RT && RT->getDecl()->hasFlexibleArrayMember())
      return getIndirectResult(Ty, true, State);

    // Ignore empty structs/unions on non-Windows.
    if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true))
      return ABIArgInfo::getIgnore();

    llvm::LLVMContext &LLVMContext = getVMContext();
    llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
    bool NeedsPadding = false;
    bool InReg;
    if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) {
      unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32;
      SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32);
      llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
      if (InReg)
        return ABIArgInfo::getDirectInReg(Result);
      else
        return ABIArgInfo::getDirect(Result);
    }
    llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr;

    // Expand small (<= 128-bit) record types when we know that the stack layout
    // of those arguments will match the struct. This is important because the
    // LLVM backend isn't smart enough to remove byval, which inhibits many
    // optimizations.
    // Don't do this for the MCU if there are still free integer registers
    // (see X86_64 ABI for full explanation).
    if (getContext().getTypeSize(Ty) <= 4 * 32 &&
        (!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty))
      return ABIArgInfo::getExpandWithPadding(
          State.CC == llvm::CallingConv::X86_FastCall ||
              State.CC == llvm::CallingConv::X86_VectorCall,
          PaddingType);

    return getIndirectResult(Ty, true, State);
  }

  if (const VectorType *VT = Ty->getAs<VectorType>()) {
    // On Darwin, some vectors are passed in memory, we handle this by passing
    // it as an i8/i16/i32/i64.
    if (IsDarwinVectorABI) {
      uint64_t Size = getContext().getTypeSize(Ty);
      if ((Size == 8 || Size == 16 || Size == 32) ||
          (Size == 64 && VT->getNumElements() == 1))
        return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
                                                            Size));
    }

    if (IsX86_MMXType(CGT.ConvertType(Ty)))
      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 64));

    return ABIArgInfo::getDirect();
  }


  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
    Ty = EnumTy->getDecl()->getIntegerType();

  bool InReg = shouldPrimitiveUseInReg(Ty, State);

  if (Ty->isPromotableIntegerType()) {
    if (InReg)
      return ABIArgInfo::getExtendInReg();
    return ABIArgInfo::getExtend();
  }

  if (InReg)
    return ABIArgInfo::getDirectInReg();
  return ABIArgInfo::getDirect();
}

void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
  CCState State(FI.getCallingConvention());
  if (IsMCUABI)
    State.FreeRegs = 3;
  else if (State.CC == llvm::CallingConv::X86_FastCall)
    State.FreeRegs = 2;
  else if (State.CC == llvm::CallingConv::X86_VectorCall) {
    State.FreeRegs = 2;
    State.FreeSSERegs = 6;
  } else if (FI.getHasRegParm())
    State.FreeRegs = FI.getRegParm();
  else
    State.FreeRegs = DefaultNumRegisterParameters;

  if (!getCXXABI().classifyReturnType(FI)) {
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), State);
  } else if (FI.getReturnInfo().isIndirect()) {
    // The C++ ABI is not aware of register usage, so we have to check if the
    // return value was sret and put it in a register ourselves if appropriate.
    if (State.FreeRegs) {
      --State.FreeRegs;  // The sret parameter consumes a register.
      if (!IsMCUABI)
        FI.getReturnInfo().setInReg(true);
    }
  }

  // The chain argument effectively gives us another free register.
  if (FI.isChainCall())
    ++State.FreeRegs;

  bool UsedInAlloca = false;
  for (auto &I : FI.arguments()) {
    I.info = classifyArgumentType(I.type, State);
    UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
  }

  // If we needed to use inalloca for any argument, do a second pass and
  // rewrite all the memory arguments to use inalloca.
  if (UsedInAlloca)
    rewriteWithInAlloca(FI);
}

void
X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
                                   CharUnits &StackOffset, ABIArgInfo &Info,
                                   QualType Type) const {
  // Arguments are always 4-byte-aligned.
  CharUnits FieldAlign = CharUnits::fromQuantity(4);

  assert(StackOffset.isMultipleOf(FieldAlign) && "unaligned inalloca struct");
  Info = ABIArgInfo::getInAlloca(FrameFields.size());
  FrameFields.push_back(CGT.ConvertTypeForMem(Type));
  StackOffset += getContext().getTypeSizeInChars(Type);

  // Insert padding bytes to respect alignment.
  CharUnits FieldEnd = StackOffset;
  StackOffset = FieldEnd.alignTo(FieldAlign);
  if (StackOffset != FieldEnd) {
    CharUnits NumBytes = StackOffset - FieldEnd;
    llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext());
    Ty = llvm::ArrayType::get(Ty, NumBytes.getQuantity());
    FrameFields.push_back(Ty);
  }
}

static bool isArgInAlloca(const ABIArgInfo &Info) {
  // Leave ignored and inreg arguments alone.
  switch (Info.getKind()) {
  case ABIArgInfo::InAlloca:
    return true;
  case ABIArgInfo::Indirect:
    assert(Info.getIndirectByVal());
    return true;
  case ABIArgInfo::Ignore:
    return false;
  case ABIArgInfo::Direct:
  case ABIArgInfo::Extend:
    if (Info.getInReg())
      return false;
    return true;
  case ABIArgInfo::Expand:
  case ABIArgInfo::CoerceAndExpand:
    // These are aggregate types which are never passed in registers when
    // inalloca is involved.
    return true;
  }
  llvm_unreachable("invalid enum");
}

void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const {
  assert(IsWin32StructABI && "inalloca only supported on win32");

  // Build a packed struct type for all of the arguments in memory.
  SmallVector<llvm::Type *, 6> FrameFields;

  // The stack alignment is always 4.
  CharUnits StackAlign = CharUnits::fromQuantity(4);

  CharUnits StackOffset;
  CGFunctionInfo::arg_iterator I = FI.arg_begin(), E = FI.arg_end();

  // Put 'this' into the struct before 'sret', if necessary.
  bool IsThisCall =
      FI.getCallingConvention() == llvm::CallingConv::X86_ThisCall;
  ABIArgInfo &Ret = FI.getReturnInfo();
  if (Ret.isIndirect() && Ret.isSRetAfterThis() && !IsThisCall &&
      isArgInAlloca(I->info)) {
    addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type);
    ++I;
  }

  // Put the sret parameter into the inalloca struct if it's in memory.
  if (Ret.isIndirect() && !Ret.getInReg()) {
    CanQualType PtrTy = getContext().getPointerType(FI.getReturnType());
    addFieldToArgStruct(FrameFields, StackOffset, Ret, PtrTy);
    // On Windows, the hidden sret parameter is always returned in eax.
    Ret.setInAllocaSRet(IsWin32StructABI);
  }

  // Skip the 'this' parameter in ecx.
  if (IsThisCall)
    ++I;

  // Put arguments passed in memory into the struct.
  for (; I != E; ++I) {
    if (isArgInAlloca(I->info))
      addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type);
  }

  FI.setArgStruct(llvm::StructType::get(getVMContext(), FrameFields,
                                        /*isPacked=*/true),
                  StackAlign);
}

Address X86_32ABIInfo::EmitVAArg(CodeGenFunction &CGF,
                                 Address VAListAddr, QualType Ty) const {

  auto TypeInfo = getContext().getTypeInfoInChars(Ty);

  // x86-32 changes the alignment of certain arguments on the stack.
  //
  // Just messing with TypeInfo like this works because we never pass
  // anything indirectly.
  TypeInfo.second = CharUnits::fromQuantity(
                getTypeStackAlignInBytes(Ty, TypeInfo.second.getQuantity()));

  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false,
                          TypeInfo, CharUnits::fromQuantity(4),
                          /*AllowHigherAlign*/ true);
}

bool X86_32TargetCodeGenInfo::isStructReturnInRegABI(
    const llvm::Triple &Triple, const CodeGenOptions &Opts) {
  assert(Triple.getArch() == llvm::Triple::x86);

  switch (Opts.getStructReturnConvention()) {
  case CodeGenOptions::SRCK_Default:
    break;
  case CodeGenOptions::SRCK_OnStack:  // -fpcc-struct-return
    return false;
  case CodeGenOptions::SRCK_InRegs:  // -freg-struct-return
    return true;
  }

  if (Triple.isOSDarwin() || Triple.isOSIAMCU())
    return true;

  switch (Triple.getOS()) {
  case llvm::Triple::DragonFly:
  case llvm::Triple::FreeBSD:
  case llvm::Triple::OpenBSD:
  case llvm::Triple::Bitrig:
  case llvm::Triple::Win32:
    return true;
  default:
    return false;
  }
}
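// Sketch of the resulting defaults: -freg-struct-return and
// -fpcc-struct-return map to SRCK_InRegs and SRCK_OnStack above; with
// neither flag, small structs are returned in registers on Darwin, IAMCU,
// DragonFly, FreeBSD, OpenBSD, Bitrig, and Win32, and on the stack
// elsewhere (e.g. Linux).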
void X86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D,
                                                  llvm::GlobalValue *GV,
                                            CodeGen::CodeGenModule &CGM) const {
  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
    if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
      // Get the LLVM function.
      llvm::Function *Fn = cast<llvm::Function>(GV);

      // Now add the 'alignstack' attribute with a value of 16.
      llvm::AttrBuilder B;
      B.addStackAlignmentAttr(16);
      Fn->addAttributes(llvm::AttributeSet::FunctionIndex,
                      llvm::AttributeSet::get(CGM.getLLVMContext(),
                                              llvm::AttributeSet::FunctionIndex,
                                              B));
    }
    if (FD->hasAttr<AnyX86InterruptAttr>()) {
      llvm::Function *Fn = cast<llvm::Function>(GV);
      Fn->setCallingConv(llvm::CallingConv::X86_INTR);
    }
  }
}

bool X86_32TargetCodeGenInfo::initDwarfEHRegSizeTable(
                                               CodeGen::CodeGenFunction &CGF,
                                               llvm::Value *Address) const {
  CodeGen::CGBuilderTy &Builder = CGF.Builder;

  llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4);

  // 0-7 are the eight integer registers;  the order is different
  //   on Darwin (for EH), but the range is the same.
  // 8 is %eip.
  AssignToArrayRange(Builder, Address, Four8, 0, 8);

  if (CGF.CGM.getTarget().getTriple().isOSDarwin()) {
    // 12-16 are st(0..4).  Not sure why we stop at 4.
    // These have size 16, which is sizeof(long double) on
    // platforms with 8-byte alignment for that type.
    llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16);
    AssignToArrayRange(Builder, Address, Sixteen8, 12, 16);

  } else {
    // 9 is %eflags, which doesn't get a size on Darwin for some
    // reason.
    Builder.CreateAlignedStore(
        Four8, Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, Address, 9),
                               CharUnits::One());

    // 11-16 are st(0..5).  Not sure why we stop at 5.
    // These have size 12, which is sizeof(long double) on
    // platforms with 4-byte alignment for that type.
    llvm::Value *Twelve8 = llvm::ConstantInt::get(CGF.Int8Ty, 12);
    AssignToArrayRange(Builder, Address, Twelve8, 11, 16);
  }

  return false;
}

//===----------------------------------------------------------------------===//
// X86-64 ABI Implementation
//===----------------------------------------------------------------------===//


namespace {

/// The AVX ABI level for X86 targets.
enum class X86AVXABILevel {
  None,
  AVX,
  AVX512
};

/// \p returns the size in bits of the largest (native) vector for \p AVXLevel.
static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) {
  switch (AVXLevel) {
  case X86AVXABILevel::AVX512:
    return 512;
  case X86AVXABILevel::AVX:
    return 256;
  case X86AVXABILevel::None:
    return 128;
  }
  llvm_unreachable("Unknown AVXLevel");
}

/// X86_64ABIInfo - The X86_64 ABI information.
class X86_64ABIInfo : public SwiftABIInfo {
  enum Class {
    Integer = 0,
    SSE,
    SSEUp,
    X87,
    X87Up,
    ComplexX87,
    NoClass,
    Memory
  };

  /// merge - Implement the X86_64 ABI merging algorithm.
  ///
  /// Merge an accumulating classification \arg Accum with a field
  /// classification \arg Field.
  ///
  /// \param Accum - The accumulating classification. This should
  /// always be either NoClass or the result of a previous merge
  /// call. In addition, this should never be Memory (the caller
  /// should just return Memory for the aggregate).
  static Class merge(Class Accum, Class Field);

  /// postMerge - Implement the X86_64 ABI post merging algorithm.
  ///
  /// Post merger cleanup, reduces a malformed Hi and Lo pair to
  /// final MEMORY or SSE classes when necessary.
  ///
  /// \param AggregateSize - The size of the current aggregate in
  /// the classification process.
  ///
  /// \param Lo - The classification for the parts of the type
  /// residing in the low word of the containing object.
  ///
  /// \param Hi - The classification for the parts of the type
  /// residing in the higher words of the containing object.
  ///
  void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const;

  /// classify - Determine the x86_64 register classes in which the
  /// given type T should be passed.
  ///
  /// \param Lo - The classification for the parts of the type
  /// residing in the low word of the containing object.
  ///
  /// \param Hi - The classification for the parts of the type
  /// residing in the high word of the containing object.
  ///
  /// \param OffsetBase - The bit offset of this type in the
  /// containing object.  Some parameters are classified different
  /// depending on whether they straddle an eightbyte boundary.
  ///
  /// \param isNamedArg - Whether the argument in question is a "named"
  /// argument, as used in AMD64-ABI 3.5.7.
  ///
  /// If a word is unused its result will be NoClass; if a type should
  /// be passed in Memory then at least the classification of \arg Lo
  /// will be Memory.
  ///
  /// The \arg Lo class will be NoClass iff the argument is ignored.
  ///
  /// If the \arg Lo class is ComplexX87, then the \arg Hi class will
  /// also be ComplexX87.
  void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi,
                bool isNamedArg) const;

  llvm::Type *GetByteVectorType(QualType Ty) const;
  llvm::Type *GetSSETypeAtOffset(llvm::Type *IRType,
                                 unsigned IROffset, QualType SourceTy,
                                 unsigned SourceOffset) const;
  llvm::Type *GetINTEGERTypeAtOffset(llvm::Type *IRType,
                                     unsigned IROffset, QualType SourceTy,
                                     unsigned SourceOffset) const;

  /// getIndirectResult - Give a source type \arg Ty, return a suitable result
  /// such that the argument will be returned in memory.
  ABIArgInfo getIndirectReturnResult(QualType Ty) const;

  /// getIndirectResult - Give a source type \arg Ty, return a suitable result
  /// such that the argument will be passed in memory.
  ///
  /// \param freeIntRegs - The number of free integer registers remaining
  /// available.
  ABIArgInfo getIndirectResult(QualType Ty, unsigned freeIntRegs) const;

  ABIArgInfo classifyReturnType(QualType RetTy) const;

  ABIArgInfo classifyArgumentType(QualType Ty, unsigned freeIntRegs,
                                  unsigned &neededInt, unsigned &neededSSE,
                                  bool isNamedArg) const;

  bool IsIllegalVectorType(QualType Ty) const;

  /// The 0.98 ABI revision clarified a lot of ambiguities,
  /// unfortunately in ways that were not always consistent with
  /// certain previous compilers.  In particular, platforms which
  /// required strict binary compatibility with older versions of GCC
  /// may need to exempt themselves.
  bool honorsRevision0_98() const {
    return !getTarget().getTriple().isOSDarwin();
  }

  /// GCC classifies <1 x long long> as SSE but compatibility with older clang
  /// compilers requires us to classify it as INTEGER.
  bool classifyIntegerMMXAsSSE() const {
    const llvm::Triple &Triple = getTarget().getTriple();
    if (Triple.isOSDarwin() || Triple.getOS() == llvm::Triple::PS4)
      return false;
    if (Triple.isOSFreeBSD() && Triple.getOSMajorVersion() >= 10)
      return false;
    return true;
  }

  X86AVXABILevel AVXLevel;
  // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on
  // 64-bit hardware.
  bool Has64BitPointers;

public:
  X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) :
      SwiftABIInfo(CGT), AVXLevel(AVXLevel),
      Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) {
  }
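  // Net effect of classifyIntegerMMXAsSSE (a sketch): on Darwin, PS4, and
  // FreeBSD >= 10, a <1 x long long> argument keeps the legacy INTEGER
  // class and travels in a GPR; everywhere else it is classified SSE to
  // match GCC.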
  bool isPassedUsingAVXType(QualType type) const {
    unsigned neededInt, neededSSE;
    // The freeIntRegs argument doesn't matter here.
    ABIArgInfo info = classifyArgumentType(type, 0, neededInt, neededSSE,
                                           /*isNamedArg*/true);
    if (info.isDirect()) {
      llvm::Type *ty = info.getCoerceToType();
      if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(ty))
        return (vectorTy->getBitWidth() > 128);
    }
    return false;
  }

  void computeInfo(CGFunctionInfo &FI) const override;

  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override;
  Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
                      QualType Ty) const override;

  bool has64BitPointers() const {
    return Has64BitPointers;
  }

  bool shouldPassIndirectlyForSwift(CharUnits totalSize,
                                    ArrayRef<llvm::Type *> scalars,
                                    bool asReturnValue) const override {
    return occupiesMoreThan(CGT, scalars, /*total*/ 4);
  }
};

/// WinX86_64ABIInfo - The Windows X86_64 ABI information.
class WinX86_64ABIInfo : public ABIInfo {
public:
  WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT)
      : ABIInfo(CGT),
        IsMingw64(getTarget().getTriple().isWindowsGNUEnvironment()) {}

  void computeInfo(CGFunctionInfo &FI) const override;

  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override;

  bool isHomogeneousAggregateBaseType(QualType Ty) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorTypeForVectorCall(getContext(), Ty);
  }

  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                         uint64_t NumMembers) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorCallAggregateSmallEnough(NumMembers);
  }

private:
  ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs,
                      bool IsReturnType) const;

  bool IsMingw64;
};

class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
  X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
      : TargetCodeGenInfo(new X86_64ABIInfo(CGT, AVXLevel)) {}

  const X86_64ABIInfo &getABIInfo() const {
    return static_cast<const X86_64ABIInfo&>(TargetCodeGenInfo::getABIInfo());
  }

  int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
    return 7;
  }

  bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                               llvm::Value *Address) const override {
    llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);

    // 0-15 are the 16 integer registers.
    // 16 is %rip.
    AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16);
    return false;
  }

  llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
                                  StringRef Constraint,
                                  llvm::Type* Ty) const override {
    return X86AdjustInlineAsmType(CGF, Constraint, Ty);
  }

  bool isNoProtoCallVariadic(const CallArgList &args,
                             const FunctionNoProtoType *fnType) const override {
    // The default CC on x86-64 sets %al to the number of SSE
    // registers used, and GCC sets this when calling an unprototyped
    // function, so we override the default behavior.  However, don't do
    // that when AVX types are involved: the ABI explicitly states it is
    // undefined, and it doesn't work in practice because of how the ABI
    // defines varargs anyway.
    if (fnType->getCallConv() == CC_C) {
      bool HasAVXType = false;
      for (CallArgList::const_iterator
             it = args.begin(), ie = args.end(); it != ie; ++it) {
        if (getABIInfo().isPassedUsingAVXType(it->Ty)) {
          HasAVXType = true;
          break;
        }
      }

      if (!HasAVXType)
        return true;
    }

    return TargetCodeGenInfo::isNoProtoCallVariadic(args, fnType);
  }

  llvm::Constant *
  getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override {
    unsigned Sig;
    if (getABIInfo().has64BitPointers())
      Sig = (0xeb << 0) |  // jmp rel8
            (0x0a << 8) |  //           .+0x0c
            ('F' << 16) |
            ('T' << 24);
    else
      Sig = (0xeb << 0) |  // jmp rel8
            (0x06 << 8) |  //           .+0x08
            ('F' << 16) |
            ('T' << 24);
    return llvm::ConstantInt::get(CGM.Int32Ty, Sig);
  }

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &CGM) const override {
    if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
      if (FD->hasAttr<AnyX86InterruptAttr>()) {
        llvm::Function *Fn = cast<llvm::Function>(GV);
        Fn->setCallingConv(llvm::CallingConv::X86_INTR);
      }
    }
  }
};

class PS4TargetCodeGenInfo : public X86_64TargetCodeGenInfo {
public:
  PS4TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
    : X86_64TargetCodeGenInfo(CGT, AVXLevel) {}

  void getDependentLibraryOption(llvm::StringRef Lib,
                                 llvm::SmallString<24> &Opt) const override {
    Opt = "\01";
    // If the argument contains a space, enclose it in quotes.
    if (Lib.find(" ") != StringRef::npos)
      Opt += "\"" + Lib.str() + "\"";
    else
      Opt += Lib;
  }
};

static std::string qualifyWindowsLibrary(llvm::StringRef Lib) {
  // If the argument does not end in .lib, automatically add the suffix.
  // If the argument contains a space, enclose it in quotes.
  // This matches the behavior of MSVC.
  bool Quote = (Lib.find(" ") != StringRef::npos);
  std::string ArgStr = Quote ? "\"" : "";
  ArgStr += Lib;
  if (!Lib.endswith_lower(".lib"))
    ArgStr += ".lib";
  ArgStr += Quote ? "\"" : "";
"\"" : ""; return ArgStr; } class WinX86_32TargetCodeGenInfo : public X86_32TargetCodeGenInfo { public: WinX86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, unsigned NumRegisterParameters) : X86_32TargetCodeGenInfo(CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI, NumRegisterParameters, false) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { Opt = "/DEFAULTLIB:"; Opt += qualifyWindowsLibrary(Lib); } void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, llvm::SmallString<32> &Opt) const override { Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; } }; static void addStackProbeSizeTargetAttribute(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) { if (D && isa(D)) { if (CGM.getCodeGenOpts().StackProbeSize != 4096) { llvm::Function *Fn = cast(GV); Fn->addFnAttr("stack-probe-size", llvm::utostr(CGM.getCodeGenOpts().StackProbeSize)); } } } void WinX86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); addStackProbeSizeTargetAttribute(D, GV, CGM); } class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo { public: WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) : TargetCodeGenInfo(new WinX86_64ABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { return 7; } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override { llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8); // 0-15 are the 16 integer registers. // 16 is %rip. AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16); return false; } void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { Opt = "/DEFAULTLIB:"; Opt += qualifyWindowsLibrary(Lib); } void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, llvm::SmallString<32> &Opt) const override { Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; } }; void WinX86_64TargetCodeGenInfo::setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); if (const FunctionDecl *FD = dyn_cast_or_null(D)) { if (FD->hasAttr()) { llvm::Function *Fn = cast(GV); Fn->setCallingConv(llvm::CallingConv::X86_INTR); } } addStackProbeSizeTargetAttribute(D, GV, CGM); } } void X86_64ABIInfo::postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const { // AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done: // // (a) If one of the classes is Memory, the whole argument is passed in // memory. // // (b) If X87UP is not preceded by X87, the whole argument is passed in // memory. // // (c) If the size of the aggregate exceeds two eightbytes and the first // eightbyte isn't SSE or any other eightbyte isn't SSEUP, the whole // argument is passed in memory. NOTE: This is necessary to keep the // ABI working for processors that don't support the __m256 type. // // (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE. // // Some of these are enforced by the merging logic. 
void X86_64ABIInfo::postMerge(unsigned AggregateSize, Class &Lo,
                              Class &Hi) const {
  // AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done:
  //
  // (a) If one of the classes is Memory, the whole argument is passed in
  //     memory.
  //
  // (b) If X87UP is not preceded by X87, the whole argument is passed in
  //     memory.
  //
  // (c) If the size of the aggregate exceeds two eightbytes and the first
  //     eightbyte isn't SSE or any other eightbyte isn't SSEUP, the whole
  //     argument is passed in memory. NOTE: This is necessary to keep the
  //     ABI working for processors that don't support the __m256 type.
  //
  // (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE.
  //
  // Some of these are enforced by the merging logic.  Others can arise
  // only with unions; for example:
  //   union { _Complex double; unsigned; }
  //
  // Note that clauses (b) and (c) were added in 0.98.
  //
  if (Hi == Memory)
    Lo = Memory;
  if (Hi == X87Up && Lo != X87 && honorsRevision0_98())
    Lo = Memory;
  if (AggregateSize > 128 && (Lo != SSE || Hi != SSEUp))
    Lo = Memory;
  if (Hi == SSEUp && Lo != SSE)
    Hi = SSE;
}

X86_64ABIInfo::Class X86_64ABIInfo::merge(Class Accum, Class Field) {
  // AMD64-ABI 3.2.3p2: Rule 4. Each field of an object is
  // classified recursively so that always two fields are
  // considered. The resulting class is calculated according to
  // the classes of the fields in the eightbyte:
  //
  // (a) If both classes are equal, this is the resulting class.
  //
  // (b) If one of the classes is NO_CLASS, the resulting class is
  // the other class.
  //
  // (c) If one of the classes is MEMORY, the result is the MEMORY
  // class.
  //
  // (d) If one of the classes is INTEGER, the result is the
  // INTEGER.
  //
  // (e) If one of the classes is X87, X87UP, COMPLEX_X87 class,
  // MEMORY is used as class.
  //
  // (f) Otherwise class SSE is used.

  // Accum should never be memory (we should have returned) or
  // ComplexX87 (because this cannot be passed in a structure).
  assert((Accum != Memory && Accum != ComplexX87) &&
         "Invalid accumulated classification during merge.");
  if (Accum == Field || Field == NoClass)
    return Accum;
  if (Field == Memory)
    return Memory;
  if (Accum == NoClass)
    return Field;
  if (Accum == Integer || Field == Integer)
    return Integer;
  if (Field == X87 || Field == X87Up || Field == ComplexX87 ||
      Accum == X87 || Accum == X87Up)
    return Memory;
  return SSE;
}
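// Worked merge examples (a sketch):
//   merge(NoClass, Integer) == Integer   // rule (b)
//   merge(Integer, SSE)     == Integer   // rule (d)
//   merge(SSE, X87)         == Memory    // rule (e)
// So for union { double d; unsigned u; }, the single eightbyte merges
// SSE with INTEGER and ends up INTEGER, i.e. passed in a GPR.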
If that // straddles an eightbyte boundary, Hi should be classified as well. uint64_t EB_FuncPtr = (OffsetBase) / 64; uint64_t EB_ThisAdj = (OffsetBase + 64 - 1) / 64; if (EB_FuncPtr != EB_ThisAdj) { Lo = Hi = Integer; } else { Current = Integer; } } } else { Current = Integer; } return; } if (const VectorType *VT = Ty->getAs()) { uint64_t Size = getContext().getTypeSize(VT); if (Size == 1 || Size == 8 || Size == 16 || Size == 32) { // gcc passes the following as integer: // 4 bytes - <4 x char>, <2 x short>, <1 x int>, <1 x float> // 2 bytes - <2 x char>, <1 x short> // 1 byte - <1 x char> Current = Integer; // If this type crosses an eightbyte boundary, it should be // split. uint64_t EB_Lo = (OffsetBase) / 64; uint64_t EB_Hi = (OffsetBase + Size - 1) / 64; if (EB_Lo != EB_Hi) Hi = Lo; } else if (Size == 64) { QualType ElementType = VT->getElementType(); // gcc passes <1 x double> in memory. :( if (ElementType->isSpecificBuiltinType(BuiltinType::Double)) return; // gcc passes <1 x long long> as SSE but clang used to unconditionally // pass them as integer. For platforms where clang is the de facto // platform compiler, we must continue to use integer. if (!classifyIntegerMMXAsSSE() && (ElementType->isSpecificBuiltinType(BuiltinType::LongLong) || ElementType->isSpecificBuiltinType(BuiltinType::ULongLong) || ElementType->isSpecificBuiltinType(BuiltinType::Long) || ElementType->isSpecificBuiltinType(BuiltinType::ULong))) Current = Integer; else Current = SSE; // If this type crosses an eightbyte boundary, it should be // split. if (OffsetBase && OffsetBase != 64) Hi = Lo; } else if (Size == 128 || (isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) { // Arguments of 256-bits are split into four eightbyte chunks. The // least significant one belongs to class SSE and all the others to class // SSEUP. The original Lo and Hi design considers that types can't be // greater than 128-bits, so a 64-bit split in Hi and Lo makes sense. // This design isn't correct for 256-bits, but since there're no cases // where the upper parts would need to be inspected, avoid adding // complexity and just consider Hi to match the 64-256 part. // // Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in // registers if they are "named", i.e. not part of the "..." of a // variadic function. // // Similarly, per 3.2.3. of the AVX512 draft, 512-bits ("named") args are // split into eight eightbyte chunks, one SSE and seven SSEUP. Lo = SSE; Hi = SSEUp; } return; } if (const ComplexType *CT = Ty->getAs()) { QualType ET = getContext().getCanonicalType(CT->getElementType()); uint64_t Size = getContext().getTypeSize(Ty); if (ET->isIntegralOrEnumerationType()) { if (Size <= 64) Current = Integer; else if (Size <= 128) Lo = Hi = Integer; } else if (ET == getContext().FloatTy) { Current = SSE; } else if (ET == getContext().DoubleTy) { Lo = Hi = SSE; } else if (ET == getContext().LongDoubleTy) { const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); if (LDF == &llvm::APFloat::IEEEquad) Current = Memory; else if (LDF == &llvm::APFloat::x87DoubleExtended) Current = ComplexX87; else if (LDF == &llvm::APFloat::IEEEdouble) Lo = Hi = SSE; else llvm_unreachable("unexpected long double representation!"); } // If this complex type crosses an eightbyte boundary then it // should be split. 
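// A few worked examples of the rules above, assuming the hypothetical C
// types below (not part of this change; the classes follow from the code
// quoted here, per AMD64-ABI 3.2.3):
//
//   typedef int v4si __attribute__((vector_size(16)));
//   typedef long long v1di __attribute__((vector_size(8)));
//
//   __int128        -> Lo = Integer, Hi = Integer  (two GPRs)
//   v4si            -> Lo = SSE, Hi = SSEUp        (one full XMM register)
//   v1di            -> Integer if !classifyIntegerMMXAsSSE(), otherwise SSE
//   _Complex float  -> Current = SSE               (both halves, one eightbyte)
//   _Complex double -> Lo = SSE, Hi = SSE          (split across eightbytes)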
  uint64_t EB_Real = (OffsetBase) / 64;
  uint64_t EB_Imag = (OffsetBase + getContext().getTypeSize(ET)) / 64;
  if (Hi == NoClass && EB_Real != EB_Imag)
    Hi = Lo;

  return;
}

if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
  // Arrays are treated like structures.

  uint64_t Size = getContext().getTypeSize(Ty);

  // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
  // than four eightbytes, ..., it has class MEMORY.
  if (Size > 256)
    return;

  // AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned
  // fields, it has class MEMORY.
  //
  // Only need to check alignment of array base.
  if (OffsetBase % getContext().getTypeAlign(AT->getElementType()))
    return;

  // Otherwise implement simplified merge. We could be smarter about
  // this, but it isn't worth it and would be harder to verify.
  Current = NoClass;
  uint64_t EltSize = getContext().getTypeSize(AT->getElementType());
  uint64_t ArraySize = AT->getSize().getZExtValue();

  // The only case a 256-bit wide vector could be used is when the array
  // contains a single 256-bit element. Since Lo and Hi logic isn't extended
  // to work for sizes wider than 128, early check and fallback to memory.
  if (Size > 128 && EltSize != 256)
    return;

  for (uint64_t i=0, Offset=OffsetBase; i<ArraySize; ++i, Offset += EltSize) {
    Class FieldLo, FieldHi;
    classify(AT->getElementType(), Offset, FieldLo, FieldHi, isNamedArg);
    Lo = merge(Lo, FieldLo);
    Hi = merge(Hi, FieldHi);
    if (Lo == Memory || Hi == Memory)
      break;
  }

  postMerge(Size, Lo, Hi);
  assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp array classification.");
  return;
}

if (const RecordType *RT = Ty->getAs<RecordType>()) {
  uint64_t Size = getContext().getTypeSize(Ty);

  // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
  // than four eightbytes, ..., it has class MEMORY.
  if (Size > 256)
    return;

  // AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial
  // copy constructor or a non-trivial destructor, it is passed by invisible
  // reference.
  if (getRecordArgABI(RT, getCXXABI()))
    return;

  const RecordDecl *RD = RT->getDecl();

  // Assume variable sized types are passed in memory.
  if (RD->hasFlexibleArrayMember())
    return;

  const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);

  // Reset Lo class, this will be recomputed.
  Current = NoClass;

  // If this is a C++ record, classify the bases first.
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
    for (const auto &I : CXXRD->bases()) {
      assert(!I.isVirtual() && !I.getType()->isDependentType() &&
             "Unexpected base class!");
      const CXXRecordDecl *Base =
        cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl());

      // Classify this field.
      //
      // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate exceeds a
      // single eightbyte, each is classified separately. Each eightbyte gets
      // initialized to class NO_CLASS.
      Class FieldLo, FieldHi;
      uint64_t Offset =
        OffsetBase + getContext().toBits(Layout.getBaseClassOffset(Base));
      classify(I.getType(), Offset, FieldLo, FieldHi, isNamedArg);
      Lo = merge(Lo, FieldLo);
      Hi = merge(Hi, FieldHi);
      if (Lo == Memory || Hi == Memory) {
        postMerge(Size, Lo, Hi);
        return;
      }
    }
  }

  // Classify the fields one at a time, merging the results.
  unsigned idx = 0;
  for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
       i != e; ++i, ++idx) {
    uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx);
    bool BitField = i->isBitField();

    // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger than
    // four eightbytes, or it contains unaligned fields, it has class MEMORY.
    //
    // The only case a 256-bit wide vector could be used is when the struct
    // contains a single 256-bit element.
Since Lo and Hi logic isn't extended // to work for sizes wider than 128, early check and fallback to memory. // if (Size > 128 && getContext().getTypeSize(i->getType()) != 256) { Lo = Memory; postMerge(Size, Lo, Hi); return; } // Note, skip this test for bit-fields, see below. if (!BitField && Offset % getContext().getTypeAlign(i->getType())) { Lo = Memory; postMerge(Size, Lo, Hi); return; } // Classify this field. // // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate // exceeds a single eightbyte, each is classified // separately. Each eightbyte gets initialized to class // NO_CLASS. Class FieldLo, FieldHi; // Bit-fields require special handling, they do not force the // structure to be passed in memory even if unaligned, and // therefore they can straddle an eightbyte. if (BitField) { // Ignore padding bit-fields. if (i->isUnnamedBitfield()) continue; uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx); uint64_t Size = i->getBitWidthValue(getContext()); uint64_t EB_Lo = Offset / 64; uint64_t EB_Hi = (Offset + Size - 1) / 64; if (EB_Lo) { assert(EB_Hi == EB_Lo && "Invalid classification, type > 16 bytes."); FieldLo = NoClass; FieldHi = Integer; } else { FieldLo = Integer; FieldHi = EB_Hi ? Integer : NoClass; } } else classify(i->getType(), Offset, FieldLo, FieldHi, isNamedArg); Lo = merge(Lo, FieldLo); Hi = merge(Hi, FieldHi); if (Lo == Memory || Hi == Memory) break; } postMerge(Size, Lo, Hi); } } ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const { // If this is a scalar LLVM value then assume LLVM will pass it in the right // place naturally. if (!isAggregateTypeForABI(Ty)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } return getNaturalAlignIndirect(Ty); } bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const { if (const VectorType *VecTy = Ty->getAs()) { uint64_t Size = getContext().getTypeSize(VecTy); unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel); if (Size <= 64 || Size > LargestVector) return true; } return false; } ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty, unsigned freeIntRegs) const { // If this is a scalar LLVM value then assume LLVM will pass it in the right // place naturally. // // This assumption is optimistic, as there could be free registers available // when we need to pass this argument in memory, and LLVM could try to pass // the argument in the free register. This does not seem to happen currently, // but this code would be much safer if we could mark the argument with // 'onstack'. See PR12193. if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); // Compute the byval alignment. We specify the alignment of the byval in all // cases so that the mid-level optimizer knows the alignment of the byval. unsigned Align = std::max(getContext().getTypeAlign(Ty) / 8, 8U); // Attempt to avoid passing indirect results using byval when possible. This // is important for good codegen. 
// // We do this by coercing the value into a scalar type which the backend can // handle naturally (i.e., without using byval). // // For simplicity, we currently only do this when we have exhausted all of the // free integer registers. Doing this when there are free integer registers // would require more care, as we would have to ensure that the coerced value // did not claim the unused register. That would require either reording the // arguments to the function (so that any subsequent inreg values came first), // or only doing this optimization when there were no following arguments that // might be inreg. // // We currently expect it to be rare (particularly in well written code) for // arguments to be passed on the stack when there are still free integer // registers available (this would typically imply large structs being passed // by value), so this seems like a fair tradeoff for now. // // We can revisit this if the backend grows support for 'onstack' parameter // attributes. See PR12193. if (freeIntRegs == 0) { uint64_t Size = getContext().getTypeSize(Ty); // If this type fits in an eightbyte, coerce it into the matching integral // type, which will end up on the stack (with alignment 8). if (Align == 8 && Size <= 64) return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size)); } return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align)); } /// The ABI specifies that a value should be passed in a full vector XMM/YMM /// register. Pick an LLVM IR type that will be passed as a vector register. llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const { // Wrapper structs/arrays that only contain vectors are passed just like // vectors; strip them off if present. if (const Type *InnerTy = isSingleElementStruct(Ty, getContext())) Ty = QualType(InnerTy, 0); llvm::Type *IRType = CGT.ConvertType(Ty); if (isa(IRType) || IRType->getTypeID() == llvm::Type::FP128TyID) return IRType; // We couldn't find the preferred IR vector type for 'Ty'. uint64_t Size = getContext().getTypeSize(Ty); assert((Size == 128 || Size == 256) && "Invalid type found!"); // Return a LLVM IR vector type based on the size of 'Ty'. return llvm::VectorType::get(llvm::Type::getDoubleTy(getVMContext()), Size / 64); } /// BitsContainNoUserData - Return true if the specified [start,end) bit range /// is known to either be off the end of the specified type or being in /// alignment padding. The user type specified is known to be at most 128 bits /// in size, and have passed through X86_64ABIInfo::classify with a successful /// classification that put one of the two halves in the INTEGER class. /// /// It is conservatively correct to return false. static bool BitsContainNoUserData(QualType Ty, unsigned StartBit, unsigned EndBit, ASTContext &Context) { // If the bytes being queried are off the end of the type, there is no user // data hiding here. This handles analysis of builtins, vectors and other // types that don't contain interesting padding. unsigned TySize = (unsigned)Context.getTypeSize(Ty); if (TySize <= StartBit) return true; if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) { unsigned EltSize = (unsigned)Context.getTypeSize(AT->getElementType()); unsigned NumElts = (unsigned)AT->getSize().getZExtValue(); // Check each element to see if the element overlaps with the queried range. for (unsigned i = 0; i != NumElts; ++i) { // If the element is after the span we care about, then we're done.. 
unsigned EltOffset = i*EltSize; if (EltOffset >= EndBit) break; unsigned EltStart = EltOffset < StartBit ? StartBit-EltOffset :0; if (!BitsContainNoUserData(AT->getElementType(), EltStart, EndBit-EltOffset, Context)) return false; } // If it overlaps no elements, then it is safe to process as padding. return true; } if (const RecordType *RT = Ty->getAs()) { const RecordDecl *RD = RT->getDecl(); const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { for (const auto &I : CXXRD->bases()) { assert(!I.isVirtual() && !I.getType()->isDependentType() && "Unexpected base class!"); const CXXRecordDecl *Base = cast(I.getType()->getAs()->getDecl()); // If the base is after the span we care about, ignore it. unsigned BaseOffset = Context.toBits(Layout.getBaseClassOffset(Base)); if (BaseOffset >= EndBit) continue; unsigned BaseStart = BaseOffset < StartBit ? StartBit-BaseOffset :0; if (!BitsContainNoUserData(I.getType(), BaseStart, EndBit-BaseOffset, Context)) return false; } } // Verify that no field has data that overlaps the region of interest. Yes // this could be sped up a lot by being smarter about queried fields, // however we're only looking at structs up to 16 bytes, so we don't care // much. unsigned idx = 0; for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); i != e; ++i, ++idx) { unsigned FieldOffset = (unsigned)Layout.getFieldOffset(idx); // If we found a field after the region we care about, then we're done. if (FieldOffset >= EndBit) break; unsigned FieldStart = FieldOffset < StartBit ? StartBit-FieldOffset :0; if (!BitsContainNoUserData(i->getType(), FieldStart, EndBit-FieldOffset, Context)) return false; } // If nothing in this record overlapped the area of interest, then we're // clean. return true; } return false; } /// ContainsFloatAtOffset - Return true if the specified LLVM IR type has a /// float member at the specified offset. For example, {int,{float}} has a /// float at offset 4. It is conservatively correct for this routine to return /// false. static bool ContainsFloatAtOffset(llvm::Type *IRType, unsigned IROffset, const llvm::DataLayout &TD) { // Base case if we find a float. if (IROffset == 0 && IRType->isFloatTy()) return true; // If this is a struct, recurse into the field at the specified offset. if (llvm::StructType *STy = dyn_cast(IRType)) { const llvm::StructLayout *SL = TD.getStructLayout(STy); unsigned Elt = SL->getElementContainingOffset(IROffset); IROffset -= SL->getElementOffset(Elt); return ContainsFloatAtOffset(STy->getElementType(Elt), IROffset, TD); } // If this is an array, recurse into the field at the specified offset. if (llvm::ArrayType *ATy = dyn_cast(IRType)) { llvm::Type *EltTy = ATy->getElementType(); unsigned EltSize = TD.getTypeAllocSize(EltTy); IROffset -= IROffset/EltSize*EltSize; return ContainsFloatAtOffset(EltTy, IROffset, TD); } return false; } /// GetSSETypeAtOffset - Return a type that will be passed by the backend in the /// low 8 bytes of an XMM register, corresponding to the SSE class. llvm::Type *X86_64ABIInfo:: GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset, QualType SourceTy, unsigned SourceOffset) const { // The only three choices we have are either double, <2 x float>, or float. We // pass as float if the last 4 bytes is just padding. This happens for // structs that contain 3 floats. 
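// For example, assuming the hypothetical type below (consistent with the
// comment about structs of three floats, but not taken from this patch):
//   struct Vec3 { float x, y, z; };   // 12 bytes, classes {SSE, SSE}
// the low eightbyte is lowered as <2 x float> (x, y) and the high one as a
// lone float (z), because bits 96-128 are tail padding and
// BitsContainNoUserData() reports them free.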
if (BitsContainNoUserData(SourceTy, SourceOffset*8+32, SourceOffset*8+64, getContext())) return llvm::Type::getFloatTy(getVMContext()); // We want to pass as <2 x float> if the LLVM IR type contains a float at // offset+0 and offset+4. Walk the LLVM IR type to find out if this is the // case. if (ContainsFloatAtOffset(IRType, IROffset, getDataLayout()) && ContainsFloatAtOffset(IRType, IROffset+4, getDataLayout())) return llvm::VectorType::get(llvm::Type::getFloatTy(getVMContext()), 2); return llvm::Type::getDoubleTy(getVMContext()); } /// GetINTEGERTypeAtOffset - The ABI specifies that a value should be passed in /// an 8-byte GPR. This means that we either have a scalar or we are talking /// about the high or low part of an up-to-16-byte struct. This routine picks /// the best LLVM IR type to represent this, which may be i64 or may be anything /// else that the backend will pass in a GPR that works better (e.g. i8, %foo*, /// etc). /// /// PrefType is an LLVM IR type that corresponds to (part of) the IR type for /// the source type. IROffset is an offset in bytes into the LLVM IR type that /// the 8-byte value references. PrefType may be null. /// /// SourceTy is the source-level type for the entire argument. SourceOffset is /// an offset into this that we're processing (which is always either 0 or 8). /// llvm::Type *X86_64ABIInfo:: GetINTEGERTypeAtOffset(llvm::Type *IRType, unsigned IROffset, QualType SourceTy, unsigned SourceOffset) const { // If we're dealing with an un-offset LLVM IR type, then it means that we're // returning an 8-byte unit starting with it. See if we can safely use it. if (IROffset == 0) { // Pointers and int64's always fill the 8-byte unit. if ((isa(IRType) && Has64BitPointers) || IRType->isIntegerTy(64)) return IRType; // If we have a 1/2/4-byte integer, we can use it only if the rest of the // goodness in the source type is just tail padding. This is allowed to // kick in for struct {double,int} on the int, but not on // struct{double,int,int} because we wouldn't return the second int. We // have to do this analysis on the source type because we can't depend on // unions being lowered a specific way etc. if (IRType->isIntegerTy(8) || IRType->isIntegerTy(16) || IRType->isIntegerTy(32) || (isa(IRType) && !Has64BitPointers)) { unsigned BitWidth = isa(IRType) ? 32 : cast(IRType)->getBitWidth(); if (BitsContainNoUserData(SourceTy, SourceOffset*8+BitWidth, SourceOffset*8+64, getContext())) return IRType; } } if (llvm::StructType *STy = dyn_cast(IRType)) { // If this is a struct, recurse into the field at the specified offset. const llvm::StructLayout *SL = getDataLayout().getStructLayout(STy); if (IROffset < SL->getSizeInBytes()) { unsigned FieldIdx = SL->getElementContainingOffset(IROffset); IROffset -= SL->getElementOffset(FieldIdx); return GetINTEGERTypeAtOffset(STy->getElementType(FieldIdx), IROffset, SourceTy, SourceOffset); } } if (llvm::ArrayType *ATy = dyn_cast(IRType)) { llvm::Type *EltTy = ATy->getElementType(); unsigned EltSize = getDataLayout().getTypeAllocSize(EltTy); unsigned EltOffset = IROffset/EltSize*EltSize; return GetINTEGERTypeAtOffset(EltTy, IROffset-EltOffset, SourceTy, SourceOffset); } // Okay, we don't have any better idea of what to pass, so we pass this in an // integer register that isn't too big to fit the rest of the struct. 
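// A sketch of that fallback, assuming a hypothetical packed type:
//   struct __attribute__((packed)) P { double d; char tail[3]; }; // 11 bytes
// Here the high eightbyte plausibly yields min(11 - 8, 8) * 8 = 24 bits, so
// the argument would be coerced to { double, i24 } rather than claiming a
// full i64's worth of bits past the end of the object.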
unsigned TySizeInBytes = (unsigned)getContext().getTypeSizeInChars(SourceTy).getQuantity(); assert(TySizeInBytes != SourceOffset && "Empty field?"); // It is always safe to classify this as an integer type up to i64 that // isn't larger than the structure. return llvm::IntegerType::get(getVMContext(), std::min(TySizeInBytes-SourceOffset, 8U)*8); } /// GetX86_64ByValArgumentPair - Given a high and low type that can ideally /// be used as elements of a two register pair to pass or return, return a /// first class aggregate to represent them. For example, if the low part of /// a by-value argument should be passed as i32* and the high part as float, /// return {i32*, float}. static llvm::Type * GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi, const llvm::DataLayout &TD) { // In order to correctly satisfy the ABI, we need to the high part to start // at offset 8. If the high and low parts we inferred are both 4-byte types // (e.g. i32 and i32) then the resultant struct type ({i32,i32}) won't have // the second element at offset 8. Check for this: unsigned LoSize = (unsigned)TD.getTypeAllocSize(Lo); unsigned HiAlign = TD.getABITypeAlignment(Hi); unsigned HiStart = llvm::alignTo(LoSize, HiAlign); assert(HiStart != 0 && HiStart <= 8 && "Invalid x86-64 argument pair!"); // To handle this, we have to increase the size of the low part so that the // second element will start at an 8 byte offset. We can't increase the size // of the second element because it might make us access off the end of the // struct. if (HiStart != 8) { // There are usually two sorts of types the ABI generation code can produce // for the low part of a pair that aren't 8 bytes in size: float or // i8/i16/i32. This can also include pointers when they are 32-bit (X32 and // NaCl). // Promote these to a larger type. if (Lo->isFloatTy()) Lo = llvm::Type::getDoubleTy(Lo->getContext()); else { assert((Lo->isIntegerTy() || Lo->isPointerTy()) && "Invalid/unknown lo type"); Lo = llvm::Type::getInt64Ty(Lo->getContext()); } } llvm::StructType *Result = llvm::StructType::get(Lo, Hi, nullptr); // Verify that the second element is at an 8-byte offset. assert(TD.getStructLayout(Result)->getElementOffset(1) == 8 && "Invalid x86-64 argument pair!"); return Result; } ABIArgInfo X86_64ABIInfo:: classifyReturnType(QualType RetTy) const { // AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the // classification algorithm. X86_64ABIInfo::Class Lo, Hi; classify(RetTy, 0, Lo, Hi, /*isNamedArg*/ true); // Check some invariants. assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); llvm::Type *ResType = nullptr; switch (Lo) { case NoClass: if (Hi == NoClass) return ABIArgInfo::getIgnore(); // If the low part is just padding, it takes no register, leave ResType // null. assert((Hi == SSE || Hi == Integer || Hi == X87Up) && "Unknown missing lo part"); break; case SSEUp: case X87Up: llvm_unreachable("Invalid classification for lo word."); // AMD64-ABI 3.2.3p4: Rule 2. Types of class memory are returned via // hidden argument. case Memory: return getIndirectReturnResult(RetTy); // AMD64-ABI 3.2.3p4: Rule 3. If the class is INTEGER, the next // available register of the sequence %rax, %rdx is used. case Integer: ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0); // If we have a sign or zero extended integer, make sure to return Extend // so that the parameter gets the right LLVM IR attributes. 
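// For instance (illustrative, not from this change): for
//   short f(void);
// the return lowers to "define signext i16 @f()", so callers may rely on the
// value already being sign-extended in the return register.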
if (Hi == NoClass && isa(ResType)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); if (RetTy->isIntegralOrEnumerationType() && RetTy->isPromotableIntegerType()) return ABIArgInfo::getExtend(); } break; // AMD64-ABI 3.2.3p4: Rule 4. If the class is SSE, the next // available SSE register of the sequence %xmm0, %xmm1 is used. case SSE: ResType = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0); break; // AMD64-ABI 3.2.3p4: Rule 6. If the class is X87, the value is // returned on the X87 stack in %st0 as 80-bit x87 number. case X87: ResType = llvm::Type::getX86_FP80Ty(getVMContext()); break; // AMD64-ABI 3.2.3p4: Rule 8. If the class is COMPLEX_X87, the real // part of the value is returned in %st0 and the imaginary part in // %st1. case ComplexX87: assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification."); ResType = llvm::StructType::get(llvm::Type::getX86_FP80Ty(getVMContext()), llvm::Type::getX86_FP80Ty(getVMContext()), nullptr); break; } llvm::Type *HighPart = nullptr; switch (Hi) { // Memory was handled previously and X87 should // never occur as a hi class. case Memory: case X87: llvm_unreachable("Invalid classification for hi word."); case ComplexX87: // Previously handled. case NoClass: break; case Integer: HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); if (Lo == NoClass) // Return HighPart at offset 8 in memory. return ABIArgInfo::getDirect(HighPart, 8); break; case SSE: HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); if (Lo == NoClass) // Return HighPart at offset 8 in memory. return ABIArgInfo::getDirect(HighPart, 8); break; // AMD64-ABI 3.2.3p4: Rule 5. If the class is SSEUP, the eightbyte // is passed in the next available eightbyte chunk if the last used // vector register. // // SSEUP should always be preceded by SSE, just widen. case SSEUp: assert(Lo == SSE && "Unexpected SSEUp classification."); ResType = GetByteVectorType(RetTy); break; // AMD64-ABI 3.2.3p4: Rule 7. If the class is X87UP, the value is // returned together with the previous X87 value in %st0. case X87Up: // If X87Up is preceded by X87, we don't need to do // anything. However, in some cases with unions it may not be // preceded by X87. In such situations we follow gcc and pass the // extra bits in an SSE reg. if (Lo != X87) { HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); if (Lo == NoClass) // Return HighPart at offset 8 in memory. return ABIArgInfo::getDirect(HighPart, 8); } break; } // If a high part was specified, merge it together with the low part. It is // known to pass in the high eightbyte of the result. We do this by forming a // first class struct aggregate with the high and low part: {low, high} if (HighPart) ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout()); return ABIArgInfo::getDirect(ResType); } ABIArgInfo X86_64ABIInfo::classifyArgumentType( QualType Ty, unsigned freeIntRegs, unsigned &neededInt, unsigned &neededSSE, bool isNamedArg) const { Ty = useFirstFieldIfTransparentUnion(Ty); X86_64ABIInfo::Class Lo, Hi; classify(Ty, 0, Lo, Hi, isNamedArg); // Check some invariants. // FIXME: Enforce these by construction. 
assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); neededInt = 0; neededSSE = 0; llvm::Type *ResType = nullptr; switch (Lo) { case NoClass: if (Hi == NoClass) return ABIArgInfo::getIgnore(); // If the low part is just padding, it takes no register, leave ResType // null. assert((Hi == SSE || Hi == Integer || Hi == X87Up) && "Unknown missing lo part"); break; // AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument // on the stack. case Memory: // AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or // COMPLEX_X87, it is passed in memory. case X87: case ComplexX87: if (getRecordArgABI(Ty, getCXXABI()) == CGCXXABI::RAA_Indirect) ++neededInt; return getIndirectResult(Ty, freeIntRegs); case SSEUp: case X87Up: llvm_unreachable("Invalid classification for lo word."); // AMD64-ABI 3.2.3p3: Rule 2. If the class is INTEGER, the next // available register of the sequence %rdi, %rsi, %rdx, %rcx, %r8 // and %r9 is used. case Integer: ++neededInt; // Pick an 8-byte type based on the preferred type. ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 0, Ty, 0); // If we have a sign or zero extended integer, make sure to return Extend // so that the parameter gets the right LLVM IR attributes. if (Hi == NoClass && isa(ResType)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); if (Ty->isIntegralOrEnumerationType() && Ty->isPromotableIntegerType()) return ABIArgInfo::getExtend(); } break; // AMD64-ABI 3.2.3p3: Rule 3. If the class is SSE, the next // available SSE register is used, the registers are taken in the // order from %xmm0 to %xmm7. case SSE: { llvm::Type *IRType = CGT.ConvertType(Ty); ResType = GetSSETypeAtOffset(IRType, 0, Ty, 0); ++neededSSE; break; } } llvm::Type *HighPart = nullptr; switch (Hi) { // Memory was handled previously, ComplexX87 and X87 should // never occur as hi classes, and X87Up must be preceded by X87, // which is passed in memory. case Memory: case X87: case ComplexX87: llvm_unreachable("Invalid classification for hi word."); case NoClass: break; case Integer: ++neededInt; // Pick an 8-byte type based on the preferred type. HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8); if (Lo == NoClass) // Pass HighPart at offset 8 in memory. return ABIArgInfo::getDirect(HighPart, 8); break; // X87Up generally doesn't occur here (long double is passed in // memory), except in situations involving unions. case X87Up: case SSE: HighPart = GetSSETypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8); if (Lo == NoClass) // Pass HighPart at offset 8 in memory. return ABIArgInfo::getDirect(HighPart, 8); ++neededSSE; break; // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the // eightbyte is passed in the upper half of the last used SSE // register. This only happens when 128-bit vectors are passed. case SSEUp: assert(Lo == SSE && "Unexpected SSEUp classification"); ResType = GetByteVectorType(Ty); break; } // If a high part was specified, merge it together with the low part. It is // known to pass in the high eightbyte of the result. 
We do this by forming a // first class struct aggregate with the high and low part: {low, high} if (HighPart) ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout()); return ABIArgInfo::getDirect(ResType); } void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); // Keep track of the number of assigned registers. unsigned freeIntRegs = 6, freeSSERegs = 8; // If the return value is indirect, then the hidden argument is consuming one // integer register. if (FI.getReturnInfo().isIndirect()) --freeIntRegs; // The chain argument effectively gives us another free register. if (FI.isChainCall()) ++freeIntRegs; unsigned NumRequiredArgs = FI.getNumRequiredArgs(); // AMD64-ABI 3.2.3p3: Once arguments are classified, the registers // get assigned (in left-to-right order) for passing as follows... unsigned ArgNo = 0; for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); it != ie; ++it, ++ArgNo) { bool IsNamedArg = ArgNo < NumRequiredArgs; unsigned neededInt, neededSSE; it->info = classifyArgumentType(it->type, freeIntRegs, neededInt, neededSSE, IsNamedArg); // AMD64-ABI 3.2.3p3: If there are no registers available for any // eightbyte of an argument, the whole argument is passed on the // stack. If registers have already been assigned for some // eightbytes of such an argument, the assignments get reverted. if (freeIntRegs >= neededInt && freeSSERegs >= neededSSE) { freeIntRegs -= neededInt; freeSSERegs -= neededSSE; } else { it->info = getIndirectResult(it->type, freeIntRegs); } } } static Address EmitX86_64VAArgFromMemory(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) { Address overflow_arg_area_p = CGF.Builder.CreateStructGEP( VAListAddr, 2, CharUnits::fromQuantity(8), "overflow_arg_area_p"); llvm::Value *overflow_arg_area = CGF.Builder.CreateLoad(overflow_arg_area_p, "overflow_arg_area"); // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16 // byte boundary if alignment needed by type exceeds 8 byte boundary. // It isn't stated explicitly in the standard, but in practice we use // alignment greater than 16 where necessary. CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty); if (Align > CharUnits::fromQuantity(8)) { overflow_arg_area = emitRoundPointerUpToAlignment(CGF, overflow_arg_area, Align); } // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area. llvm::Type *LTy = CGF.ConvertTypeForMem(Ty); llvm::Value *Res = CGF.Builder.CreateBitCast(overflow_arg_area, llvm::PointerType::getUnqual(LTy)); // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to: // l->overflow_arg_area + sizeof(type). // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to // an 8 byte boundary. uint64_t SizeInBytes = (CGF.getContext().getTypeSize(Ty) + 7) / 8; llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, (SizeInBytes + 7) & ~7); overflow_arg_area = CGF.Builder.CreateGEP(overflow_arg_area, Offset, "overflow_arg_area.next"); CGF.Builder.CreateStore(overflow_arg_area, overflow_arg_area_p); // AMD64-ABI 3.5.7p5: Step 11. Return the fetched type. 
return Address(Res, Align); } Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { // Assume that va_list type is correct; should be pointer to LLVM type: // struct { // i32 gp_offset; // i32 fp_offset; // i8* overflow_arg_area; // i8* reg_save_area; // }; unsigned neededInt, neededSSE; Ty = getContext().getCanonicalType(Ty); ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE, /*isNamedArg*/false); // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed // in the registers. If not go to step 7. if (!neededInt && !neededSSE) return EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty); // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of // general purpose registers needed to pass type and num_fp to hold // the number of floating point registers needed. // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into // registers. In the case: l->gp_offset > 48 - num_gp * 8 or // l->fp_offset > 304 - num_fp * 16 go to step 7. // // NOTE: 304 is a typo, there are (6 * 8 + 8 * 16) = 176 bytes of // register save space). llvm::Value *InRegs = nullptr; Address gp_offset_p = Address::invalid(), fp_offset_p = Address::invalid(); llvm::Value *gp_offset = nullptr, *fp_offset = nullptr; if (neededInt) { gp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, CharUnits::Zero(), "gp_offset_p"); gp_offset = CGF.Builder.CreateLoad(gp_offset_p, "gp_offset"); InRegs = llvm::ConstantInt::get(CGF.Int32Ty, 48 - neededInt * 8); InRegs = CGF.Builder.CreateICmpULE(gp_offset, InRegs, "fits_in_gp"); } if (neededSSE) { fp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 1, CharUnits::fromQuantity(4), "fp_offset_p"); fp_offset = CGF.Builder.CreateLoad(fp_offset_p, "fp_offset"); llvm::Value *FitsInFP = llvm::ConstantInt::get(CGF.Int32Ty, 176 - neededSSE * 16); FitsInFP = CGF.Builder.CreateICmpULE(fp_offset, FitsInFP, "fits_in_fp"); InRegs = InRegs ? CGF.Builder.CreateAnd(InRegs, FitsInFP) : FitsInFP; } llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem"); llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock); // Emit code to load the value if it was passed in registers. CGF.EmitBlock(InRegBlock); // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with // an offset of l->gp_offset and/or l->fp_offset. This may require // copying to a temporary location in case the parameter is passed // in different register classes or requires an alignment greater // than 8 for general purpose registers and 16 for XMM registers. // // FIXME: This really results in shameful code when we end up needing to // collect arguments from different places; often what should result in a // simple assembling of a structure from scattered addresses has many more // loads than necessary. Can we clean this up? llvm::Type *LTy = CGF.ConvertTypeForMem(Ty); llvm::Value *RegSaveArea = CGF.Builder.CreateLoad( CGF.Builder.CreateStructGEP(VAListAddr, 3, CharUnits::fromQuantity(16)), "reg_save_area"); Address RegAddr = Address::invalid(); if (neededInt && neededSSE) { // FIXME: Cleanup. 
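// What this block handles, concretely, assuming a hypothetical type:
//   struct IntDouble { int i; double d; };   // classifies {Integer, SSE}
// Such an argument arrives split between a GPR and an XMM register, so
// va_arg must gather one eightbyte from reg_save_area + gp_offset and the
// other from reg_save_area + fp_offset into a temporary before use.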
assert(AI.isDirect() && "Unexpected ABI info for mixed regs"); llvm::StructType *ST = cast(AI.getCoerceToType()); Address Tmp = CGF.CreateMemTemp(Ty); Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST); assert(ST->getNumElements() == 2 && "Unexpected ABI info for mixed regs"); llvm::Type *TyLo = ST->getElementType(0); llvm::Type *TyHi = ST->getElementType(1); assert((TyLo->isFPOrFPVectorTy() ^ TyHi->isFPOrFPVectorTy()) && "Unexpected ABI info for mixed regs"); llvm::Type *PTyLo = llvm::PointerType::getUnqual(TyLo); llvm::Type *PTyHi = llvm::PointerType::getUnqual(TyHi); llvm::Value *GPAddr = CGF.Builder.CreateGEP(RegSaveArea, gp_offset); llvm::Value *FPAddr = CGF.Builder.CreateGEP(RegSaveArea, fp_offset); llvm::Value *RegLoAddr = TyLo->isFPOrFPVectorTy() ? FPAddr : GPAddr; llvm::Value *RegHiAddr = TyLo->isFPOrFPVectorTy() ? GPAddr : FPAddr; // Copy the first element. llvm::Value *V = CGF.Builder.CreateDefaultAlignedLoad( CGF.Builder.CreateBitCast(RegLoAddr, PTyLo)); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0, CharUnits::Zero())); // Copy the second element. V = CGF.Builder.CreateDefaultAlignedLoad( CGF.Builder.CreateBitCast(RegHiAddr, PTyHi)); CharUnits Offset = CharUnits::fromQuantity( getDataLayout().getStructLayout(ST)->getElementOffset(1)); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1, Offset)); RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy); } else if (neededInt) { RegAddr = Address(CGF.Builder.CreateGEP(RegSaveArea, gp_offset), CharUnits::fromQuantity(8)); RegAddr = CGF.Builder.CreateElementBitCast(RegAddr, LTy); // Copy to a temporary if necessary to ensure the appropriate alignment. std::pair SizeAlign = getContext().getTypeInfoInChars(Ty); uint64_t TySize = SizeAlign.first.getQuantity(); CharUnits TyAlign = SizeAlign.second; // Copy into a temporary if the type is more aligned than the // register save area. if (TyAlign.getQuantity() > 8) { Address Tmp = CGF.CreateMemTemp(Ty); CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false); RegAddr = Tmp; } } else if (neededSSE == 1) { RegAddr = Address(CGF.Builder.CreateGEP(RegSaveArea, fp_offset), CharUnits::fromQuantity(16)); RegAddr = CGF.Builder.CreateElementBitCast(RegAddr, LTy); } else { assert(neededSSE == 2 && "Invalid number of needed registers!"); // SSE registers are spaced 16 bytes apart in the register save // area, we need to collect the two eightbytes together. // The ABI isn't explicit about this, but it seems reasonable // to assume that the slots are 16-byte aligned, since the stack is // naturally 16-byte aligned and the prologue is expected to store // all the SSE registers to the RSA. Address RegAddrLo = Address(CGF.Builder.CreateGEP(RegSaveArea, fp_offset), CharUnits::fromQuantity(16)); Address RegAddrHi = CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo, CharUnits::fromQuantity(16)); llvm::Type *DoubleTy = CGF.DoubleTy; llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy, nullptr); llvm::Value *V; Address Tmp = CGF.CreateMemTemp(Ty); Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST); V = CGF.Builder.CreateLoad( CGF.Builder.CreateElementBitCast(RegAddrLo, DoubleTy)); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0, CharUnits::Zero())); V = CGF.Builder.CreateLoad( CGF.Builder.CreateElementBitCast(RegAddrHi, DoubleTy)); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1, CharUnits::fromQuantity(8))); RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy); } // AMD64-ABI 3.5.7p5: Step 5. 
Set: // l->gp_offset = l->gp_offset + num_gp * 8 // l->fp_offset = l->fp_offset + num_fp * 16. if (neededInt) { llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededInt * 8); CGF.Builder.CreateStore(CGF.Builder.CreateAdd(gp_offset, Offset), gp_offset_p); } if (neededSSE) { llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededSSE * 16); CGF.Builder.CreateStore(CGF.Builder.CreateAdd(fp_offset, Offset), fp_offset_p); } CGF.EmitBranch(ContBlock); // Emit code to load the value if it was passed in memory. CGF.EmitBlock(InMemBlock); Address MemAddr = EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty); // Return the appropriate result. CGF.EmitBlock(ContBlock); Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock, "vaarg.addr"); return ResAddr; } Address X86_64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, CGF.getContext().getTypeInfoInChars(Ty), CharUnits::fromQuantity(8), /*allowHigherAlign*/ false); } ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType) const { if (Ty->isVoidType()) return ABIArgInfo::getIgnore(); if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); TypeInfo Info = getContext().getTypeInfo(Ty); uint64_t Width = Info.Width; CharUnits Align = getContext().toCharUnitsFromBits(Info.Align); const RecordType *RT = Ty->getAs(); if (RT) { if (!IsReturnType) { if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); } if (RT->getDecl()->hasFlexibleArrayMember()) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); } // vectorcall adds the concept of a homogenous vector aggregate, similar to // other targets. const Type *Base = nullptr; uint64_t NumElts = 0; if (FreeSSERegs && isHomogeneousAggregate(Ty, Base, NumElts)) { if (FreeSSERegs >= NumElts) { FreeSSERegs -= NumElts; if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType()) return ABIArgInfo::getDirect(); return ABIArgInfo::getExpand(); } return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); } if (Ty->isMemberPointerType()) { // If the member pointer is represented by an LLVM int or ptr, pass it // directly. llvm::Type *LLTy = CGT.ConvertType(Ty); if (LLTy->isPointerTy() || LLTy->isIntegerTy()) return ABIArgInfo::getDirect(); } if (RT || Ty->isAnyComplexType() || Ty->isMemberPointerType()) { // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is // not 1, 2, 4, or 8 bytes, must be passed by reference." if (Width > 64 || !llvm::isPowerOf2_64(Width)) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); // Otherwise, coerce it to a small integer. return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Width)); } // Bool type is always extended to the ABI, other builtin types are not // extended. const BuiltinType *BT = Ty->getAs(); if (BT && BT->getKind() == BuiltinType::Bool) return ABIArgInfo::getExtend(); // Mingw64 GCC uses the old 80 bit extended precision floating point unit. It // passes them indirectly through memory. 
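// Worked examples of the MS x64 size rule quoted above, assuming the
// hypothetical types below (not part of this change):
//   struct S4  { char c[4];  };   // 4 bytes, power of 2 -> coerced to i32
//   struct S8  { char c[8];  };   // 8 bytes             -> coerced to i64
//   struct S12 { char c[12]; };   // not 1/2/4/8 bytes   -> passed by reference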
if (IsMingw64 && BT && BT->getKind() == BuiltinType::LongDouble) { const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); if (LDF == &llvm::APFloat::x87DoubleExtended) return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); } return ABIArgInfo::getDirect(); } void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { bool IsVectorCall = FI.getCallingConvention() == llvm::CallingConv::X86_VectorCall; // We can use up to 4 SSE return registers with vectorcall. unsigned FreeSSERegs = IsVectorCall ? 4 : 0; if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true); // We can use up to 6 SSE register parameters with vectorcall. FreeSSERegs = IsVectorCall ? 6 : 0; for (auto &I : FI.arguments()) I.info = classify(I.type, FreeSSERegs, false); } Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, CGF.getContext().getTypeInfoInChars(Ty), CharUnits::fromQuantity(8), /*allowHigherAlign*/ false); } // PowerPC-32 namespace { /// PPC32_SVR4_ABIInfo - The 32-bit PowerPC ELF (SVR4) ABI information. class PPC32_SVR4_ABIInfo : public DefaultABIInfo { bool IsSoftFloatABI; public: PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI) : DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI) {} Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; }; class PPC32TargetCodeGenInfo : public TargetCodeGenInfo { public: PPC32TargetCodeGenInfo(CodeGenTypes &CGT, bool SoftFloatABI) : TargetCodeGenInfo(new PPC32_SVR4_ABIInfo(CGT, SoftFloatABI)) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. return 1; // r1 is the dedicated stack pointer } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; }; } // TODO: this implementation is now likely redundant with // DefaultABIInfo::EmitVAArg. Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, QualType Ty) const { const unsigned OverflowLimit = 8; if (const ComplexType *CTy = Ty->getAs()) { // TODO: Implement this. For now ignore. (void)CTy; return Address::invalid(); // FIXME? } // struct __va_list_tag { // unsigned char gpr; // unsigned char fpr; // unsigned short reserved; // void *overflow_arg_area; // void *reg_save_area; // }; bool isI64 = Ty->isIntegerType() && getContext().getTypeSize(Ty) == 64; bool isInt = Ty->isIntegerType() || Ty->isPointerType() || Ty->isAggregateType(); bool isF64 = Ty->isFloatingType() && getContext().getTypeSize(Ty) == 64; // All aggregates are passed indirectly? That doesn't seem consistent // with the argument-lowering code. bool isIndirect = Ty->isAggregateType(); CGBuilderTy &Builder = CGF.Builder; // The calling convention either uses 1-2 GPRs or 1 FPR. Address NumRegsAddr = Address::invalid(); if (isInt || IsSoftFloatABI) { NumRegsAddr = Builder.CreateStructGEP(VAList, 0, CharUnits::Zero(), "gpr"); } else { NumRegsAddr = Builder.CreateStructGEP(VAList, 1, CharUnits::One(), "fpr"); } llvm::Value *NumRegs = Builder.CreateLoad(NumRegsAddr, "numUsedRegs"); // "Align" the register count when TY is i64. 
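// Illustration of the rounding below, assuming the 32-bit SVR4 convention in
// this file: with gpr == 3 slots already used, an i64 must start on an even
// register pair, so the count first rounds up:
//   NumRegs = (3 + 1) & ~1;   // == 4, i.e. the value lands in r7:r8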
if (isI64 || (isF64 && IsSoftFloatABI)) { NumRegs = Builder.CreateAdd(NumRegs, Builder.getInt8(1)); NumRegs = Builder.CreateAnd(NumRegs, Builder.getInt8((uint8_t) ~1U)); } llvm::Value *CC = Builder.CreateICmpULT(NumRegs, Builder.getInt8(OverflowLimit), "cond"); llvm::BasicBlock *UsingRegs = CGF.createBasicBlock("using_regs"); llvm::BasicBlock *UsingOverflow = CGF.createBasicBlock("using_overflow"); llvm::BasicBlock *Cont = CGF.createBasicBlock("cont"); Builder.CreateCondBr(CC, UsingRegs, UsingOverflow); llvm::Type *DirectTy = CGF.ConvertType(Ty); if (isIndirect) DirectTy = DirectTy->getPointerTo(0); // Case 1: consume registers. Address RegAddr = Address::invalid(); { CGF.EmitBlock(UsingRegs); Address RegSaveAreaPtr = Builder.CreateStructGEP(VAList, 4, CharUnits::fromQuantity(8)); RegAddr = Address(Builder.CreateLoad(RegSaveAreaPtr), CharUnits::fromQuantity(8)); assert(RegAddr.getElementType() == CGF.Int8Ty); // Floating-point registers start after the general-purpose registers. if (!(isInt || IsSoftFloatABI)) { RegAddr = Builder.CreateConstInBoundsByteGEP(RegAddr, CharUnits::fromQuantity(32)); } // Get the address of the saved value by scaling the number of // registers we've used by the number of CharUnits RegSize = CharUnits::fromQuantity((isInt || IsSoftFloatABI) ? 4 : 8); llvm::Value *RegOffset = Builder.CreateMul(NumRegs, Builder.getInt8(RegSize.getQuantity())); RegAddr = Address(Builder.CreateInBoundsGEP(CGF.Int8Ty, RegAddr.getPointer(), RegOffset), RegAddr.getAlignment().alignmentOfArrayElement(RegSize)); RegAddr = Builder.CreateElementBitCast(RegAddr, DirectTy); // Increase the used-register count. NumRegs = Builder.CreateAdd(NumRegs, Builder.getInt8((isI64 || (isF64 && IsSoftFloatABI)) ? 2 : 1)); Builder.CreateStore(NumRegs, NumRegsAddr); CGF.EmitBranch(Cont); } // Case 2: consume space in the overflow area. Address MemAddr = Address::invalid(); { CGF.EmitBlock(UsingOverflow); Builder.CreateStore(Builder.getInt8(OverflowLimit), NumRegsAddr); // Everything in the overflow area is rounded up to a size of at least 4. CharUnits OverflowAreaAlign = CharUnits::fromQuantity(4); CharUnits Size; if (!isIndirect) { auto TypeInfo = CGF.getContext().getTypeInfoInChars(Ty); Size = TypeInfo.first.alignTo(OverflowAreaAlign); } else { Size = CGF.getPointerSize(); } Address OverflowAreaAddr = Builder.CreateStructGEP(VAList, 3, CharUnits::fromQuantity(4)); Address OverflowArea(Builder.CreateLoad(OverflowAreaAddr, "argp.cur"), OverflowAreaAlign); // Round up address of argument to alignment CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty); if (Align > OverflowAreaAlign) { llvm::Value *Ptr = OverflowArea.getPointer(); OverflowArea = Address(emitRoundPointerUpToAlignment(CGF, Ptr, Align), Align); } MemAddr = Builder.CreateElementBitCast(OverflowArea, DirectTy); // Increase the overflow area. OverflowArea = Builder.CreateConstInBoundsByteGEP(OverflowArea, Size); Builder.CreateStore(OverflowArea.getPointer(), OverflowAreaAddr); CGF.EmitBranch(Cont); } CGF.EmitBlock(Cont); // Merge the cases with a phi. Address Result = emitMergePHI(CGF, RegAddr, UsingRegs, MemAddr, UsingOverflow, "vaarg.addr"); // Load the pointer if the argument was passed indirectly. if (isIndirect) { Result = Address(Builder.CreateLoad(Result, "aggr"), getContext().getTypeAlignInChars(Ty)); } return Result; } bool PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { // This is calculated from the LLVM and GCC tables and verified // against gcc output. 
AFAIK all ABIs use the same encoding. CodeGen::CGBuilderTy &Builder = CGF.Builder; llvm::IntegerType *i8 = CGF.Int8Ty; llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4); llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8); llvm::Value *Sixteen8 = llvm::ConstantInt::get(i8, 16); // 0-31: r0-31, the 4-byte general-purpose registers AssignToArrayRange(Builder, Address, Four8, 0, 31); // 32-63: fp0-31, the 8-byte floating-point registers AssignToArrayRange(Builder, Address, Eight8, 32, 63); // 64-76 are various 4-byte special-purpose registers: // 64: mq // 65: lr // 66: ctr // 67: ap // 68-75 cr0-7 // 76: xer AssignToArrayRange(Builder, Address, Four8, 64, 76); // 77-108: v0-31, the 16-byte vector registers AssignToArrayRange(Builder, Address, Sixteen8, 77, 108); // 109: vrsave // 110: vscr // 111: spe_acc // 112: spefscr // 113: sfp AssignToArrayRange(Builder, Address, Four8, 109, 113); return false; } // PowerPC-64 namespace { /// PPC64_SVR4_ABIInfo - The 64-bit PowerPC ELF (SVR4) ABI information. class PPC64_SVR4_ABIInfo : public ABIInfo { public: enum ABIKind { ELFv1 = 0, ELFv2 }; private: static const unsigned GPRBits = 64; ABIKind Kind; bool HasQPX; + bool IsSoftFloatABI; // A vector of float or double will be promoted to <4 x f32> or <4 x f64> and // will be passed in a QPX register. bool IsQPXVectorTy(const Type *Ty) const { if (!HasQPX) return false; if (const VectorType *VT = Ty->getAs()) { unsigned NumElements = VT->getNumElements(); if (NumElements == 1) return false; if (VT->getElementType()->isSpecificBuiltinType(BuiltinType::Double)) { if (getContext().getTypeSize(Ty) <= 256) return true; } else if (VT->getElementType()-> isSpecificBuiltinType(BuiltinType::Float)) { if (getContext().getTypeSize(Ty) <= 128) return true; } } return false; } bool IsQPXVectorTy(QualType Ty) const { return IsQPXVectorTy(Ty.getTypePtr()); } public: - PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind, bool HasQPX) - : ABIInfo(CGT), Kind(Kind), HasQPX(HasQPX) {} + PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind, bool HasQPX, + bool SoftFloatABI) + : ABIInfo(CGT), Kind(Kind), HasQPX(HasQPX), + IsSoftFloatABI(SoftFloatABI) {} bool isPromotableTypeForABI(QualType Ty) const; CharUnits getParamTypeAlignment(QualType Ty) const; ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType Ty) const; bool isHomogeneousAggregateBaseType(QualType Ty) const override; bool isHomogeneousAggregateSmallEnough(const Type *Ty, uint64_t Members) const override; // TODO: We can add more logic to computeInfo to improve performance. // Example: For aggregate arguments that fit in a register, we could // use getDirectInReg (as is done below for structs containing a single // floating-point value) to avoid pushing them to memory on function // entry. This would require changing the logic in PPCISelLowering // when lowering the parameters in the caller and args in the callee. void computeInfo(CGFunctionInfo &FI) const override { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) { // We rely on the default argument classification for the most part. // One exception: An aggregate containing a single floating-point // or vector item must be passed in a register if one is available. 
const Type *T = isSingleElementStruct(I.type, getContext()); if (T) { const BuiltinType *BT = T->getAs(); if (IsQPXVectorTy(T) || (T->isVectorType() && getContext().getTypeSize(T) == 128) || (BT && BT->isFloatingPoint())) { QualType QT(T, 0); I.info = ABIArgInfo::getDirectInReg(CGT.ConvertType(QT)); continue; } } I.info = classifyArgumentType(I.type); } } Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; }; class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo { public: PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT, - PPC64_SVR4_ABIInfo::ABIKind Kind, bool HasQPX) - : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind, HasQPX)) {} + PPC64_SVR4_ABIInfo::ABIKind Kind, bool HasQPX, + bool SoftFloatABI) + : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind, HasQPX, + SoftFloatABI)) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. return 1; // r1 is the dedicated stack pointer } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; }; class PPC64TargetCodeGenInfo : public DefaultTargetCodeGenInfo { public: PPC64TargetCodeGenInfo(CodeGenTypes &CGT) : DefaultTargetCodeGenInfo(CGT) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. return 1; // r1 is the dedicated stack pointer } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; }; } // Return true if the ABI requires Ty to be passed sign- or zero- // extended to 64 bits. bool PPC64_SVR4_ABIInfo::isPromotableTypeForABI(QualType Ty) const { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); // Promotable integer types are required to be promoted by the ABI. if (Ty->isPromotableIntegerType()) return true; // In addition to the usual promotable integer types, we also need to // extend all 32-bit types, since the ABI requires promotion to 64 bits. if (const BuiltinType *BT = Ty->getAs()) switch (BT->getKind()) { case BuiltinType::Int: case BuiltinType::UInt: return true; default: break; } return false; } /// isAlignedParamType - Determine whether a type requires 16-byte or /// higher alignment in the parameter area. Always returns at least 8. CharUnits PPC64_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const { // Complex types are passed just like their elements. if (const ComplexType *CTy = Ty->getAs()) Ty = CTy->getElementType(); // Only vector types of size 16 bytes need alignment (larger types are // passed via reference, smaller types are not aligned). if (IsQPXVectorTy(Ty)) { if (getContext().getTypeSize(Ty) > 128) return CharUnits::fromQuantity(32); return CharUnits::fromQuantity(16); } else if (Ty->isVectorType()) { return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16 : 8); } // For single-element float/vector structs, we consider the whole type // to have the same alignment requirements as its single element. const Type *AlignAsType = nullptr; const Type *EltType = isSingleElementStruct(Ty, getContext()); if (EltType) { const BuiltinType *BT = EltType->getAs(); if (IsQPXVectorTy(EltType) || (EltType->isVectorType() && getContext().getTypeSize(EltType) == 128) || (BT && BT->isFloatingPoint())) AlignAsType = EltType; } // Likewise for ELFv2 homogeneous aggregates. 
  const Type *Base = nullptr;
  uint64_t Members = 0;
  if (!AlignAsType && Kind == ELFv2 &&
      isAggregateTypeForABI(Ty) && isHomogeneousAggregate(Ty, Base, Members))
    AlignAsType = Base;

  // With special case aggregates, only vector base types need alignment.
  if (AlignAsType && IsQPXVectorTy(AlignAsType)) {
    if (getContext().getTypeSize(AlignAsType) > 128)
      return CharUnits::fromQuantity(32);

    return CharUnits::fromQuantity(16);
  } else if (AlignAsType) {
    return CharUnits::fromQuantity(AlignAsType->isVectorType() ? 16 : 8);
  }

  // Otherwise, we only need alignment for any aggregate type that
  // has an alignment requirement of >= 16 bytes.
  if (isAggregateTypeForABI(Ty) && getContext().getTypeAlign(Ty) >= 128) {
    if (HasQPX && getContext().getTypeAlign(Ty) >= 256)
      return CharUnits::fromQuantity(32);
    return CharUnits::fromQuantity(16);
  }

  return CharUnits::fromQuantity(8);
}

/// isHomogeneousAggregate - Return true if a type is an ELFv2 homogeneous
/// aggregate.  Base is set to the base element type, and Members is set
/// to the number of base elements.
bool ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base,
                                     uint64_t &Members) const {
  if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
    uint64_t NElements = AT->getSize().getZExtValue();
    if (NElements == 0)
      return false;
    if (!isHomogeneousAggregate(AT->getElementType(), Base, Members))
      return false;
    Members *= NElements;
  } else if (const RecordType *RT = Ty->getAs<RecordType>()) {
    const RecordDecl *RD = RT->getDecl();
    if (RD->hasFlexibleArrayMember())
      return false;

    Members = 0;

    // If this is a C++ record, check the bases first.
    if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
      for (const auto &I : CXXRD->bases()) {
        // Ignore empty records.
        if (isEmptyRecord(getContext(), I.getType(), true))
          continue;

        uint64_t FldMembers;
        if (!isHomogeneousAggregate(I.getType(), Base, FldMembers))
          return false;

        Members += FldMembers;
      }
    }

    for (const auto *FD : RD->fields()) {
      // Ignore (non-zero arrays of) empty records.
      QualType FT = FD->getType();
      while (const ConstantArrayType *AT =
                 getContext().getAsConstantArrayType(FT)) {
        if (AT->getSize().getZExtValue() == 0)
          return false;
        FT = AT->getElementType();
      }
      if (isEmptyRecord(getContext(), FT, true))
        continue;

      // For compatibility with GCC, ignore empty bitfields in C++ mode.
      if (getContext().getLangOpts().CPlusPlus &&
          FD->isBitField() && FD->getBitWidthValue(getContext()) == 0)
        continue;

      uint64_t FldMembers;
      if (!isHomogeneousAggregate(FD->getType(), Base, FldMembers))
        return false;

      Members = (RD->isUnion() ?
                 std::max(Members, FldMembers) : Members + FldMembers);
    }

    if (!Base)
      return false;

    // Ensure there is no padding.
    if (getContext().getTypeSize(Base) * Members !=
        getContext().getTypeSize(Ty))
      return false;
  } else {
    Members = 1;
    if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
      Members = 2;
      Ty = CT->getElementType();
    }

    // Most ABIs only support float, double, and some vector type widths.
    if (!isHomogeneousAggregateBaseType(Ty))
      return false;

    // The base type must be the same for all members.  Types that
    // agree in both total size and mode (float vs. vector) are
    // treated as being equivalent here.
    const Type *TyPtr = Ty.getTypePtr();
    if (!Base) {
      Base = TyPtr;
      // If it's a non-power-of-2 vector, its size is already a power-of-2,
      // so make sure to widen it explicitly.
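      //
      // Illustrative example (for exposition only): a three-element vector
      //
      //   typedef float v3f32 __attribute__((ext_vector_type(3)));
      //
      // already measures 16 bytes, so the base type recorded here is the
      // widened <4 x float>, which keeps the size comparison below honest.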
      if (const VectorType *VT = Base->getAs<VectorType>()) {
        QualType EltTy = VT->getElementType();
        unsigned NumElements =
            getContext().getTypeSize(VT) / getContext().getTypeSize(EltTy);
        Base = getContext()
                   .getVectorType(EltTy, NumElements, VT->getVectorKind())
                   .getTypePtr();
      }
    }

    if (Base->isVectorType() != TyPtr->isVectorType() ||
        getContext().getTypeSize(Base) != getContext().getTypeSize(TyPtr))
      return false;
  }
  return Members > 0 && isHomogeneousAggregateSmallEnough(Base, Members);
}

bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
  // Homogeneous aggregates for ELFv2 must have base types of float,
  // double, long double, or 128-bit vectors.
  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
    if (BT->getKind() == BuiltinType::Float ||
        BT->getKind() == BuiltinType::Double ||
-        BT->getKind() == BuiltinType::LongDouble)
+        BT->getKind() == BuiltinType::LongDouble) {
+      if (IsSoftFloatABI)
+        return false;
      return true;
+    }
  }
  if (const VectorType *VT = Ty->getAs<VectorType>()) {
    if (getContext().getTypeSize(VT) == 128 || IsQPXVectorTy(Ty))
      return true;
  }
  return false;
}

bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateSmallEnough(
    const Type *Base, uint64_t Members) const {
  // Vector types require one register, floating point types require one
  // or two registers depending on their size.
  uint32_t NumRegs =
      Base->isVectorType() ? 1 : (getContext().getTypeSize(Base) + 63) / 64;

  // Homogeneous Aggregates may occupy at most 8 registers.
  return Members * NumRegs <= 8;
}

ABIArgInfo
PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const {
  Ty = useFirstFieldIfTransparentUnion(Ty);

  if (Ty->isAnyComplexType())
    return ABIArgInfo::getDirect();

  // Non-Altivec vector types are passed in GPRs (smaller than 16 bytes)
  // or via reference (larger than 16 bytes).
  if (Ty->isVectorType() && !IsQPXVectorTy(Ty)) {
    uint64_t Size = getContext().getTypeSize(Ty);
    if (Size > 128)
      return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
    else if (Size < 128) {
      llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size);
      return ABIArgInfo::getDirect(CoerceTy);
    }
  }

  if (isAggregateTypeForABI(Ty)) {
    if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
      return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);

    uint64_t ABIAlign = getParamTypeAlignment(Ty).getQuantity();
    uint64_t TyAlign = getContext().getTypeAlignInChars(Ty).getQuantity();

    // ELFv2 homogeneous aggregates are passed as array types.
    const Type *Base = nullptr;
    uint64_t Members = 0;
    if (Kind == ELFv2 &&
        isHomogeneousAggregate(Ty, Base, Members)) {
      llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0));
      llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members);
      return ABIArgInfo::getDirect(CoerceTy);
    }

    // If an aggregate may end up fully in registers, we do not
    // use the ByVal method, but pass the aggregate as array.
    // This is usually beneficial since we avoid forcing the
    // back-end to store the argument to memory.
    uint64_t Bits = getContext().getTypeSize(Ty);
    if (Bits > 0 && Bits <= 8 * GPRBits) {
      llvm::Type *CoerceTy;

      // Types up to 8 bytes are passed as integer type (which will be
      // properly aligned in the argument save area doubleword).
      if (Bits <= GPRBits)
        CoerceTy =
            llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8));
      // Larger types are passed as arrays, with the base type selected
      // according to the required alignment in the save area.
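      // Illustrative example (for exposition only): a 24-byte struct of
      // three 'long' fields has Bits == 192 and ABIAlign == 8, so it is
      // coerced to [3 x i64] and can travel entirely in GPRs when they
      // are free, instead of being forced out to memory byval.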
      else {
        uint64_t RegBits = ABIAlign * 8;
        uint64_t NumRegs = llvm::alignTo(Bits, RegBits) / RegBits;
        llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), RegBits);
        CoerceTy = llvm::ArrayType::get(RegTy, NumRegs);
      }

      return ABIArgInfo::getDirect(CoerceTy);
    }

    // All other aggregates are passed ByVal.
    return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign),
                                   /*ByVal=*/true,
                                   /*Realign=*/TyAlign > ABIAlign);
  }

  return (isPromotableTypeForABI(Ty) ?
          ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
}

ABIArgInfo
PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  if (RetTy->isAnyComplexType())
    return ABIArgInfo::getDirect();

  // Non-Altivec vector types are returned in GPRs (smaller than 16 bytes)
  // or via reference (larger than 16 bytes).
  if (RetTy->isVectorType() && !IsQPXVectorTy(RetTy)) {
    uint64_t Size = getContext().getTypeSize(RetTy);
    if (Size > 128)
      return getNaturalAlignIndirect(RetTy);
    else if (Size < 128) {
      llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size);
      return ABIArgInfo::getDirect(CoerceTy);
    }
  }

  if (isAggregateTypeForABI(RetTy)) {
    // ELFv2 homogeneous aggregates are returned as array types.
    const Type *Base = nullptr;
    uint64_t Members = 0;
    if (Kind == ELFv2 &&
        isHomogeneousAggregate(RetTy, Base, Members)) {
      llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0));
      llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members);
      return ABIArgInfo::getDirect(CoerceTy);
    }

    // ELFv2 small aggregates are returned in up to two registers.
    uint64_t Bits = getContext().getTypeSize(RetTy);
    if (Kind == ELFv2 && Bits <= 2 * GPRBits) {
      if (Bits == 0)
        return ABIArgInfo::getIgnore();

      llvm::Type *CoerceTy;
      if (Bits > GPRBits) {
        CoerceTy = llvm::IntegerType::get(getVMContext(), GPRBits);
        CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy, nullptr);
      } else
        CoerceTy =
            llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8));
      return ABIArgInfo::getDirect(CoerceTy);
    }

    // All other aggregates are returned indirectly.
    return getNaturalAlignIndirect(RetTy);
  }

  return (isPromotableTypeForABI(RetTy) ?
          ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
}

// Based on ARMABIInfo::EmitVAArg, adjusted for 64-bit machine.
Address PPC64_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                      QualType Ty) const {
  auto TypeInfo = getContext().getTypeInfoInChars(Ty);
  TypeInfo.second = getParamTypeAlignment(Ty);

  CharUnits SlotSize = CharUnits::fromQuantity(8);

  // If we have a complex type and the base type is smaller than 8 bytes,
  // the ABI calls for the real and imaginary parts to be right-adjusted
  // in separate doublewords.  However, Clang expects us to produce a
  // pointer to a structure with the two parts packed tightly.  So generate
  // loads of the real and imaginary parts relative to the va_list pointer,
  // and store them to a temporary structure.
  if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
    CharUnits EltSize = TypeInfo.first / 2;
    if (EltSize < SlotSize) {
      Address Addr = emitVoidPtrDirectVAArg(CGF, VAListAddr, CGF.Int8Ty,
                                            SlotSize * 2, SlotSize,
                                            SlotSize, /*AllowHigher*/ true);

      Address RealAddr = Addr;
      Address ImagAddr = RealAddr;
      if (CGF.CGM.getDataLayout().isBigEndian()) {
        RealAddr = CGF.Builder.CreateConstInBoundsByteGEP(RealAddr,
                                                          SlotSize - EltSize);
        ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(ImagAddr,
                                                      2 * SlotSize - EltSize);
      } else {
        ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize);
      }

      llvm::Type *EltTy = CGF.ConvertTypeForMem(CTy->getElementType());
      RealAddr = CGF.Builder.CreateElementBitCast(RealAddr, EltTy);
      ImagAddr = CGF.Builder.CreateElementBitCast(ImagAddr, EltTy);
      llvm::Value *Real = CGF.Builder.CreateLoad(RealAddr, ".vareal");
      llvm::Value *Imag = CGF.Builder.CreateLoad(ImagAddr, ".vaimag");

      Address Temp = CGF.CreateMemTemp(Ty, "vacplx");
      CGF.EmitStoreOfComplex({Real, Imag}, CGF.MakeAddrLValue(Temp, Ty),
                             /*init*/ true);
      return Temp;
    }
  }

  // Otherwise, just use the general rule.
  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false,
                          TypeInfo, SlotSize, /*AllowHigher*/ true);
}

static bool
PPC64_initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                              llvm::Value *Address) {
  // This is calculated from the LLVM and GCC tables and verified
  // against gcc output.  AFAIK all ABIs use the same encoding.

  CodeGen::CGBuilderTy &Builder = CGF.Builder;

  llvm::IntegerType *i8 = CGF.Int8Ty;
  llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4);
  llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8);
  llvm::Value *Sixteen8 = llvm::ConstantInt::get(i8, 16);

  // 0-31: r0-31, the 8-byte general-purpose registers
  AssignToArrayRange(Builder, Address, Eight8, 0, 31);

  // 32-63: fp0-31, the 8-byte floating-point registers
  AssignToArrayRange(Builder, Address, Eight8, 32, 63);

  // 64-76 are various 4-byte special-purpose registers:
  // 64: mq
  // 65: lr
  // 66: ctr
  // 67: ap
  // 68-75 cr0-7
  // 76: xer
  AssignToArrayRange(Builder, Address, Four8, 64, 76);

  // 77-108: v0-31, the 16-byte vector registers
  AssignToArrayRange(Builder, Address, Sixteen8, 77, 108);

  // 109: vrsave
  // 110: vscr
  // 111: spe_acc
  // 112: spefscr
  // 113: sfp
  AssignToArrayRange(Builder, Address, Four8, 109, 113);

  return false;
}

bool
PPC64_SVR4_TargetCodeGenInfo::initDwarfEHRegSizeTable(
  CodeGen::CodeGenFunction &CGF,
  llvm::Value *Address) const {
  return PPC64_initDwarfEHRegSizeTable(CGF, Address);
}

bool
PPC64TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                                                llvm::Value *Address) const {
  return PPC64_initDwarfEHRegSizeTable(CGF, Address);
}

//===----------------------------------------------------------------------===//
// AArch64 ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

class AArch64ABIInfo : public SwiftABIInfo {
public:
  enum ABIKind {
    AAPCS = 0,
    DarwinPCS
  };

private:
  ABIKind Kind;

public:
  AArch64ABIInfo(CodeGenTypes &CGT, ABIKind Kind)
    : SwiftABIInfo(CGT), Kind(Kind) {}

private:
  ABIKind getABIKind() const { return Kind; }
  bool isDarwinPCS() const { return Kind == DarwinPCS; }

  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyArgumentType(QualType RetTy) const;
  bool isHomogeneousAggregateBaseType(QualType Ty) const override;
  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                         uint64_t Members) const override;
  bool isIllegalVectorType(QualType Ty) const;

  void computeInfo(CGFunctionInfo &FI) const override {
    if (!getCXXABI().classifyReturnType(FI))
      FI.getReturnInfo() = classifyReturnType(FI.getReturnType());

    for (auto &it : FI.arguments())
      it.info = classifyArgumentType(it.type);
  }

  Address EmitDarwinVAArg(Address VAListAddr, QualType Ty,
                          CodeGenFunction &CGF) const;

  Address EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
                         CodeGenFunction &CGF) const;

  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override {
    return isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF)
                         : EmitAAPCSVAArg(VAListAddr, Ty, CGF);
  }

  bool shouldPassIndirectlyForSwift(CharUnits totalSize,
                                    ArrayRef<llvm::Type*> scalars,
                                    bool asReturnValue) const override {
    return occupiesMoreThan(CGT, scalars, /*total*/ 4);
  }
};

class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
  AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind Kind)
      : TargetCodeGenInfo(new AArch64ABIInfo(CGT, Kind)) {}

  StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
    return "mov\tfp, fp\t\t; marker for objc_retainAutoreleaseReturnValue";
  }

  int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
    return 31;
  }

  bool doesReturnSlotInterfereWithArgs() const override { return false; }
};
}

ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
  Ty = useFirstFieldIfTransparentUnion(Ty);

  // Handle illegal vector types here.
  if (isIllegalVectorType(Ty)) {
    uint64_t Size = getContext().getTypeSize(Ty);
    // Android promotes <2 x i8> to i16, not i32
    if (isAndroid() && (Size <= 16)) {
      llvm::Type *ResType = llvm::Type::getInt16Ty(getVMContext());
      return ABIArgInfo::getDirect(ResType);
    }
    if (Size <= 32) {
      llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext());
      return ABIArgInfo::getDirect(ResType);
    }
    if (Size == 64) {
      llvm::Type *ResType =
          llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2);
      return ABIArgInfo::getDirect(ResType);
    }
    if (Size == 128) {
      llvm::Type *ResType =
          llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4);
      return ABIArgInfo::getDirect(ResType);
    }
    return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
  }

  if (!isAggregateTypeForABI(Ty)) {
    // Treat an enum type as its underlying type.
    if (const EnumType *EnumTy = Ty->getAs<EnumType>())
      Ty = EnumTy->getDecl()->getIntegerType();

    return (Ty->isPromotableIntegerType() && isDarwinPCS()
                ? ABIArgInfo::getExtend()
                : ABIArgInfo::getDirect());
  }

  // Structures with either a non-trivial destructor or a non-trivial
  // copy constructor are always indirect.
  if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
    return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA ==
                                           CGCXXABI::RAA_DirectInMemory);
  }

  // Empty records are always ignored on Darwin, but actually passed in C++
  // mode elsewhere for GNU compatibility.
  if (isEmptyRecord(getContext(), Ty, true)) {
    if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS())
      return ABIArgInfo::getIgnore();

    return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
  }

  // Homogeneous Floating-point Aggregates (HFAs) need to be expanded.
  const Type *Base = nullptr;
  uint64_t Members = 0;
  if (isHomogeneousAggregate(Ty, Base, Members)) {
    return ABIArgInfo::getDirect(
        llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
  }

  // Aggregates <= 16 bytes are passed directly in registers or on the stack.
  uint64_t Size = getContext().getTypeSize(Ty);
  if (Size <= 128) {
    unsigned Alignment = getContext().getTypeAlign(Ty);
    Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes

    // We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
// For aggregates with 16-byte alignment, we use i128. if (Alignment < 128 && Size == 128) { llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext()); return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64)); } return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size)); } return getNaturalAlignIndirect(Ty, /*ByVal=*/false); } ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); // Large vector types should be returned via memory. if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128) return getNaturalAlignIndirect(RetTy); if (!isAggregateTypeForABI(RetTy)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); return (RetTy->isPromotableIntegerType() && isDarwinPCS() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } if (isEmptyRecord(getContext(), RetTy, true)) return ABIArgInfo::getIgnore(); const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(RetTy, Base, Members)) // Homogeneous Floating-point Aggregates (HFAs) are returned directly. return ABIArgInfo::getDirect(); // Aggregates <= 16 bytes are returned directly in registers or on the stack. uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 128) { unsigned Alignment = getContext().getTypeAlign(RetTy); Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. // For aggregates with 16-byte alignment, we use i128. if (Alignment < 128 && Size == 128) { llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext()); return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64)); } return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size)); } return getNaturalAlignIndirect(RetTy); } /// isIllegalVectorType - check whether the vector type is legal for AArch64. bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const { if (const VectorType *VT = Ty->getAs()) { // Check whether VT is legal. unsigned NumElements = VT->getNumElements(); uint64_t Size = getContext().getTypeSize(VT); // NumElements should be power of 2. if (!llvm::isPowerOf2_32(NumElements)) return true; return Size != 64 && (Size != 128 || NumElements == 1); } return false; } bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { // Homogeneous aggregates for AAPCS64 must have base types of a floating // point type or a short-vector type. This is the same as the 32-bit ABI, // but with the difference that any floating-point type is allowed, // including __fp16. 
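  //
  // Illustrative example (hypothetical type, for exposition only):
  //
  //   struct Halves { __fp16 A, B, C, D; };
  //
  // qualifies as a homogeneous aggregate under AAPCS64 and is passed in
  // four consecutive SIMD/FP registers, even though the 32-bit AAPCS
  // would reject __fp16 as a base type.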
if (const BuiltinType *BT = Ty->getAs()) { if (BT->isFloatingPoint()) return true; } else if (const VectorType *VT = Ty->getAs()) { unsigned VecSize = getContext().getTypeSize(VT); if (VecSize == 64 || VecSize == 128) return true; } return false; } bool AArch64ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, uint64_t Members) const { return Members <= 4; } Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF) const { ABIArgInfo AI = classifyArgumentType(Ty); bool IsIndirect = AI.isIndirect(); llvm::Type *BaseTy = CGF.ConvertType(Ty); if (IsIndirect) BaseTy = llvm::PointerType::getUnqual(BaseTy); else if (AI.getCoerceToType()) BaseTy = AI.getCoerceToType(); unsigned NumRegs = 1; if (llvm::ArrayType *ArrTy = dyn_cast(BaseTy)) { BaseTy = ArrTy->getElementType(); NumRegs = ArrTy->getNumElements(); } bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy(); // The AArch64 va_list type and handling is specified in the Procedure Call // Standard, section B.4: // // struct { // void *__stack; // void *__gr_top; // void *__vr_top; // int __gr_offs; // int __vr_offs; // }; llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg"); llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack"); llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); auto TyInfo = getContext().getTypeInfoInChars(Ty); CharUnits TyAlign = TyInfo.second; Address reg_offs_p = Address::invalid(); llvm::Value *reg_offs = nullptr; int reg_top_index; CharUnits reg_top_offset; int RegSize = IsIndirect ? 8 : TyInfo.first.getQuantity(); if (!IsFPR) { // 3 is the field number of __gr_offs reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, CharUnits::fromQuantity(24), "gr_offs_p"); reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs"); reg_top_index = 1; // field number for __gr_top reg_top_offset = CharUnits::fromQuantity(8); RegSize = llvm::alignTo(RegSize, 8); } else { // 4 is the field number of __vr_offs. reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, CharUnits::fromQuantity(28), "vr_offs_p"); reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs"); reg_top_index = 2; // field number for __vr_top reg_top_offset = CharUnits::fromQuantity(16); RegSize = 16 * NumRegs; } //======================================= // Find out where argument was passed //======================================= // If reg_offs >= 0 we're already using the stack for this type of // argument. We don't want to keep updating reg_offs (in case it overflows, // though anyone passing 2GB of arguments, each at most 16 bytes, deserves // whatever they get). llvm::Value *UsingStack = nullptr; UsingStack = CGF.Builder.CreateICmpSGE( reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, 0)); CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock); // Otherwise, at least some kind of argument could go in these registers, the // question is whether this particular type is too big. CGF.EmitBlock(MaybeRegBlock); // Integer arguments may need to correct register alignment (for example a // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we // align __gr_offs to calculate the potential address. 
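  //
  // Illustrative walk-through (for exposition only): if __gr_offs is -40
  // after an 8-byte argument and a 16-byte-aligned __int128 is requested,
  // the add-and-mask below rounds -40 up to -32, so the value is reloaded
  // from an even/odd register pair rather than straddling a misaligned one.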
if (!IsFPR && !IsIndirect && TyAlign.getQuantity() > 8) { int Align = TyAlign.getQuantity(); reg_offs = CGF.Builder.CreateAdd( reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, Align - 1), "align_regoffs"); reg_offs = CGF.Builder.CreateAnd( reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, -Align), "aligned_regoffs"); } // Update the gr_offs/vr_offs pointer for next call to va_arg on this va_list. // The fact that this is done unconditionally reflects the fact that // allocating an argument to the stack also uses up all the remaining // registers of the appropriate kind. llvm::Value *NewOffset = nullptr; NewOffset = CGF.Builder.CreateAdd( reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, RegSize), "new_reg_offs"); CGF.Builder.CreateStore(NewOffset, reg_offs_p); // Now we're in a position to decide whether this argument really was in // registers or not. llvm::Value *InRegs = nullptr; InRegs = CGF.Builder.CreateICmpSLE( NewOffset, llvm::ConstantInt::get(CGF.Int32Ty, 0), "inreg"); CGF.Builder.CreateCondBr(InRegs, InRegBlock, OnStackBlock); //======================================= // Argument was in registers //======================================= // Now we emit the code for if the argument was originally passed in // registers. First start the appropriate block: CGF.EmitBlock(InRegBlock); llvm::Value *reg_top = nullptr; Address reg_top_p = CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, reg_top_offset, "reg_top_p"); reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top"); Address BaseAddr(CGF.Builder.CreateInBoundsGEP(reg_top, reg_offs), CharUnits::fromQuantity(IsFPR ? 16 : 8)); Address RegAddr = Address::invalid(); llvm::Type *MemTy = CGF.ConvertTypeForMem(Ty); if (IsIndirect) { // If it's been passed indirectly (actually a struct), whatever we find from // stored registers or on the stack will actually be a struct **. MemTy = llvm::PointerType::getUnqual(MemTy); } const Type *Base = nullptr; uint64_t NumMembers = 0; bool IsHFA = isHomogeneousAggregate(Ty, Base, NumMembers); if (IsHFA && NumMembers > 1) { // Homogeneous aggregates passed in registers will have their elements split // and stored 16-bytes apart regardless of size (they're notionally in qN, // qN+1, ...). We reload and store into a temporary local variable // contiguously. assert(!IsIndirect && "Homogeneous aggregates should be passed directly"); auto BaseTyInfo = getContext().getTypeInfoInChars(QualType(Base, 0)); llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0)); llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers); Address Tmp = CGF.CreateTempAlloca(HFATy, std::max(TyAlign, BaseTyInfo.second)); // On big-endian platforms, the value will be right-aligned in its slot. int Offset = 0; if (CGF.CGM.getDataLayout().isBigEndian() && BaseTyInfo.first.getQuantity() < 16) Offset = 16 - BaseTyInfo.first.getQuantity(); for (unsigned i = 0; i < NumMembers; ++i) { CharUnits BaseOffset = CharUnits::fromQuantity(16 * i + Offset); Address LoadAddr = CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, BaseOffset); LoadAddr = CGF.Builder.CreateElementBitCast(LoadAddr, BaseTy); Address StoreAddr = CGF.Builder.CreateConstArrayGEP(Tmp, i, BaseTyInfo.first); llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr); CGF.Builder.CreateStore(Elem, StoreAddr); } RegAddr = CGF.Builder.CreateElementBitCast(Tmp, MemTy); } else { // Otherwise the object is contiguous in memory. // It might be right-aligned in its slot. 
CharUnits SlotSize = BaseAddr.getAlignment(); if (CGF.CGM.getDataLayout().isBigEndian() && !IsIndirect && (IsHFA || !isAggregateTypeForABI(Ty)) && TyInfo.first < SlotSize) { CharUnits Offset = SlotSize - TyInfo.first; BaseAddr = CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, Offset); } RegAddr = CGF.Builder.CreateElementBitCast(BaseAddr, MemTy); } CGF.EmitBranch(ContBlock); //======================================= // Argument was on the stack //======================================= CGF.EmitBlock(OnStackBlock); Address stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, CharUnits::Zero(), "stack_p"); llvm::Value *OnStackPtr = CGF.Builder.CreateLoad(stack_p, "stack"); // Again, stack arguments may need realignment. In this case both integer and // floating-point ones might be affected. if (!IsIndirect && TyAlign.getQuantity() > 8) { int Align = TyAlign.getQuantity(); OnStackPtr = CGF.Builder.CreatePtrToInt(OnStackPtr, CGF.Int64Ty); OnStackPtr = CGF.Builder.CreateAdd( OnStackPtr, llvm::ConstantInt::get(CGF.Int64Ty, Align - 1), "align_stack"); OnStackPtr = CGF.Builder.CreateAnd( OnStackPtr, llvm::ConstantInt::get(CGF.Int64Ty, -Align), "align_stack"); OnStackPtr = CGF.Builder.CreateIntToPtr(OnStackPtr, CGF.Int8PtrTy); } Address OnStackAddr(OnStackPtr, std::max(CharUnits::fromQuantity(8), TyAlign)); // All stack slots are multiples of 8 bytes. CharUnits StackSlotSize = CharUnits::fromQuantity(8); CharUnits StackSize; if (IsIndirect) StackSize = StackSlotSize; else StackSize = TyInfo.first.alignTo(StackSlotSize); llvm::Value *StackSizeC = CGF.Builder.getSize(StackSize); llvm::Value *NewStack = CGF.Builder.CreateInBoundsGEP(OnStackPtr, StackSizeC, "new_stack"); // Write the new value of __stack for the next call to va_arg CGF.Builder.CreateStore(NewStack, stack_p); if (CGF.CGM.getDataLayout().isBigEndian() && !isAggregateTypeForABI(Ty) && TyInfo.first < StackSlotSize) { CharUnits Offset = StackSlotSize - TyInfo.first; OnStackAddr = CGF.Builder.CreateConstInBoundsByteGEP(OnStackAddr, Offset); } OnStackAddr = CGF.Builder.CreateElementBitCast(OnStackAddr, MemTy); CGF.EmitBranch(ContBlock); //======================================= // Tidy up //======================================= CGF.EmitBlock(ContBlock); Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, OnStackAddr, OnStackBlock, "vaargs.addr"); if (IsIndirect) return Address(CGF.Builder.CreateLoad(ResAddr, "vaarg.addr"), TyInfo.second); return ResAddr; } Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF) const { // The backend's lowering doesn't support va_arg for aggregates or // illegal vector types. Lower VAArg here for these cases and use // the LLVM va_arg instruction for everything else. if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty)) return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect()); CharUnits SlotSize = CharUnits::fromQuantity(8); // Empty records are ignored for parameter passing purposes. if (isEmptyRecord(getContext(), Ty, true)) { Address Addr(CGF.Builder.CreateLoad(VAListAddr, "ap.cur"), SlotSize); Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); return Addr; } // The size of the actual thing passed, which might end up just // being a pointer for indirect types. auto TyInfo = getContext().getTypeInfoInChars(Ty); // Arguments bigger than 16 bytes which aren't homogeneous // aggregates should be passed indirectly. 
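  //
  // Illustrative example (hypothetical type, for exposition only): a
  // 32-byte
  //
  //   struct Big { long A, B, C, D; };
  //
  // is not a homogeneous aggregate, so only a pointer to caller-owned
  // storage sits in the Darwin argument area and the value is loaded
  // through it.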
bool IsIndirect = false; if (TyInfo.first.getQuantity() > 16) { const Type *Base = nullptr; uint64_t Members = 0; IsIndirect = !isHomogeneousAggregate(Ty, Base, Members); } return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TyInfo, SlotSize, /*AllowHigherAlign*/ true); } //===----------------------------------------------------------------------===// // ARM ABI Implementation //===----------------------------------------------------------------------===// namespace { class ARMABIInfo : public SwiftABIInfo { public: enum ABIKind { APCS = 0, AAPCS = 1, AAPCS_VFP = 2, AAPCS16_VFP = 3, }; private: ABIKind Kind; public: ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : SwiftABIInfo(CGT), Kind(_Kind) { setCCs(); } bool isEABI() const { switch (getTarget().getTriple().getEnvironment()) { case llvm::Triple::Android: case llvm::Triple::EABI: case llvm::Triple::EABIHF: case llvm::Triple::GNUEABI: case llvm::Triple::GNUEABIHF: case llvm::Triple::MuslEABI: case llvm::Triple::MuslEABIHF: return true; default: return false; } } bool isEABIHF() const { switch (getTarget().getTriple().getEnvironment()) { case llvm::Triple::EABIHF: case llvm::Triple::GNUEABIHF: case llvm::Triple::MuslEABIHF: return true; default: return false; } } ABIKind getABIKind() const { return Kind; } private: ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic) const; ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic) const; bool isIllegalVectorType(QualType Ty) const; bool isHomogeneousAggregateBaseType(QualType Ty) const override; bool isHomogeneousAggregateSmallEnough(const Type *Ty, uint64_t Members) const override; void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; llvm::CallingConv::ID getLLVMDefaultCC() const; llvm::CallingConv::ID getABIDefaultCC() const; void setCCs(); bool shouldPassIndirectlyForSwift(CharUnits totalSize, ArrayRef scalars, bool asReturnValue) const override { return occupiesMoreThan(CGT, scalars, /*total*/ 4); } }; class ARMTargetCodeGenInfo : public TargetCodeGenInfo { public: ARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIInfo::ABIKind K) :TargetCodeGenInfo(new ARMABIInfo(CGT, K)) {} const ARMABIInfo &getABIInfo() const { return static_cast(TargetCodeGenInfo::getABIInfo()); } int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { return 13; } StringRef getARCRetainAutoreleasedReturnValueMarker() const override { return "mov\tr7, r7\t\t@ marker for objc_retainAutoreleaseReturnValue"; } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override { llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4); // 0-15 are the 16 integer registers. 
AssignToArrayRange(CGF.Builder, Address, Four8, 0, 15); return false; } unsigned getSizeOfUnwindException() const override { if (getABIInfo().isEABI()) return 88; return TargetCodeGenInfo::getSizeOfUnwindException(); } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { const FunctionDecl *FD = dyn_cast_or_null(D); if (!FD) return; const ARMInterruptAttr *Attr = FD->getAttr(); if (!Attr) return; const char *Kind; switch (Attr->getInterrupt()) { case ARMInterruptAttr::Generic: Kind = ""; break; case ARMInterruptAttr::IRQ: Kind = "IRQ"; break; case ARMInterruptAttr::FIQ: Kind = "FIQ"; break; case ARMInterruptAttr::SWI: Kind = "SWI"; break; case ARMInterruptAttr::ABORT: Kind = "ABORT"; break; case ARMInterruptAttr::UNDEF: Kind = "UNDEF"; break; } llvm::Function *Fn = cast(GV); Fn->addFnAttr("interrupt", Kind); ARMABIInfo::ABIKind ABI = cast(getABIInfo()).getABIKind(); if (ABI == ARMABIInfo::APCS) return; // AAPCS guarantees that sp will be 8-byte aligned on any public interface, // however this is not necessarily true on taking any interrupt. Instruct // the backend to perform a realignment as part of the function prologue. llvm::AttrBuilder B; B.addStackAlignmentAttr(8); Fn->addAttributes(llvm::AttributeSet::FunctionIndex, llvm::AttributeSet::get(CGM.getLLVMContext(), llvm::AttributeSet::FunctionIndex, B)); } }; class WindowsARMTargetCodeGenInfo : public ARMTargetCodeGenInfo { public: WindowsARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIInfo::ABIKind K) : ARMTargetCodeGenInfo(CGT, K) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib); } void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, llvm::SmallString<32> &Opt) const override { Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; } }; void WindowsARMTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM); addStackProbeSizeTargetAttribute(D, GV, CGM); } } void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type, FI.isVariadic()); // Always honor user-specified calling convention. if (FI.getCallingConvention() != llvm::CallingConv::C) return; llvm::CallingConv::ID cc = getRuntimeCC(); if (cc != llvm::CallingConv::C) FI.setEffectiveCallingConvention(cc); } /// Return the default calling convention that LLVM will use. llvm::CallingConv::ID ARMABIInfo::getLLVMDefaultCC() const { // The default calling convention that LLVM will infer. if (isEABIHF() || getTarget().getTriple().isWatchABI()) return llvm::CallingConv::ARM_AAPCS_VFP; else if (isEABI()) return llvm::CallingConv::ARM_AAPCS; else return llvm::CallingConv::ARM_APCS; } /// Return the calling convention that our ABI would like us to use /// as the C calling convention. 
llvm::CallingConv::ID ARMABIInfo::getABIDefaultCC() const { switch (getABIKind()) { case APCS: return llvm::CallingConv::ARM_APCS; case AAPCS: return llvm::CallingConv::ARM_AAPCS; case AAPCS_VFP: return llvm::CallingConv::ARM_AAPCS_VFP; case AAPCS16_VFP: return llvm::CallingConv::ARM_AAPCS_VFP; } llvm_unreachable("bad ABI kind"); } void ARMABIInfo::setCCs() { assert(getRuntimeCC() == llvm::CallingConv::C); // Don't muddy up the IR with a ton of explicit annotations if // they'd just match what LLVM will infer from the triple. llvm::CallingConv::ID abiCC = getABIDefaultCC(); if (abiCC != getLLVMDefaultCC()) RuntimeCC = abiCC; // AAPCS apparently requires runtime support functions to be soft-float, but // that's almost certainly for historic reasons (Thumb1 not supporting VFP // most likely). It's more convenient for AAPCS16_VFP to be hard-float. switch (getABIKind()) { case APCS: case AAPCS16_VFP: if (abiCC != getLLVMDefaultCC()) BuiltinCC = abiCC; break; case AAPCS: case AAPCS_VFP: BuiltinCC = llvm::CallingConv::ARM_AAPCS; break; } } ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic) const { // 6.1.2.1 The following argument types are VFP CPRCs: // A single-precision floating-point type (including promoted // half-precision types); A double-precision floating-point type; // A 64-bit or 128-bit containerized vector type; Homogeneous Aggregate // with a Base Type of a single- or double-precision floating-point type, // 64-bit containerized vectors or 128-bit containerized vectors with one // to four Elements. bool IsEffectivelyAAPCS_VFP = getABIKind() == AAPCS_VFP && !isVariadic; Ty = useFirstFieldIfTransparentUnion(Ty); // Handle illegal vector types here. if (isIllegalVectorType(Ty)) { uint64_t Size = getContext().getTypeSize(Ty); if (Size <= 32) { llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(ResType); } if (Size == 64) { llvm::Type *ResType = llvm::VectorType::get( llvm::Type::getInt32Ty(getVMContext()), 2); return ABIArgInfo::getDirect(ResType); } if (Size == 128) { llvm::Type *ResType = llvm::VectorType::get( llvm::Type::getInt32Ty(getVMContext()), 4); return ABIArgInfo::getDirect(ResType); } return getNaturalAlignIndirect(Ty, /*ByVal=*/false); } // __fp16 gets passed as if it were an int or float, but with the top 16 bits // unspecified. This is not done for OpenCL as it handles the half type // natively, and does not need to interwork with AAPCS code. if (Ty->isHalfType() && !getContext().getLangOpts().NativeHalfArgsAndReturns) { llvm::Type *ResType = IsEffectivelyAAPCS_VFP ? llvm::Type::getFloatTy(getVMContext()) : llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(ResType); } if (!isAggregateTypeForABI(Ty)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) { Ty = EnumTy->getDecl()->getIntegerType(); } return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); } // Ignore empty records. if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); if (IsEffectivelyAAPCS_VFP) { // Homogeneous Aggregates need to be expanded when we can fit the aggregate // into VFP registers. 
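    //
    // Illustrative example (hypothetical type, for exposition only): with
    // the hard-float AAPCS variant,
    //
    //   struct Quad { float F[4]; };
    //
    // is a homogeneous aggregate occupying s0-s3; the same call made
    // variadic drops back to the integer rules above.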
const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(Ty, Base, Members)) { assert(Base && "Base class should be set for homogeneous aggregate"); // Base can be a floating-point or a vector. return ABIArgInfo::getDirect(nullptr, 0, nullptr, false); } } else if (getABIKind() == ARMABIInfo::AAPCS16_VFP) { // WatchOS does have homogeneous aggregates. Note that we intentionally use // this convention even for a variadic function: the backend will use GPRs // if needed. const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(Ty, Base, Members)) { assert(Base && Members <= 4 && "unexpected homogeneous aggregate"); llvm::Type *Ty = llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members); return ABIArgInfo::getDirect(Ty, 0, nullptr, false); } } if (getABIKind() == ARMABIInfo::AAPCS16_VFP && getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(16)) { // WatchOS is adopting the 64-bit AAPCS rule on composite types: if they're // bigger than 128-bits, they get placed in space allocated by the caller, // and a pointer is passed. return ABIArgInfo::getIndirect( CharUnits::fromQuantity(getContext().getTypeAlign(Ty) / 8), false); } // Support byval for ARM. // The ABI alignment for APCS is 4-byte and for AAPCS at least 4-byte and at // most 8-byte. We realign the indirect argument if type alignment is bigger // than ABI alignment. uint64_t ABIAlign = 4; uint64_t TyAlign = getContext().getTypeAlign(Ty) / 8; if (getABIKind() == ARMABIInfo::AAPCS_VFP || getABIKind() == ARMABIInfo::AAPCS) ABIAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8); if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) { assert(getABIKind() != ARMABIInfo::AAPCS16_VFP && "unexpected byval"); return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign), /*ByVal=*/true, /*Realign=*/TyAlign > ABIAlign); } // Otherwise, pass by coercing to a structure of the appropriate size. llvm::Type* ElemTy; unsigned SizeRegs; // FIXME: Try to match the types of the arguments more accurately where // we can. if (getContext().getTypeAlign(Ty) <= 32) { ElemTy = llvm::Type::getInt32Ty(getVMContext()); SizeRegs = (getContext().getTypeSize(Ty) + 31) / 32; } else { ElemTy = llvm::Type::getInt64Ty(getVMContext()); SizeRegs = (getContext().getTypeSize(Ty) + 63) / 64; } return ABIArgInfo::getDirect(llvm::ArrayType::get(ElemTy, SizeRegs)); } static bool isIntegerLikeType(QualType Ty, ASTContext &Context, llvm::LLVMContext &VMContext) { // APCS, C Language Calling Conventions, Non-Simple Return Values: A structure // is called integer-like if its size is less than or equal to one word, and // the offset of each of its addressable sub-fields is zero. uint64_t Size = Context.getTypeSize(Ty); // Check that the type fits in a word. if (Size > 32) return false; // FIXME: Handle vector types! if (Ty->isVectorType()) return false; // Float types are never treated as "integer like". if (Ty->isRealFloatingType()) return false; // If this is a builtin or pointer type then it is ok. if (Ty->getAs() || Ty->isPointerType()) return true; // Small complex integer types are "integer like". if (const ComplexType *CT = Ty->getAs()) return isIntegerLikeType(CT->getElementType(), Context, VMContext); // Single element and zero sized arrays should be allowed, by the definition // above, but they are not. // Otherwise, it must be a record type. const RecordType *RT = Ty->getAs(); if (!RT) return false; // Ignore records with flexible arrays. 
const RecordDecl *RD = RT->getDecl(); if (RD->hasFlexibleArrayMember()) return false; // Check that all sub-fields are at offset 0, and are themselves "integer // like". const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); bool HadField = false; unsigned idx = 0; for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); i != e; ++i, ++idx) { const FieldDecl *FD = *i; // Bit-fields are not addressable, we only need to verify they are "integer // like". We still have to disallow a subsequent non-bitfield, for example: // struct { int : 0; int x } // is non-integer like according to gcc. if (FD->isBitField()) { if (!RD->isUnion()) HadField = true; if (!isIntegerLikeType(FD->getType(), Context, VMContext)) return false; continue; } // Check if this field is at offset 0. if (Layout.getFieldOffset(idx) != 0) return false; if (!isIntegerLikeType(FD->getType(), Context, VMContext)) return false; // Only allow at most one field in a structure. This doesn't match the // wording above, but follows gcc in situations with a field following an // empty structure. if (!RD->isUnion()) { if (HadField) return false; HadField = true; } } return true; } ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic) const { bool IsEffectivelyAAPCS_VFP = (getABIKind() == AAPCS_VFP || getABIKind() == AAPCS16_VFP) && !isVariadic; if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); // Large vector types should be returned via memory. if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128) { return getNaturalAlignIndirect(RetTy); } // __fp16 gets returned as if it were an int or float, but with the top 16 // bits unspecified. This is not done for OpenCL as it handles the half type // natively, and does not need to interwork with AAPCS code. if (RetTy->isHalfType() && !getContext().getLangOpts().NativeHalfArgsAndReturns) { llvm::Type *ResType = IsEffectivelyAAPCS_VFP ? llvm::Type::getFloatTy(getVMContext()) : llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(ResType); } if (!isAggregateTypeForABI(RetTy)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); return RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect(); } // Are we following APCS? if (getABIKind() == APCS) { if (isEmptyRecord(getContext(), RetTy, false)) return ABIArgInfo::getIgnore(); // Complex types are all returned as packed integers. // // FIXME: Consider using 2 x vector types if the back end handles them // correctly. if (RetTy->isAnyComplexType()) return ABIArgInfo::getDirect(llvm::IntegerType::get( getVMContext(), getContext().getTypeSize(RetTy))); // Integer like structures are returned in r0. if (isIntegerLikeType(RetTy, getContext(), getVMContext())) { // Return in the smallest viable integer type. uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 8) return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); if (Size <= 16) return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); } // Otherwise return in memory. return getNaturalAlignIndirect(RetTy); } // Otherwise this is an AAPCS variant. if (isEmptyRecord(getContext(), RetTy, true)) return ABIArgInfo::getIgnore(); // Check for homogeneous aggregates with AAPCS-VFP. 
  if (IsEffectivelyAAPCS_VFP) {
    const Type *Base = nullptr;
    uint64_t Members = 0;
    if (isHomogeneousAggregate(RetTy, Base, Members)) {
      assert(Base && "Base class should be set for homogeneous aggregate");
      // Homogeneous Aggregates are returned directly.
      return ABIArgInfo::getDirect(nullptr, 0, nullptr, false);
    }
  }

  // Aggregates <= 4 bytes are returned in r0; other aggregates
  // are returned indirectly.
  uint64_t Size = getContext().getTypeSize(RetTy);
  if (Size <= 32) {
    if (getDataLayout().isBigEndian())
      // Return in 32-bit integer type (as if loaded by LDR, AAPCS 5.4)
      return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));

    // Return in the smallest viable integer type.
    if (Size <= 8)
      return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
    if (Size <= 16)
      return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
    return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
  } else if (Size <= 128 && getABIKind() == AAPCS16_VFP) {
    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(getVMContext());
    llvm::Type *CoerceTy =
        llvm::ArrayType::get(Int32Ty, llvm::alignTo(Size, 32) / 32);
    return ABIArgInfo::getDirect(CoerceTy);
  }

  return getNaturalAlignIndirect(RetTy);
}

/// isIllegalVector - check whether Ty is an illegal vector type.
bool ARMABIInfo::isIllegalVectorType(QualType Ty) const {
  if (const VectorType *VT = Ty->getAs<VectorType>()) {
    if (isAndroid()) {
      // Android shipped using Clang 3.1, which supported a slightly
      // different vector ABI. The primary differences were that 3-element
      // vector types were legal, and so were sub 32-bit vectors (i.e.
      // <2 x i8>). This path accepts that legacy behavior for Android only.
      // Check whether VT is legal.
      unsigned NumElements = VT->getNumElements();
      // NumElements should be power of 2 or equal to 3.
      if (!llvm::isPowerOf2_32(NumElements) && NumElements != 3)
        return true;
    } else {
      // Check whether VT is legal.
      unsigned NumElements = VT->getNumElements();
      uint64_t Size = getContext().getTypeSize(VT);
      // NumElements should be power of 2.
      if (!llvm::isPowerOf2_32(NumElements))
        return true;
      // Size should be greater than 32 bits.
      return Size <= 32;
    }
  }
  return false;
}

bool ARMABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
  // Homogeneous aggregates for AAPCS-VFP must have base types of float,
  // double, or 64-bit or 128-bit vectors.
  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
    if (BT->getKind() == BuiltinType::Float ||
        BT->getKind() == BuiltinType::Double ||
        BT->getKind() == BuiltinType::LongDouble)
      return true;
  } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
    unsigned VecSize = getContext().getTypeSize(VT);
    if (VecSize == 64 || VecSize == 128)
      return true;
  }
  return false;
}

bool ARMABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
                                                   uint64_t Members) const {
  return Members <= 4;
}

Address ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                              QualType Ty) const {
  CharUnits SlotSize = CharUnits::fromQuantity(4);

  // Empty records are ignored for parameter passing purposes.
  if (isEmptyRecord(getContext(), Ty, true)) {
    Address Addr(CGF.Builder.CreateLoad(VAListAddr), SlotSize);
    Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
    return Addr;
  }

  auto TyInfo = getContext().getTypeInfoInChars(Ty);
  CharUnits TyAlignForABI = TyInfo.second;

  // Use indirect if size of the illegal vector is bigger than 16 bytes.
bool IsIndirect = false; const Type *Base = nullptr; uint64_t Members = 0; if (TyInfo.first > CharUnits::fromQuantity(16) && isIllegalVectorType(Ty)) { IsIndirect = true; // ARMv7k passes structs bigger than 16 bytes indirectly, in space // allocated by the caller. } else if (TyInfo.first > CharUnits::fromQuantity(16) && getABIKind() == ARMABIInfo::AAPCS16_VFP && !isHomogeneousAggregate(Ty, Base, Members)) { IsIndirect = true; // Otherwise, bound the type's ABI alignment. // The ABI alignment for 64-bit or 128-bit vectors is 8 for AAPCS and 4 for // APCS. For AAPCS, the ABI alignment is at least 4-byte and at most 8-byte. // Our callers should be prepared to handle an under-aligned address. } else if (getABIKind() == ARMABIInfo::AAPCS_VFP || getABIKind() == ARMABIInfo::AAPCS) { TyAlignForABI = std::max(TyAlignForABI, CharUnits::fromQuantity(4)); TyAlignForABI = std::min(TyAlignForABI, CharUnits::fromQuantity(8)); } else if (getABIKind() == ARMABIInfo::AAPCS16_VFP) { // ARMv7k allows type alignment up to 16 bytes. TyAlignForABI = std::max(TyAlignForABI, CharUnits::fromQuantity(4)); TyAlignForABI = std::min(TyAlignForABI, CharUnits::fromQuantity(16)); } else { TyAlignForABI = CharUnits::fromQuantity(4); } TyInfo.second = TyAlignForABI; return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TyInfo, SlotSize, /*AllowHigherAlign*/ true); } //===----------------------------------------------------------------------===// // NVPTX ABI Implementation //===----------------------------------------------------------------------===// namespace { class NVPTXABIInfo : public ABIInfo { public: NVPTXABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {} ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType Ty) const; void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; }; class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo { public: NVPTXTargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new NVPTXABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; private: // Adds a NamedMDNode with F, Name, and Operand as operands, and adds the // resulting MDNode to the nvvm.annotations MDNode. static void addNVVMMetadata(llvm::Function *F, StringRef Name, int Operand); }; ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); // note: this is different from default ABI if (!RetTy->isScalarType()) return ABIArgInfo::getDirect(); // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); // Return aggregates type as indirect by value if (isAggregateTypeForABI(Ty)) return getNaturalAlignIndirect(Ty, /* byval */ true); return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type); // Always honor user-specified calling convention. 
if (FI.getCallingConvention() != llvm::CallingConv::C) return; FI.setEffectiveCallingConvention(getRuntimeCC()); } Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { llvm_unreachable("NVPTX does not support varargs"); } void NVPTXTargetCodeGenInfo:: setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const{ const FunctionDecl *FD = dyn_cast_or_null(D); if (!FD) return; llvm::Function *F = cast(GV); // Perform special handling in OpenCL mode if (M.getLangOpts().OpenCL) { // Use OpenCL function attributes to check for kernel functions // By default, all functions are device functions if (FD->hasAttr()) { // OpenCL __kernel functions get kernel metadata // Create !{, metadata !"kernel", i32 1} node addNVVMMetadata(F, "kernel", 1); // And kernel functions are not subject to inlining F->addFnAttr(llvm::Attribute::NoInline); } } // Perform special handling in CUDA mode. if (M.getLangOpts().CUDA) { // CUDA __global__ functions get a kernel metadata entry. Since // __global__ functions cannot be called from the device, we do not // need to set the noinline attribute. if (FD->hasAttr()) { // Create !{, metadata !"kernel", i32 1} node addNVVMMetadata(F, "kernel", 1); } if (CUDALaunchBoundsAttr *Attr = FD->getAttr()) { // Create !{, metadata !"maxntidx", i32 } node llvm::APSInt MaxThreads(32); MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(M.getContext()); if (MaxThreads > 0) addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue()); // min blocks is an optional argument for CUDALaunchBoundsAttr. If it was // not specified in __launch_bounds__ or if the user specified a 0 value, // we don't have to add a PTX directive. if (Attr->getMinBlocks()) { llvm::APSInt MinBlocks(32); MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(M.getContext()); if (MinBlocks > 0) // Create !{, metadata !"minctasm", i32 } node addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue()); } } } } void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::Function *F, StringRef Name, int Operand) { llvm::Module *M = F->getParent(); llvm::LLVMContext &Ctx = M->getContext(); // Get "nvvm.annotations" metadata node llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); llvm::Metadata *MDVals[] = { llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, Name), llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))}; // Append metadata to nvvm.annotations MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); } } //===----------------------------------------------------------------------===// // SystemZ ABI Implementation //===----------------------------------------------------------------------===// namespace { class SystemZABIInfo : public SwiftABIInfo { bool HasVector; public: SystemZABIInfo(CodeGenTypes &CGT, bool HV) : SwiftABIInfo(CGT), HasVector(HV) {} bool isPromotableIntegerType(QualType Ty) const; bool isCompoundType(QualType Ty) const; bool isVectorArgumentType(QualType Ty) const; bool isFPArgumentType(QualType Ty) const; QualType GetSingleElementType(QualType Ty) const; ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType ArgTy) const; void computeInfo(CGFunctionInfo &FI) const override { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type); } Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; 
bool shouldPassIndirectlyForSwift(CharUnits totalSize, ArrayRef scalars, bool asReturnValue) const override { return occupiesMoreThan(CGT, scalars, /*total*/ 4); } }; class SystemZTargetCodeGenInfo : public TargetCodeGenInfo { public: SystemZTargetCodeGenInfo(CodeGenTypes &CGT, bool HasVector) : TargetCodeGenInfo(new SystemZABIInfo(CGT, HasVector)) {} }; } bool SystemZABIInfo::isPromotableIntegerType(QualType Ty) const { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); // Promotable integer types are required to be promoted by the ABI. if (Ty->isPromotableIntegerType()) return true; // 32-bit values must also be promoted. if (const BuiltinType *BT = Ty->getAs()) switch (BT->getKind()) { case BuiltinType::Int: case BuiltinType::UInt: return true; default: return false; } return false; } bool SystemZABIInfo::isCompoundType(QualType Ty) const { return (Ty->isAnyComplexType() || Ty->isVectorType() || isAggregateTypeForABI(Ty)); } bool SystemZABIInfo::isVectorArgumentType(QualType Ty) const { return (HasVector && Ty->isVectorType() && getContext().getTypeSize(Ty) <= 128); } bool SystemZABIInfo::isFPArgumentType(QualType Ty) const { if (const BuiltinType *BT = Ty->getAs()) switch (BT->getKind()) { case BuiltinType::Float: case BuiltinType::Double: return true; default: return false; } return false; } QualType SystemZABIInfo::GetSingleElementType(QualType Ty) const { if (const RecordType *RT = Ty->getAsStructureType()) { const RecordDecl *RD = RT->getDecl(); QualType Found; // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) for (const auto &I : CXXRD->bases()) { QualType Base = I.getType(); // Empty bases don't affect things either way. if (isEmptyRecord(getContext(), Base, true)) continue; if (!Found.isNull()) return Ty; Found = GetSingleElementType(Base); } // Check the fields. for (const auto *FD : RD->fields()) { // For compatibility with GCC, ignore empty bitfields in C++ mode. // Unlike isSingleElementStruct(), empty structure and array fields // do count. So do anonymous bitfields that aren't zero-sized. if (getContext().getLangOpts().CPlusPlus && FD->isBitField() && FD->getBitWidthValue(getContext()) == 0) continue; // Unlike isSingleElementStruct(), arrays do not count. // Nested structures still do though. if (!Found.isNull()) return Ty; Found = GetSingleElementType(FD->getType()); } // Unlike isSingleElementStruct(), trailing padding is allowed. // An 8-byte aligned struct s { float f; } is passed as a double. if (!Found.isNull()) return Found; } return Ty; } Address SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { // Assume that va_list type is correct; should be pointer to LLVM type: // struct { // i64 __gpr; // i64 __fpr; // i8 *__overflow_arg_area; // i8 *__reg_save_area; // }; // Every non-vector argument occupies 8 bytes and is passed by preference // in either GPRs or FPRs. Vector arguments occupy 8 or 16 bytes and are // always passed on the stack. 
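  //
  // Illustrative walk-through (for exposition only): the first five integer
  // va_args are satisfied from the register save area (r2-r6, with __gpr
  // counting 0 through 4); once __gpr reaches 5, the code below switches
  // to __overflow_arg_area and advances it in 8-byte slots.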
Ty = getContext().getCanonicalType(Ty); auto TyInfo = getContext().getTypeInfoInChars(Ty); llvm::Type *ArgTy = CGF.ConvertTypeForMem(Ty); llvm::Type *DirectTy = ArgTy; ABIArgInfo AI = classifyArgumentType(Ty); bool IsIndirect = AI.isIndirect(); bool InFPRs = false; bool IsVector = false; CharUnits UnpaddedSize; CharUnits DirectAlign; if (IsIndirect) { DirectTy = llvm::PointerType::getUnqual(DirectTy); UnpaddedSize = DirectAlign = CharUnits::fromQuantity(8); } else { if (AI.getCoerceToType()) ArgTy = AI.getCoerceToType(); InFPRs = ArgTy->isFloatTy() || ArgTy->isDoubleTy(); IsVector = ArgTy->isVectorTy(); UnpaddedSize = TyInfo.first; DirectAlign = TyInfo.second; } CharUnits PaddedSize = CharUnits::fromQuantity(8); if (IsVector && UnpaddedSize > PaddedSize) PaddedSize = CharUnits::fromQuantity(16); assert((UnpaddedSize <= PaddedSize) && "Invalid argument size."); CharUnits Padding = (PaddedSize - UnpaddedSize); llvm::Type *IndexTy = CGF.Int64Ty; llvm::Value *PaddedSizeV = llvm::ConstantInt::get(IndexTy, PaddedSize.getQuantity()); if (IsVector) { // Work out the address of a vector argument on the stack. // Vector arguments are always passed in the high bits of a // single (8 byte) or double (16 byte) stack slot. Address OverflowArgAreaPtr = CGF.Builder.CreateStructGEP(VAListAddr, 2, CharUnits::fromQuantity(16), "overflow_arg_area_ptr"); Address OverflowArgArea = Address(CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area"), TyInfo.second); Address MemAddr = CGF.Builder.CreateElementBitCast(OverflowArgArea, DirectTy, "mem_addr"); // Update overflow_arg_area_ptr pointer llvm::Value *NewOverflowArgArea = CGF.Builder.CreateGEP(OverflowArgArea.getPointer(), PaddedSizeV, "overflow_arg_area"); CGF.Builder.CreateStore(NewOverflowArgArea, OverflowArgAreaPtr); return MemAddr; } assert(PaddedSize.getQuantity() == 8); unsigned MaxRegs, RegCountField, RegSaveIndex; CharUnits RegPadding; if (InFPRs) { MaxRegs = 4; // Maximum of 4 FPR arguments RegCountField = 1; // __fpr RegSaveIndex = 16; // save offset for f0 RegPadding = CharUnits(); // floats are passed in the high bits of an FPR } else { MaxRegs = 5; // Maximum of 5 GPR arguments RegCountField = 0; // __gpr RegSaveIndex = 2; // save offset for r2 RegPadding = Padding; // values are passed in the low bits of a GPR } Address RegCountPtr = CGF.Builder.CreateStructGEP( VAListAddr, RegCountField, RegCountField * CharUnits::fromQuantity(8), "reg_count_ptr"); llvm::Value *RegCount = CGF.Builder.CreateLoad(RegCountPtr, "reg_count"); llvm::Value *MaxRegsV = llvm::ConstantInt::get(IndexTy, MaxRegs); llvm::Value *InRegs = CGF.Builder.CreateICmpULT(RegCount, MaxRegsV, "fits_in_regs"); llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem"); llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock); // Emit code to load the value if it was passed in registers. CGF.EmitBlock(InRegBlock); // Work out the address of an argument register. 
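// Illustrative note (not in the original source): the computation below
// amounts to
//   reg_save_area + RegSaveIndex * 8 + RegPadding + RegCount * 8,
// i.e. the saved argument-register image (starting at r2 for GPRs and f0
// for FPRs, per the save offsets above) indexed by the number of register
// arguments already consumed.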
llvm::Value *ScaledRegCount = CGF.Builder.CreateMul(RegCount, PaddedSizeV, "scaled_reg_count"); llvm::Value *RegBase = llvm::ConstantInt::get(IndexTy, RegSaveIndex * PaddedSize.getQuantity() + RegPadding.getQuantity()); llvm::Value *RegOffset = CGF.Builder.CreateAdd(ScaledRegCount, RegBase, "reg_offset"); Address RegSaveAreaPtr = CGF.Builder.CreateStructGEP(VAListAddr, 3, CharUnits::fromQuantity(24), "reg_save_area_ptr"); llvm::Value *RegSaveArea = CGF.Builder.CreateLoad(RegSaveAreaPtr, "reg_save_area"); Address RawRegAddr(CGF.Builder.CreateGEP(RegSaveArea, RegOffset, "raw_reg_addr"), PaddedSize); Address RegAddr = CGF.Builder.CreateElementBitCast(RawRegAddr, DirectTy, "reg_addr"); // Update the register count llvm::Value *One = llvm::ConstantInt::get(IndexTy, 1); llvm::Value *NewRegCount = CGF.Builder.CreateAdd(RegCount, One, "reg_count"); CGF.Builder.CreateStore(NewRegCount, RegCountPtr); CGF.EmitBranch(ContBlock); // Emit code to load the value if it was passed in memory. CGF.EmitBlock(InMemBlock); // Work out the address of a stack argument. Address OverflowArgAreaPtr = CGF.Builder.CreateStructGEP( VAListAddr, 2, CharUnits::fromQuantity(16), "overflow_arg_area_ptr"); Address OverflowArgArea = Address(CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area"), PaddedSize); Address RawMemAddr = CGF.Builder.CreateConstByteGEP(OverflowArgArea, Padding, "raw_mem_addr"); Address MemAddr = CGF.Builder.CreateElementBitCast(RawMemAddr, DirectTy, "mem_addr"); // Update overflow_arg_area_ptr pointer llvm::Value *NewOverflowArgArea = CGF.Builder.CreateGEP(OverflowArgArea.getPointer(), PaddedSizeV, "overflow_arg_area"); CGF.Builder.CreateStore(NewOverflowArgArea, OverflowArgAreaPtr); CGF.EmitBranch(ContBlock); // Return the appropriate result. CGF.EmitBlock(ContBlock); Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock, "va_arg.addr"); if (IsIndirect) ResAddr = Address(CGF.Builder.CreateLoad(ResAddr, "indirect_arg"), TyInfo.second); return ResAddr; } ABIArgInfo SystemZABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); if (isVectorArgumentType(RetTy)) return ABIArgInfo::getDirect(); if (isCompoundType(RetTy) || getContext().getTypeSize(RetTy) > 64) return getNaturalAlignIndirect(RetTy); return (isPromotableIntegerType(RetTy) ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const { // Handle the generic C++ ABI. if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); // Integers and enums are extended to full register width. if (isPromotableIntegerType(Ty)) return ABIArgInfo::getExtend(); // Handle vector types and vector-like structure types. Note that // as opposed to float-like structure types, we do not allow any // padding for vector-like structures, so verify the sizes match. uint64_t Size = getContext().getTypeSize(Ty); QualType SingleElementTy = GetSingleElementType(Ty); if (isVectorArgumentType(SingleElementTy) && getContext().getTypeSize(SingleElementTy) == Size) return ABIArgInfo::getDirect(CGT.ConvertType(SingleElementTy)); // Values that are not 1, 2, 4 or 8 bytes in size are passed indirectly. if (Size != 8 && Size != 16 && Size != 32 && Size != 64) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); // Handle small structures. 
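// For example (illustrative, not in the original source): struct { float f; }
// has a single FP element and Size == 32, so it is passed directly as a
// float, while a 4-byte struct with no single FP element is passed as the
// unextended integer i32.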
if (const RecordType *RT = Ty->getAs<RecordType>()) { // Structures with flexible arrays have variable length, so really // fail the size test above. const RecordDecl *RD = RT->getDecl(); if (RD->hasFlexibleArrayMember()) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); // The structure is passed as an unextended integer, a float, or a double. llvm::Type *PassTy; if (isFPArgumentType(SingleElementTy)) { assert(Size == 32 || Size == 64); if (Size == 32) PassTy = llvm::Type::getFloatTy(getVMContext()); else PassTy = llvm::Type::getDoubleTy(getVMContext()); } else PassTy = llvm::IntegerType::get(getVMContext(), Size); return ABIArgInfo::getDirect(PassTy); } // Non-structure compounds are passed indirectly. if (isCompoundType(Ty)) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); return ABIArgInfo::getDirect(nullptr); } //===----------------------------------------------------------------------===// // MSP430 ABI Implementation //===----------------------------------------------------------------------===// namespace { class MSP430TargetCodeGenInfo : public TargetCodeGenInfo { public: MSP430TargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; }; } void MSP430TargetCodeGenInfo::setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { if (const MSP430InterruptAttr *attr = FD->getAttr<MSP430InterruptAttr>()) { // Handle 'interrupt' attribute: llvm::Function *F = cast<llvm::Function>(GV); // Step 1: Set ISR calling convention. F->setCallingConv(llvm::CallingConv::MSP430_INTR); // Step 2: Add attributes goodness. F->addFnAttr(llvm::Attribute::NoInline); // Step 3: Emit ISR vector alias. unsigned Num = attr->getNumber() / 2; llvm::GlobalAlias::create(llvm::Function::ExternalLinkage, "__isr_" + Twine(Num), F); } } } //===----------------------------------------------------------------------===// // MIPS ABI Implementation. This works for both little-endian and // big-endian variants. //===----------------------------------------------------------------------===// namespace { class MipsABIInfo : public ABIInfo { bool IsO32; unsigned MinABIStackAlignInBytes, StackAlignInBytes; void CoerceToIntArgs(uint64_t TySize, SmallVectorImpl<llvm::Type *> &ArgList) const; llvm::Type* HandleAggregates(QualType Ty, uint64_t TySize) const; llvm::Type* returnAggregateInRegs(QualType RetTy, uint64_t Size) const; llvm::Type* getPaddingType(uint64_t Align, uint64_t Offset) const; public: MipsABIInfo(CodeGenTypes &CGT, bool _IsO32) : ABIInfo(CGT), IsO32(_IsO32), MinABIStackAlignInBytes(IsO32 ? 4 : 8), StackAlignInBytes(IsO32 ? 8 : 16) {} ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType RetTy, uint64_t &Offset) const; void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; bool shouldSignExtUnsignedType(QualType Ty) const override; }; class MIPSTargetCodeGenInfo : public TargetCodeGenInfo { unsigned SizeOfUnwindException; public: MIPSTargetCodeGenInfo(CodeGenTypes &CGT, bool IsO32) : TargetCodeGenInfo(new MipsABIInfo(CGT, IsO32)), SizeOfUnwindException(IsO32 ?
24 : 32) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { return 29; } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; llvm::Function *Fn = cast<llvm::Function>(GV); if (FD->hasAttr<Mips16Attr>()) { Fn->addFnAttr("mips16"); } else if (FD->hasAttr<NoMips16Attr>()) { Fn->addFnAttr("nomips16"); } const MipsInterruptAttr *Attr = FD->getAttr<MipsInterruptAttr>(); if (!Attr) return; const char *Kind; switch (Attr->getInterrupt()) { case MipsInterruptAttr::eic: Kind = "eic"; break; case MipsInterruptAttr::sw0: Kind = "sw0"; break; case MipsInterruptAttr::sw1: Kind = "sw1"; break; case MipsInterruptAttr::hw0: Kind = "hw0"; break; case MipsInterruptAttr::hw1: Kind = "hw1"; break; case MipsInterruptAttr::hw2: Kind = "hw2"; break; case MipsInterruptAttr::hw3: Kind = "hw3"; break; case MipsInterruptAttr::hw4: Kind = "hw4"; break; case MipsInterruptAttr::hw5: Kind = "hw5"; break; } Fn->addFnAttr("interrupt", Kind); } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; unsigned getSizeOfUnwindException() const override { return SizeOfUnwindException; } }; } void MipsABIInfo::CoerceToIntArgs( uint64_t TySize, SmallVectorImpl<llvm::Type *> &ArgList) const { llvm::IntegerType *IntTy = llvm::IntegerType::get(getVMContext(), MinABIStackAlignInBytes * 8); // Add (TySize / MinABIStackAlignInBytes) args of IntTy. for (unsigned N = TySize / (MinABIStackAlignInBytes * 8); N; --N) ArgList.push_back(IntTy); // If necessary, add one more integer type to ArgList. unsigned R = TySize % (MinABIStackAlignInBytes * 8); if (R) ArgList.push_back(llvm::IntegerType::get(getVMContext(), R)); } // In N32/64, an aligned double precision floating point field is passed in // a register. llvm::Type* MipsABIInfo::HandleAggregates(QualType Ty, uint64_t TySize) const { SmallVector<llvm::Type*, 8> ArgList, IntArgList; if (IsO32) { CoerceToIntArgs(TySize, ArgList); return llvm::StructType::get(getVMContext(), ArgList); } if (Ty->isComplexType()) return CGT.ConvertType(Ty); const RecordType *RT = Ty->getAs<RecordType>(); // Unions/vectors are passed in integer registers. if (!RT || !RT->isStructureOrClassType()) { CoerceToIntArgs(TySize, ArgList); return llvm::StructType::get(getVMContext(), ArgList); } const RecordDecl *RD = RT->getDecl(); const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); assert(!(TySize % 8) && "Size of structure must be multiple of 8."); uint64_t LastOffset = 0; unsigned idx = 0; llvm::IntegerType *I64 = llvm::IntegerType::get(getVMContext(), 64); // Iterate over fields in the struct/class and check if there are any aligned // double fields. for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); i != e; ++i, ++idx) { const QualType Ty = i->getType(); const BuiltinType *BT = Ty->getAs<BuiltinType>(); if (!BT || BT->getKind() != BuiltinType::Double) continue; uint64_t Offset = Layout.getFieldOffset(idx); if (Offset % 64) // Ignore doubles that are not aligned. continue; // Add ((Offset - LastOffset) / 64) args of type i64. for (unsigned j = (Offset - LastOffset) / 64; j > 0; --j) ArgList.push_back(I64); // Add double type.
ArgList.push_back(llvm::Type::getDoubleTy(getVMContext())); LastOffset = Offset + 64; } CoerceToIntArgs(TySize - LastOffset, IntArgList); ArgList.append(IntArgList.begin(), IntArgList.end()); return llvm::StructType::get(getVMContext(), ArgList); } llvm::Type *MipsABIInfo::getPaddingType(uint64_t OrigOffset, uint64_t Offset) const { if (OrigOffset + MinABIStackAlignInBytes > Offset) return nullptr; return llvm::IntegerType::get(getVMContext(), (Offset - OrigOffset) * 8); } ABIArgInfo MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const { Ty = useFirstFieldIfTransparentUnion(Ty); uint64_t OrigOffset = Offset; uint64_t TySize = getContext().getTypeSize(Ty); uint64_t Align = getContext().getTypeAlign(Ty) / 8; Align = std::min(std::max(Align, (uint64_t)MinABIStackAlignInBytes), (uint64_t)StackAlignInBytes); unsigned CurrOffset = llvm::alignTo(Offset, Align); Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) / 8; if (isAggregateTypeForABI(Ty) || Ty->isVectorType()) { // Ignore empty aggregates. if (TySize == 0) return ABIArgInfo::getIgnore(); if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { Offset = OrigOffset + MinABIStackAlignInBytes; return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); } // If we have reached here, aggregates are passed directly by coercing to // another structure type. Padding is inserted if the offset of the // aggregate is unaligned. ABIArgInfo ArgInfo = ABIArgInfo::getDirect(HandleAggregates(Ty, TySize), 0, getPaddingType(OrigOffset, CurrOffset)); ArgInfo.setInReg(true); return ArgInfo; } // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); // All integral types are promoted to the GPR width. if (Ty->isIntegralOrEnumerationType()) return ABIArgInfo::getExtend(); return ABIArgInfo::getDirect( nullptr, 0, IsO32 ? nullptr : getPaddingType(OrigOffset, CurrOffset)); } llvm::Type* MipsABIInfo::returnAggregateInRegs(QualType RetTy, uint64_t Size) const { const RecordType *RT = RetTy->getAs<RecordType>(); SmallVector<llvm::Type*, 8> RTList; if (RT && RT->isStructureOrClassType()) { const RecordDecl *RD = RT->getDecl(); const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); unsigned FieldCnt = Layout.getFieldCount(); // N32/64 returns struct/classes in floating point registers if the // following conditions are met: // 1. The size of the struct/class is no larger than 128-bit. // 2. The struct/class has one or two fields all of which are floating // point types. // 3. The offset of the first field is zero (this follows what gcc does). // // Any other composite results are returned in integer registers. // if (FieldCnt && (FieldCnt <= 2) && !Layout.getFieldOffset(0)) { RecordDecl::field_iterator b = RD->field_begin(), e = RD->field_end(); for (; b != e; ++b) { const BuiltinType *BT = b->getType()->getAs<BuiltinType>(); if (!BT || !BT->isFloatingPoint()) break; RTList.push_back(CGT.ConvertType(b->getType())); } if (b == e) return llvm::StructType::get(getVMContext(), RTList, RD->hasAttr<PackedAttr>()); RTList.clear(); } } CoerceToIntArgs(Size, RTList); return llvm::StructType::get(getVMContext(), RTList); } ABIArgInfo MipsABIInfo::classifyReturnType(QualType RetTy) const { uint64_t Size = getContext().getTypeSize(RetTy); if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); // O32 doesn't treat zero-sized structs differently from other structs. // However, N32/N64 ignores zero sized return values.
if (!IsO32 && Size == 0) return ABIArgInfo::getIgnore(); if (isAggregateTypeForABI(RetTy) || RetTy->isVectorType()) { if (Size <= 128) { if (RetTy->isAnyComplexType()) return ABIArgInfo::getDirect(); // O32 returns integer vectors in registers and N32/N64 returns all small // aggregates in registers. if (!IsO32 || (RetTy->isVectorType() && !RetTy->hasFloatingRepresentation())) { ABIArgInfo ArgInfo = ABIArgInfo::getDirect(returnAggregateInRegs(RetTy, Size)); ArgInfo.setInReg(true); return ArgInfo; } } return getNaturalAlignIndirect(RetTy); } // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } void MipsABIInfo::computeInfo(CGFunctionInfo &FI) const { ABIArgInfo &RetInfo = FI.getReturnInfo(); if (!getCXXABI().classifyReturnType(FI)) RetInfo = classifyReturnType(FI.getReturnType()); // Check if a pointer to an aggregate is passed as a hidden argument. uint64_t Offset = RetInfo.isIndirect() ? MinABIStackAlignInBytes : 0; for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type, Offset); } Address MipsABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType OrigTy) const { QualType Ty = OrigTy; // Integer arguments are promoted to 32-bit on O32 and 64-bit on N32/N64. // Pointers are also promoted in the same way but this only matters for N32. unsigned SlotSizeInBits = IsO32 ? 32 : 64; unsigned PtrWidth = getTarget().getPointerWidth(0); bool DidPromote = false; if ((Ty->isIntegerType() && getContext().getIntWidth(Ty) < SlotSizeInBits) || (Ty->isPointerType() && PtrWidth < SlotSizeInBits)) { DidPromote = true; Ty = getContext().getIntTypeForBitwidth(SlotSizeInBits, Ty->isSignedIntegerType()); } auto TyInfo = getContext().getTypeInfoInChars(Ty); // The alignment of things in the argument area is never larger than // StackAlignInBytes. TyInfo.second = std::min(TyInfo.second, CharUnits::fromQuantity(StackAlignInBytes)); // MinABIStackAlignInBytes is the size of argument slots on the stack. CharUnits ArgSlotSize = CharUnits::fromQuantity(MinABIStackAlignInBytes); Address Addr = emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, TyInfo, ArgSlotSize, /*AllowHigherAlign*/ true); // If there was a promotion, "unpromote" into a temporary. // TODO: can we just use a pointer into a subset of the original slot? if (DidPromote) { Address Temp = CGF.CreateMemTemp(OrigTy, "vaarg.promotion-temp"); llvm::Value *Promoted = CGF.Builder.CreateLoad(Addr); // Truncate down to the right width. llvm::Type *IntTy = (OrigTy->isIntegerType() ? Temp.getElementType() : CGF.IntPtrTy); llvm::Value *V = CGF.Builder.CreateTrunc(Promoted, IntTy); if (OrigTy->isPointerType()) V = CGF.Builder.CreateIntToPtr(V, Temp.getElementType()); CGF.Builder.CreateStore(V, Temp); Addr = Temp; } return Addr; } bool MipsABIInfo::shouldSignExtUnsignedType(QualType Ty) const { int TySize = getContext().getTypeSize(Ty); // MIPS64 ABI requires unsigned 32 bit integers to be sign extended. if (Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32) return true; return false; } bool MIPSTargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { // This information comes from gcc's implementation, which seems to // be as canonical as it gets. // Everything on MIPS is 4 bytes. Double-precision FP registers // are aliased to pairs of single-precision FP registers.
llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4); // 0-31 are the general purpose registers, $0 - $31. // 32-63 are the floating-point registers, $f0 - $f31. // 64 and 65 are the multiply/divide registers, $hi and $lo. // 66 is the (notional, I think) register for signal-handler return. AssignToArrayRange(CGF.Builder, Address, Four8, 0, 65); // 67-74 are the floating-point status registers, $fcc0 - $fcc7. // They are one bit wide and ignored here. // 80-111 are the coprocessor 0 registers, $c0r0 - $c0r31. // (coprocessor 1 is the FP unit) // 112-143 are the coprocessor 2 registers, $c2r0 - $c2r31. // 144-175 are the coprocessor 3 registers, $c3r0 - $c3r31. // 176-181 are the DSP accumulator registers. AssignToArrayRange(CGF.Builder, Address, Four8, 80, 181); return false; } //===----------------------------------------------------------------------===// // TCE ABI Implementation (see http://tce.cs.tut.fi). Uses mostly the defaults. // Currently subclassed only to implement custom OpenCL C function attribute // handling. //===----------------------------------------------------------------------===// namespace { class TCETargetCodeGenInfo : public DefaultTargetCodeGenInfo { public: TCETargetCodeGenInfo(CodeGenTypes &CGT) : DefaultTargetCodeGenInfo(CGT) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; }; void TCETargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; llvm::Function *F = cast<llvm::Function>(GV); if (M.getLangOpts().OpenCL) { if (FD->hasAttr<OpenCLKernelAttr>()) { // OpenCL C Kernel functions are not subject to inlining F->addFnAttr(llvm::Attribute::NoInline); const ReqdWorkGroupSizeAttr *Attr = FD->getAttr<ReqdWorkGroupSizeAttr>(); if (Attr) { // Convert the reqd_work_group_size() attributes to metadata. llvm::LLVMContext &Context = F->getContext(); llvm::NamedMDNode *OpenCLMetadata = M.getModule().getOrInsertNamedMetadata( "opencl.kernel_wg_size_info"); SmallVector<llvm::Metadata *, 5> Operands; Operands.push_back(llvm::ConstantAsMetadata::get(F)); Operands.push_back( llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue( M.Int32Ty, llvm::APInt(32, Attr->getXDim())))); Operands.push_back( llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue( M.Int32Ty, llvm::APInt(32, Attr->getYDim())))); Operands.push_back( llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue( M.Int32Ty, llvm::APInt(32, Attr->getZDim())))); // Add a boolean constant operand for "required" (true) or "hint" // (false) for implementing the work_group_size_hint attr later. // Currently always true as the hint is not yet implemented.
Operands.push_back( llvm::ConstantAsMetadata::get(llvm::ConstantInt::getTrue(Context))); OpenCLMetadata->addOperand(llvm::MDNode::get(Context, Operands)); } } } } } //===----------------------------------------------------------------------===// // Hexagon ABI Implementation //===----------------------------------------------------------------------===// namespace { class HexagonABIInfo : public ABIInfo { public: HexagonABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {} private: ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType RetTy) const; void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; }; class HexagonTargetCodeGenInfo : public TargetCodeGenInfo { public: HexagonTargetCodeGenInfo(CodeGenTypes &CGT) :TargetCodeGenInfo(new HexagonABIInfo(CGT)) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { return 29; } }; } void HexagonABIInfo::computeInfo(CGFunctionInfo &FI) const { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type); } ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty) const { if (!isAggregateTypeForABI(Ty)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } // Ignore empty records. if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); uint64_t Size = getContext().getTypeSize(Ty); if (Size > 64) return getNaturalAlignIndirect(Ty, /*ByVal=*/true); // Pass in the smallest viable integer type. else if (Size > 32) return ABIArgInfo::getDirect(llvm::Type::getInt64Ty(getVMContext())); else if (Size > 16) return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); else if (Size > 8) return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); else return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); } ABIArgInfo HexagonABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); // Large vector types should be returned via memory. if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 64) return getNaturalAlignIndirect(RetTy); if (!isAggregateTypeForABI(RetTy)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } if (isEmptyRecord(getContext(), RetTy, true)) return ABIArgInfo::getIgnore(); // Aggregates <= 8 bytes are returned in r0; other aggregates // are returned indirectly. uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 64) { // Return in the smallest viable integer type.
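// Illustrative mapping for the ladder below (not in the original source):
// a 1-byte struct is returned in i8, a 3-byte struct in i32, and a 5- to
// 8-byte struct in i64.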
if (Size <= 8) return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); if (Size <= 16) return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); if (Size <= 32) return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); return ABIArgInfo::getDirect(llvm::Type::getInt64Ty(getVMContext())); } return getNaturalAlignIndirect(RetTy, /*ByVal=*/true); } Address HexagonABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { // FIXME: Someone needs to audit that this handles alignment correctly. return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, getContext().getTypeInfoInChars(Ty), CharUnits::fromQuantity(4), /*AllowHigherAlign*/ true); } //===----------------------------------------------------------------------===// // Lanai ABI Implementation //===----------------------------------------------------------------------===// namespace { class LanaiABIInfo : public DefaultABIInfo { public: LanaiABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} bool shouldUseInReg(QualType Ty, CCState &State) const; void computeInfo(CGFunctionInfo &FI) const override { CCState State(FI.getCallingConvention()); // Lanai uses 4 registers to pass arguments unless the function has the // regparm attribute set. if (FI.getHasRegParm()) { State.FreeRegs = FI.getRegParm(); } else { State.FreeRegs = 4; } if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type, State); } ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const; ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const; }; } // end anonymous namespace bool LanaiABIInfo::shouldUseInReg(QualType Ty, CCState &State) const { unsigned Size = getContext().getTypeSize(Ty); unsigned SizeInRegs = llvm::alignTo(Size, 32U) / 32U; if (SizeInRegs == 0) return false; if (SizeInRegs > State.FreeRegs) { State.FreeRegs = 0; return false; } State.FreeRegs -= SizeInRegs; return true; } ABIArgInfo LanaiABIInfo::getIndirectResult(QualType Ty, bool ByVal, CCState &State) const { if (!ByVal) { if (State.FreeRegs) { --State.FreeRegs; // Non-byval indirects just use one pointer. return getNaturalAlignIndirectInReg(Ty); } return getNaturalAlignIndirect(Ty, false); } // Compute the byval alignment. const unsigned MinABIStackAlignInBytes = 4; unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true, /*Realign=*/TypeAlign > MinABIStackAlignInBytes); } ABIArgInfo LanaiABIInfo::classifyArgumentType(QualType Ty, CCState &State) const { // Check with the C++ ABI first. const RecordType *RT = Ty->getAs<RecordType>(); if (RT) { CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()); if (RAA == CGCXXABI::RAA_Indirect) { return getIndirectResult(Ty, /*ByVal=*/false, State); } else if (RAA == CGCXXABI::RAA_DirectInMemory) { return getNaturalAlignIndirect(Ty, /*ByRef=*/true); } } if (isAggregateTypeForABI(Ty)) { // Structures with flexible arrays are always indirect. if (RT && RT->getDecl()->hasFlexibleArrayMember()) return getIndirectResult(Ty, /*ByVal=*/true, State); // Ignore empty structs/unions.
if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); llvm::LLVMContext &LLVMContext = getVMContext(); unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32; if (SizeInRegs <= State.FreeRegs) { llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32); llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); State.FreeRegs -= SizeInRegs; return ABIArgInfo::getDirectInReg(Result); } else { State.FreeRegs = 0; } return getIndirectResult(Ty, true, State); } // Treat an enum type as its underlying type. if (const auto *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); bool InReg = shouldUseInReg(Ty, State); if (Ty->isPromotableIntegerType()) { if (InReg) return ABIArgInfo::getDirectInReg(); return ABIArgInfo::getExtend(); } if (InReg) return ABIArgInfo::getDirectInReg(); return ABIArgInfo::getDirect(); } namespace { class LanaiTargetCodeGenInfo : public TargetCodeGenInfo { public: LanaiTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : TargetCodeGenInfo(new LanaiABIInfo(CGT)) {} }; } //===----------------------------------------------------------------------===// // AMDGPU ABI Implementation //===----------------------------------------------------------------------===// namespace { class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo { public: AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; unsigned getOpenCLKernelCallingConv() const override; }; } void AMDGPUTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; if (const auto Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) { llvm::Function *F = cast<llvm::Function>(GV); uint32_t NumVGPR = Attr->getNumVGPR(); if (NumVGPR != 0) F->addFnAttr("amdgpu_num_vgpr", llvm::utostr(NumVGPR)); } if (const auto Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) { llvm::Function *F = cast<llvm::Function>(GV); unsigned NumSGPR = Attr->getNumSGPR(); if (NumSGPR != 0) F->addFnAttr("amdgpu_num_sgpr", llvm::utostr(NumSGPR)); } } unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const { return llvm::CallingConv::AMDGPU_KERNEL; } //===----------------------------------------------------------------------===// // SPARC v8 ABI Implementation. // Based on the SPARC Compliance Definition version 2.4.1. // // Ensures that complex values are passed in registers. // namespace { class SparcV8ABIInfo : public DefaultABIInfo { public: SparcV8ABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} private: ABIArgInfo classifyReturnType(QualType RetTy) const; void computeInfo(CGFunctionInfo &FI) const override; }; } // end anonymous namespace ABIArgInfo SparcV8ABIInfo::classifyReturnType(QualType Ty) const { if (Ty->isAnyComplexType()) { return ABIArgInfo::getDirect(); } else { return DefaultABIInfo::classifyReturnType(Ty); } } void SparcV8ABIInfo::computeInfo(CGFunctionInfo &FI) const { FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &Arg : FI.arguments()) Arg.info = classifyArgumentType(Arg.type); } namespace { class SparcV8TargetCodeGenInfo : public TargetCodeGenInfo { public: SparcV8TargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new SparcV8ABIInfo(CGT)) {} }; } // end anonymous namespace //===----------------------------------------------------------------------===// // SPARC v9 ABI Implementation.
// Based on the SPARC Compliance Definition version 2.4.1. // // Function arguments are mapped to a nominal "parameter array" and promoted to // registers depending on their type. Each argument occupies 8 or 16 bytes in // the array; structs larger than 16 bytes are passed indirectly. // // One case requires special care: // // struct mixed { // int i; // float f; // }; // // When a struct mixed is passed by value, it only occupies 8 bytes in the // parameter array, but the int is passed in an integer register, and the float // is passed in a floating point register. This is represented as two arguments // with the LLVM IR inreg attribute: // // declare void f(i32 inreg %i, float inreg %f) // // The code generator will only allocate 4 bytes from the parameter array for // the inreg arguments. All other arguments are allocated a multiple of 8 // bytes. // namespace { class SparcV9ABIInfo : public ABIInfo { public: SparcV9ABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {} private: ABIArgInfo classifyType(QualType RetTy, unsigned SizeLimit) const; void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; // Coercion type builder for structs passed in registers. The coercion type // serves two purposes: // // 1. Pad structs to a multiple of 64 bits, so they are passed 'left-aligned' // in registers. // 2. Expose aligned floating point elements as first-level elements, so the // code generator knows to pass them in floating point registers. // // We also compute the InReg flag which indicates that the struct contains // aligned 32-bit floats. // struct CoerceBuilder { llvm::LLVMContext &Context; const llvm::DataLayout &DL; SmallVector<llvm::Type*, 8> Elems; uint64_t Size; bool InReg; CoerceBuilder(llvm::LLVMContext &c, const llvm::DataLayout &dl) : Context(c), DL(dl), Size(0), InReg(false) {} // Pad Elems with integers until Size is ToSize. void pad(uint64_t ToSize) { assert(ToSize >= Size && "Cannot remove elements"); if (ToSize == Size) return; // Finish the current 64-bit word. uint64_t Aligned = llvm::alignTo(Size, 64); if (Aligned > Size && Aligned <= ToSize) { Elems.push_back(llvm::IntegerType::get(Context, Aligned - Size)); Size = Aligned; } // Add whole 64-bit words. while (Size + 64 <= ToSize) { Elems.push_back(llvm::Type::getInt64Ty(Context)); Size += 64; } // Final in-word padding. if (Size < ToSize) { Elems.push_back(llvm::IntegerType::get(Context, ToSize - Size)); Size = ToSize; } } // Add a floating point element at Offset. void addFloat(uint64_t Offset, llvm::Type *Ty, unsigned Bits) { // Unaligned floats are treated as integers. if (Offset % Bits) return; // The InReg flag is only required if there are any floats < 64 bits. if (Bits < 64) InReg = true; pad(Offset); Elems.push_back(Ty); Size = Offset + Bits; } // Add a struct type to the coercion type, starting at Offset (in bits).
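// Illustrative note (not in the original source): for the 'struct mixed'
// example above, the integer member falls through the switch in addStruct()
// and is covered by a padding i32 from pad(), addFloat() then appends the
// 32-bit float and sets InReg, and the resulting coercion type
// { i32, float } yields the two inreg arguments shown in the declare line.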
void addStruct(uint64_t Offset, llvm::StructType *StrTy) { const llvm::StructLayout *Layout = DL.getStructLayout(StrTy); for (unsigned i = 0, e = StrTy->getNumElements(); i != e; ++i) { llvm::Type *ElemTy = StrTy->getElementType(i); uint64_t ElemOffset = Offset + Layout->getElementOffsetInBits(i); switch (ElemTy->getTypeID()) { case llvm::Type::StructTyID: addStruct(ElemOffset, cast<llvm::StructType>(ElemTy)); break; case llvm::Type::FloatTyID: addFloat(ElemOffset, ElemTy, 32); break; case llvm::Type::DoubleTyID: addFloat(ElemOffset, ElemTy, 64); break; case llvm::Type::FP128TyID: addFloat(ElemOffset, ElemTy, 128); break; case llvm::Type::PointerTyID: if (ElemOffset % 64 == 0) { pad(ElemOffset); Elems.push_back(ElemTy); Size += 64; } break; default: break; } } } // Check if Ty is a usable substitute for the coercion type. bool isUsableType(llvm::StructType *Ty) const { return llvm::makeArrayRef(Elems) == Ty->elements(); } // Get the coercion type as a literal struct type. llvm::Type *getType() const { if (Elems.size() == 1) return Elems.front(); else return llvm::StructType::get(Context, Elems); } }; }; } // end anonymous namespace ABIArgInfo SparcV9ABIInfo::classifyType(QualType Ty, unsigned SizeLimit) const { if (Ty->isVoidType()) return ABIArgInfo::getIgnore(); uint64_t Size = getContext().getTypeSize(Ty); // Anything too big to fit in registers is passed with an explicit indirect // pointer / sret pointer. if (Size > SizeLimit) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); // Integer types smaller than a register are extended. if (Size < 64 && Ty->isIntegerType()) return ABIArgInfo::getExtend(); // Other non-aggregates go in registers. if (!isAggregateTypeForABI(Ty)) return ABIArgInfo::getDirect(); // If a C++ object has either a non-trivial copy constructor or a non-trivial // destructor, it is passed with an explicit indirect pointer / sret pointer. if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); // This is a small aggregate type that should be passed in registers. // Build a coercion type from the LLVM struct type. llvm::StructType *StrTy = dyn_cast<llvm::StructType>(CGT.ConvertType(Ty)); if (!StrTy) return ABIArgInfo::getDirect(); CoerceBuilder CB(getVMContext(), getDataLayout()); CB.addStruct(0, StrTy); CB.pad(llvm::alignTo(CB.DL.getTypeSizeInBits(StrTy), 64)); // Try to use the original type for coercion. llvm::Type *CoerceTy = CB.isUsableType(StrTy) ?
StrTy : CB.getType(); if (CB.InReg) return ABIArgInfo::getDirectInReg(CoerceTy); else return ABIArgInfo::getDirect(CoerceTy); } Address SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { ABIArgInfo AI = classifyType(Ty, 16 * 8); llvm::Type *ArgTy = CGT.ConvertType(Ty); if (AI.canHaveCoerceToType() && !AI.getCoerceToType()) AI.setCoerceToType(ArgTy); CharUnits SlotSize = CharUnits::fromQuantity(8); CGBuilderTy &Builder = CGF.Builder; Address Addr(Builder.CreateLoad(VAListAddr, "ap.cur"), SlotSize); llvm::Type *ArgPtrTy = llvm::PointerType::getUnqual(ArgTy); auto TypeInfo = getContext().getTypeInfoInChars(Ty); Address ArgAddr = Address::invalid(); CharUnits Stride; switch (AI.getKind()) { case ABIArgInfo::Expand: case ABIArgInfo::CoerceAndExpand: case ABIArgInfo::InAlloca: llvm_unreachable("Unsupported ABI kind for va_arg"); case ABIArgInfo::Extend: { Stride = SlotSize; CharUnits Offset = SlotSize - TypeInfo.first; ArgAddr = Builder.CreateConstInBoundsByteGEP(Addr, Offset, "extend"); break; } case ABIArgInfo::Direct: { auto AllocSize = getDataLayout().getTypeAllocSize(AI.getCoerceToType()); Stride = CharUnits::fromQuantity(AllocSize).alignTo(SlotSize); ArgAddr = Addr; break; } case ABIArgInfo::Indirect: Stride = SlotSize; ArgAddr = Builder.CreateElementBitCast(Addr, ArgPtrTy, "indirect"); ArgAddr = Address(Builder.CreateLoad(ArgAddr, "indirect.arg"), TypeInfo.second); break; case ABIArgInfo::Ignore: return Address(llvm::UndefValue::get(ArgPtrTy), TypeInfo.second); } // Update VAList. llvm::Value *NextPtr = Builder.CreateConstInBoundsByteGEP(Addr.getPointer(), Stride, "ap.next"); Builder.CreateStore(NextPtr, VAListAddr); return Builder.CreateBitCast(ArgAddr, ArgPtrTy, "arg.addr"); } void SparcV9ABIInfo::computeInfo(CGFunctionInfo &FI) const { FI.getReturnInfo() = classifyType(FI.getReturnType(), 32 * 8); for (auto &I : FI.arguments()) I.info = classifyType(I.type, 16 * 8); } namespace { class SparcV9TargetCodeGenInfo : public TargetCodeGenInfo { public: SparcV9TargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new SparcV9ABIInfo(CGT)) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { return 14; } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; }; } // end anonymous namespace bool SparcV9TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { // This is calculated from the LLVM and GCC tables and verified // against gcc output. AFAIK all ABIs use the same encoding. 
CodeGen::CGBuilderTy &Builder = CGF.Builder; llvm::IntegerType *i8 = CGF.Int8Ty; llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4); llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8); // 0-31: the 8-byte general-purpose registers AssignToArrayRange(Builder, Address, Eight8, 0, 31); // 32-63: f0-31, the 4-byte floating-point registers AssignToArrayRange(Builder, Address, Four8, 32, 63); // Y = 64 // PSR = 65 // WIM = 66 // TBR = 67 // PC = 68 // NPC = 69 // FSR = 70 // CSR = 71 AssignToArrayRange(Builder, Address, Eight8, 64, 71); // 72-87: d0-15, the 8-byte floating-point registers AssignToArrayRange(Builder, Address, Eight8, 72, 87); return false; } //===----------------------------------------------------------------------===// // XCore ABI Implementation //===----------------------------------------------------------------------===// namespace { /// A SmallStringEnc instance is used to build up the TypeString by passing /// it by reference between functions that append to it. typedef llvm::SmallString<128> SmallStringEnc; /// TypeStringCache caches the meta encodings of Types. /// /// The reason for caching TypeStrings is twofold: /// 1. To cache a type's encoding for later uses; /// 2. As a means to break recursive member type inclusion. /// /// A cache Entry can have a Status of: /// NonRecursive: The type encoding is not recursive; /// Recursive: The type encoding is recursive; /// Incomplete: An incomplete TypeString; /// IncompleteUsed: An incomplete TypeString that has been used in a /// Recursive type encoding. /// /// A NonRecursive entry will have all of its sub-members expanded as fully /// as possible. Whilst it may contain types which are recursive, the type /// itself is not recursive and thus its encoding may be safely used whenever /// the type is encountered. /// /// A Recursive entry will have all of its sub-members expanded as fully as /// possible. The type itself is recursive and it may contain other types which /// are recursive. The Recursive encoding must not be used during the expansion /// of a recursive type's recursive branch. For simplicity the code uses /// IncompleteCount to reject all usage of Recursive encodings for member types. /// /// An Incomplete entry is always a RecordType and only encodes its /// identifier e.g. "s(S){}". Incomplete 'StubEnc' entries are ephemeral and /// are placed into the cache during type expansion as a means to identify and /// handle recursive inclusion of types as sub-members. If there is recursion /// the entry becomes IncompleteUsed. /// /// During the expansion of a RecordType's members: /// /// If the cache contains a NonRecursive encoding for the member type, the /// cached encoding is used; /// /// If the cache contains a Recursive encoding for the member type, the /// cached encoding is 'Swapped' out, as it may be incorrect, and... /// /// If the member is a RecordType, an Incomplete encoding is placed into the /// cache to break potential recursive inclusion of itself as a sub-member; /// /// Once a member RecordType has been expanded, its temporary incomplete /// entry is removed from the cache. If a Recursive encoding was swapped out /// it is swapped back in; /// /// If an incomplete entry is used to expand a sub-member, the incomplete /// entry is marked as IncompleteUsed.
The cache keeps count of how many /// IncompleteUsed entries it currently contains in IncompleteUsedCount; /// /// If a member's encoding is found to be a NonRecursive or Recursive viz: /// IncompleteUsedCount==0, the member's encoding is added to the cache. /// Else the member is part of a recursive type and thus the recursion has /// been exited too soon for the encoding to be correct for the member. /// class TypeStringCache { enum Status {NonRecursive, Recursive, Incomplete, IncompleteUsed}; struct Entry { std::string Str; // The encoded TypeString for the type. enum Status State; // Information about the encoding in 'Str'. std::string Swapped; // A temporary placeholder for a Recursive encoding // during the expansion of RecordType's members. }; std::map<const IdentifierInfo *, struct Entry> Map; unsigned IncompleteCount; // Number of Incomplete entries in the Map. unsigned IncompleteUsedCount; // Number of IncompleteUsed entries in the Map. public: TypeStringCache() : IncompleteCount(0), IncompleteUsedCount(0) {} void addIncomplete(const IdentifierInfo *ID, std::string StubEnc); bool removeIncomplete(const IdentifierInfo *ID); void addIfComplete(const IdentifierInfo *ID, StringRef Str, bool IsRecursive); StringRef lookupStr(const IdentifierInfo *ID); }; /// TypeString encodings for enum & union fields must be ordered. /// FieldEncoding is a helper for this ordering process. class FieldEncoding { bool HasName; std::string Enc; public: FieldEncoding(bool b, SmallStringEnc &e) : HasName(b), Enc(e.c_str()) {} StringRef str() {return Enc.c_str();} bool operator<(const FieldEncoding &rhs) const { if (HasName != rhs.HasName) return HasName; return Enc < rhs.Enc; } }; class XCoreABIInfo : public DefaultABIInfo { public: XCoreABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; }; class XCoreTargetCodeGenInfo : public TargetCodeGenInfo { mutable TypeStringCache TSC; public: XCoreTargetCodeGenInfo(CodeGenTypes &CGT) :TargetCodeGenInfo(new XCoreABIInfo(CGT)) {} void emitTargetMD(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; }; } // End anonymous namespace. // TODO: this implementation is likely now redundant with the default // EmitVAArg. Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { CGBuilderTy &Builder = CGF.Builder; // Get the VAList. CharUnits SlotSize = CharUnits::fromQuantity(4); Address AP(Builder.CreateLoad(VAListAddr), SlotSize); // Handle the argument.
ABIArgInfo AI = classifyArgumentType(Ty); CharUnits TypeAlign = getContext().getTypeAlignInChars(Ty); llvm::Type *ArgTy = CGT.ConvertType(Ty); if (AI.canHaveCoerceToType() && !AI.getCoerceToType()) AI.setCoerceToType(ArgTy); llvm::Type *ArgPtrTy = llvm::PointerType::getUnqual(ArgTy); Address Val = Address::invalid(); CharUnits ArgSize = CharUnits::Zero(); switch (AI.getKind()) { case ABIArgInfo::Expand: case ABIArgInfo::CoerceAndExpand: case ABIArgInfo::InAlloca: llvm_unreachable("Unsupported ABI kind for va_arg"); case ABIArgInfo::Ignore: Val = Address(llvm::UndefValue::get(ArgPtrTy), TypeAlign); ArgSize = CharUnits::Zero(); break; case ABIArgInfo::Extend: case ABIArgInfo::Direct: Val = Builder.CreateBitCast(AP, ArgPtrTy); ArgSize = CharUnits::fromQuantity( getDataLayout().getTypeAllocSize(AI.getCoerceToType())); ArgSize = ArgSize.alignTo(SlotSize); break; case ABIArgInfo::Indirect: Val = Builder.CreateElementBitCast(AP, ArgPtrTy); Val = Address(Builder.CreateLoad(Val), TypeAlign); ArgSize = SlotSize; break; } // Increment the VAList. if (!ArgSize.isZero()) { llvm::Value *APN = Builder.CreateConstInBoundsByteGEP(AP.getPointer(), ArgSize); Builder.CreateStore(APN, VAListAddr); } return Val; } /// During the expansion of a RecordType, an incomplete TypeString is placed /// into the cache as a means to identify and break recursion. /// If there is a Recursive encoding in the cache, it is swapped out and will /// be reinserted by removeIncomplete(). /// All other types of encoding should have been used rather than arriving here. void TypeStringCache::addIncomplete(const IdentifierInfo *ID, std::string StubEnc) { if (!ID) return; Entry &E = Map[ID]; assert( (E.Str.empty() || E.State == Recursive) && "Incorrect use of addIncomplete"); assert(!StubEnc.empty() && "Passing an empty string to addIncomplete()"); E.Swapped.swap(E.Str); // swap out the Recursive E.Str.swap(StubEnc); E.State = Incomplete; ++IncompleteCount; } /// Once the RecordType has been expanded, the temporary incomplete TypeString /// must be removed from the cache. /// If a Recursive was swapped out by addIncomplete(), it will be replaced. /// Returns true if the RecordType was defined recursively. bool TypeStringCache::removeIncomplete(const IdentifierInfo *ID) { if (!ID) return false; auto I = Map.find(ID); assert(I != Map.end() && "Entry not present"); Entry &E = I->second; assert( (E.State == Incomplete || E.State == IncompleteUsed) && "Entry must be an incomplete type"); bool IsRecursive = false; if (E.State == IncompleteUsed) { // We made use of our Incomplete encoding, thus we are recursive. IsRecursive = true; --IncompleteUsedCount; } if (E.Swapped.empty()) Map.erase(I); else { // Swap the Recursive back. E.Swapped.swap(E.Str); E.Swapped.clear(); E.State = Recursive; } --IncompleteCount; return IsRecursive; } /// Add the encoded TypeString to the cache only if it is NonRecursive or /// Recursive (viz: all sub-members were expanded as fully as possible). void TypeStringCache::addIfComplete(const IdentifierInfo *ID, StringRef Str, bool IsRecursive) { if (!ID || IncompleteUsedCount) return; // No key or it is an incomplete sub-type so don't add. Entry &E = Map[ID]; if (IsRecursive && !E.Str.empty()) { assert(E.State==Recursive && E.Str.size() == Str.size() && "This is not the same Recursive entry"); // The parent container was not recursive after all, so we could have used // this Recursive sub-member entry after all, but we assumed the worst when // we started viz: IncompleteCount!=0.
return; } assert(E.Str.empty() && "Entry already present"); E.Str = Str.str(); E.State = IsRecursive? Recursive : NonRecursive; } /// Return a cached TypeString encoding for the ID. If there isn't one, or we /// are recursively expanding a type (IncompleteCount != 0) and the cached /// encoding is Recursive, return an empty StringRef. StringRef TypeStringCache::lookupStr(const IdentifierInfo *ID) { if (!ID) return StringRef(); // We have no key. auto I = Map.find(ID); if (I == Map.end()) return StringRef(); // We have no encoding. Entry &E = I->second; if (E.State == Recursive && IncompleteCount) return StringRef(); // We don't use Recursive encodings for member types. if (E.State == Incomplete) { // The incomplete type is being used to break out of recursion. E.State = IncompleteUsed; ++IncompleteUsedCount; } return E.Str.c_str(); } /// The XCore ABI includes a type information section that communicates symbol /// type information to the linker. The linker uses this information to verify /// safety/correctness of things such as array bounds and pointers et al. /// The ABI only requires C (and XC) language modules to emit TypeStrings. /// This type information (TypeString) is emitted into metadata for all global /// symbols: definitions, declarations, functions & variables. /// /// The TypeString carries type, qualifier, name, size & value details. /// Please see 'Tools Development Guide' section 2.16.2 for format details: /// https://www.xmos.com/download/public/Tools-Development-Guide%28X9114A%29.pdf /// The output is tested by test/CodeGen/xcore-stringtype.c. /// static bool getTypeString(SmallStringEnc &Enc, const Decl *D, CodeGen::CodeGenModule &CGM, TypeStringCache &TSC); /// XCore uses emitTargetMD to emit TypeString metadata for global symbols. void XCoreTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { SmallStringEnc Enc; if (getTypeString(Enc, D, CGM, TSC)) { llvm::LLVMContext &Ctx = CGM.getModule().getContext(); llvm::Metadata *MDVals[] = {llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Enc.str())}; llvm::NamedMDNode *MD = CGM.getModule().getOrInsertNamedMetadata("xcore.typestrings"); MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); } } //===----------------------------------------------------------------------===// // SPIR ABI Implementation //===----------------------------------------------------------------------===// namespace { class SPIRTargetCodeGenInfo : public TargetCodeGenInfo { public: SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} void emitTargetMD(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; unsigned getOpenCLKernelCallingConv() const override; }; } // End anonymous namespace. /// Emit SPIR specific metadata: OpenCL and SPIR version. void SPIRTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { llvm::LLVMContext &Ctx = CGM.getModule().getContext(); llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx); llvm::Module &M = CGM.getModule(); // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the // opencl.spir.version named metadata.
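// Illustrative output (not in the original source): the code below leaves
// the module with named metadata of the form
//   !opencl.spir.version = !{!0}
//   !0 = !{i32 2, i32 0}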
llvm::Metadata *SPIRVerElts[] = { llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 2)), llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 0))}; llvm::NamedMDNode *SPIRVerMD = M.getOrInsertNamedMetadata("opencl.spir.version"); SPIRVerMD->addOperand(llvm::MDNode::get(Ctx, SPIRVerElts)); // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the // opencl.ocl.version named metadata node. llvm::Metadata *OCLVerElts[] = { llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( Int32Ty, CGM.getLangOpts().OpenCLVersion / 100)), llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( Int32Ty, (CGM.getLangOpts().OpenCLVersion % 100) / 10))}; llvm::NamedMDNode *OCLVerMD = M.getOrInsertNamedMetadata("opencl.ocl.version"); OCLVerMD->addOperand(llvm::MDNode::get(Ctx, OCLVerElts)); } unsigned SPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const { return llvm::CallingConv::SPIR_KERNEL; } static bool appendType(SmallStringEnc &Enc, QualType QType, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC); /// Helper function for appendRecordType(). /// Builds a SmallVector containing the encoded field types in declaration /// order. static bool extractFieldType(SmallVectorImpl<FieldEncoding> &FE, const RecordDecl *RD, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC) { for (const auto *Field : RD->fields()) { SmallStringEnc Enc; Enc += "m("; Enc += Field->getName(); Enc += "){"; if (Field->isBitField()) { Enc += "b("; llvm::raw_svector_ostream OS(Enc); OS << Field->getBitWidthValue(CGM.getContext()); Enc += ':'; } if (!appendType(Enc, Field->getType(), CGM, TSC)) return false; if (Field->isBitField()) Enc += ')'; Enc += '}'; FE.emplace_back(!Field->getName().empty(), Enc); } return true; } /// Appends structure and union types to Enc and adds encoding to cache. /// Recursively calls appendType (via extractFieldType) for each field. /// Union types have their fields ordered according to the ABI. static bool appendRecordType(SmallStringEnc &Enc, const RecordType *RT, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC, const IdentifierInfo *ID) { // Append the cached TypeString if we have one. StringRef TypeString = TSC.lookupStr(ID); if (!TypeString.empty()) { Enc += TypeString; return true; } // Start to emit an incomplete TypeString. size_t Start = Enc.size(); Enc += (RT->isUnionType()? 'u' : 's'); Enc += '('; if (ID) Enc += ID->getName(); Enc += "){"; // We collect all encoded fields and order as necessary. bool IsRecursive = false; const RecordDecl *RD = RT->getDecl()->getDefinition(); if (RD && !RD->field_empty()) { // An incomplete TypeString stub is placed in the cache for this RecordType // so that recursive calls to this RecordType will use it whilst building a // complete TypeString for this RecordType. SmallVector<FieldEncoding, 16> FE; std::string StubEnc(Enc.substr(Start).str()); StubEnc += '}'; // StubEnc now holds a valid incomplete TypeString. TSC.addIncomplete(ID, std::move(StubEnc)); if (!extractFieldType(FE, RD, CGM, TSC)) { (void) TSC.removeIncomplete(ID); return false; } IsRecursive = TSC.removeIncomplete(ID); // The ABI requires unions to be sorted but not structures. // See FieldEncoding::operator< for sort algorithm. if (RT->isUnionType()) std::sort(FE.begin(), FE.end()); // We can now complete the TypeString. unsigned E = FE.size(); for (unsigned I = 0; I != E; ++I) { if (I) Enc += ','; Enc += FE[I].str(); } } Enc += '}'; TSC.addIfComplete(ID, Enc.substr(Start), IsRecursive); return true; } /// Appends enum types to Enc and adds the encoding to the cache.
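/// Illustrative example (not in the original source): 'enum E { A = 1, B = 2 };'
/// encodes as "e(E){m(A){1},m(B){2}}", with the enumerators ordered via
/// FieldEncoding's comparison before being joined with commas.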
static bool appendEnumType(SmallStringEnc &Enc, const EnumType *ET, TypeStringCache &TSC, const IdentifierInfo *ID) { // Append the cached TypeString if we have one. StringRef TypeString = TSC.lookupStr(ID); if (!TypeString.empty()) { Enc += TypeString; return true; } size_t Start = Enc.size(); Enc += "e("; if (ID) Enc += ID->getName(); Enc += "){"; // We collect all encoded enumerations and order them alphanumerically. if (const EnumDecl *ED = ET->getDecl()->getDefinition()) { SmallVector<FieldEncoding, 16> FE; for (auto I = ED->enumerator_begin(), E = ED->enumerator_end(); I != E; ++I) { SmallStringEnc EnumEnc; EnumEnc += "m("; EnumEnc += I->getName(); EnumEnc += "){"; I->getInitVal().toString(EnumEnc); EnumEnc += '}'; FE.push_back(FieldEncoding(!I->getName().empty(), EnumEnc)); } std::sort(FE.begin(), FE.end()); unsigned E = FE.size(); for (unsigned I = 0; I != E; ++I) { if (I) Enc += ','; Enc += FE[I].str(); } } Enc += '}'; TSC.addIfComplete(ID, Enc.substr(Start), false); return true; } /// Appends type's qualifier to Enc. /// This is done prior to appending the type's encoding. static void appendQualifier(SmallStringEnc &Enc, QualType QT) { // Qualifiers are emitted in alphabetical order. static const char *const Table[]={"","c:","r:","cr:","v:","cv:","rv:","crv:"}; int Lookup = 0; if (QT.isConstQualified()) Lookup += 1<<0; if (QT.isRestrictQualified()) Lookup += 1<<1; if (QT.isVolatileQualified()) Lookup += 1<<2; Enc += Table[Lookup]; } /// Appends built-in types to Enc. static bool appendBuiltinType(SmallStringEnc &Enc, const BuiltinType *BT) { const char *EncType; switch (BT->getKind()) { case BuiltinType::Void: EncType = "0"; break; case BuiltinType::Bool: EncType = "b"; break; case BuiltinType::Char_U: EncType = "uc"; break; case BuiltinType::UChar: EncType = "uc"; break; case BuiltinType::SChar: EncType = "sc"; break; case BuiltinType::UShort: EncType = "us"; break; case BuiltinType::Short: EncType = "ss"; break; case BuiltinType::UInt: EncType = "ui"; break; case BuiltinType::Int: EncType = "si"; break; case BuiltinType::ULong: EncType = "ul"; break; case BuiltinType::Long: EncType = "sl"; break; case BuiltinType::ULongLong: EncType = "ull"; break; case BuiltinType::LongLong: EncType = "sll"; break; case BuiltinType::Float: EncType = "ft"; break; case BuiltinType::Double: EncType = "d"; break; case BuiltinType::LongDouble: EncType = "ld"; break; default: return false; } Enc += EncType; return true; } /// Appends a pointer encoding to Enc before calling appendType for the pointee. static bool appendPointerType(SmallStringEnc &Enc, const PointerType *PT, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC) { Enc += "p("; if (!appendType(Enc, PT->getPointeeType(), CGM, TSC)) return false; Enc += ')'; return true; } /// Appends array encoding to Enc before calling appendType for the element. static bool appendArrayType(SmallStringEnc &Enc, QualType QT, const ArrayType *AT, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC, StringRef NoSizeEnc) { if (AT->getSizeModifier() != ArrayType::Normal) return false; Enc += "a("; if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT)) CAT->getSize().toStringUnsigned(Enc); else Enc += NoSizeEnc; // Global arrays use "*", otherwise it is "". Enc += ':'; // The Qualifiers should be attached to the type rather than the array. appendQualifier(Enc, QT); if (!appendType(Enc, AT->getElementType(), CGM, TSC)) return false; Enc += ')'; return true; } /// Appends a function encoding to Enc, calling appendType for the return type /// and the arguments.
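/// Illustrative examples (not in the original source), using the builtin
/// encodings above: 'int f(int)' yields "f{si}(si)", 'int f(void)' yields
/// "f{si}(0)", and a variadic 'int f(int, ...)' yields "f{si}(si,va)".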
static bool appendFunctionType(SmallStringEnc &Enc, const FunctionType *FT,
                               const CodeGen::CodeGenModule &CGM,
                               TypeStringCache &TSC) {
  Enc += "f{";
  if (!appendType(Enc, FT->getReturnType(), CGM, TSC))
    return false;
  Enc += "}(";
  if (const FunctionProtoType *FPT = FT->getAs<FunctionProtoType>()) {
    // N.B. we are only interested in the adjusted param types.
    auto I = FPT->param_type_begin();
    auto E = FPT->param_type_end();
    if (I != E) {
      do {
        if (!appendType(Enc, *I, CGM, TSC))
          return false;
        ++I;
        if (I != E)
          Enc += ',';
      } while (I != E);
      if (FPT->isVariadic())
        Enc += ",va";
    } else {
      if (FPT->isVariadic())
        Enc += "va";
      else
        Enc += '0';
    }
  }
  Enc += ')';
  return true;
}

/// Handles the type's qualifier before dispatching a call to handle specific
/// type encodings.
static bool appendType(SmallStringEnc &Enc, QualType QType,
                       const CodeGen::CodeGenModule &CGM,
                       TypeStringCache &TSC) {

  QualType QT = QType.getCanonicalType();

  if (const ArrayType *AT = QT->getAsArrayTypeUnsafe())
    // The Qualifiers should be attached to the type rather than the array.
    // Thus we don't call appendQualifier() here.
    return appendArrayType(Enc, QT, AT, CGM, TSC, "");

  appendQualifier(Enc, QT);

  if (const BuiltinType *BT = QT->getAs<BuiltinType>())
    return appendBuiltinType(Enc, BT);

  if (const PointerType *PT = QT->getAs<PointerType>())
    return appendPointerType(Enc, PT, CGM, TSC);

  if (const EnumType *ET = QT->getAs<EnumType>())
    return appendEnumType(Enc, ET, TSC, QT.getBaseTypeIdentifier());

  if (const RecordType *RT = QT->getAsStructureType())
    return appendRecordType(Enc, RT, CGM, TSC, QT.getBaseTypeIdentifier());

  if (const RecordType *RT = QT->getAsUnionType())
    return appendRecordType(Enc, RT, CGM, TSC, QT.getBaseTypeIdentifier());

  if (const FunctionType *FT = QT->getAs<FunctionType>())
    return appendFunctionType(Enc, FT, CGM, TSC);

  return false;
}

static bool getTypeString(SmallStringEnc &Enc, const Decl *D,
                          CodeGen::CodeGenModule &CGM, TypeStringCache &TSC) {
  if (!D)
    return false;

  if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
    if (FD->getLanguageLinkage() != CLanguageLinkage)
      return false;
    return appendType(Enc, FD->getType(), CGM, TSC);
  }

  if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
    if (VD->getLanguageLinkage() != CLanguageLinkage)
      return false;
    QualType QT = VD->getType().getCanonicalType();
    if (const ArrayType *AT = QT->getAsArrayTypeUnsafe()) {
      // Global ArrayTypes are given a size of '*' if the size is unknown.
      // The Qualifiers should be attached to the type rather than the array.
      // Thus we don't call appendQualifier() here.
      return appendArrayType(Enc, QT, AT, CGM, TSC, "*");
    }
    return appendType(Enc, QT, CGM, TSC);
  }
  return false;
}

//===----------------------------------------------------------------------===//
// Driver code
//===----------------------------------------------------------------------===//

const llvm::Triple &CodeGenModule::getTriple() const {
  return getTarget().getTriple();
}

bool CodeGenModule::supportsCOMDAT() const {
  return getTriple().supportsCOMDAT();
}

const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
  if (TheTargetCodeGenInfo)
    return *TheTargetCodeGenInfo;

  // Helper to set the unique_ptr while still keeping the return value.
auto SetCGInfo = [&](TargetCodeGenInfo *P) -> const TargetCodeGenInfo & { this->TheTargetCodeGenInfo.reset(P); return *P; }; const llvm::Triple &Triple = getTarget().getTriple(); switch (Triple.getArch()) { default: return SetCGInfo(new DefaultTargetCodeGenInfo(Types)); case llvm::Triple::le32: return SetCGInfo(new PNaClTargetCodeGenInfo(Types)); case llvm::Triple::mips: case llvm::Triple::mipsel: if (Triple.getOS() == llvm::Triple::NaCl) return SetCGInfo(new PNaClTargetCodeGenInfo(Types)); return SetCGInfo(new MIPSTargetCodeGenInfo(Types, true)); case llvm::Triple::mips64: case llvm::Triple::mips64el: return SetCGInfo(new MIPSTargetCodeGenInfo(Types, false)); case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: { AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS; if (getTarget().getABI() == "darwinpcs") Kind = AArch64ABIInfo::DarwinPCS; return SetCGInfo(new AArch64TargetCodeGenInfo(Types, Kind)); } case llvm::Triple::wasm32: case llvm::Triple::wasm64: return SetCGInfo(new WebAssemblyTargetCodeGenInfo(Types)); case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: { if (Triple.getOS() == llvm::Triple::Win32) { return SetCGInfo( new WindowsARMTargetCodeGenInfo(Types, ARMABIInfo::AAPCS_VFP)); } ARMABIInfo::ABIKind Kind = ARMABIInfo::AAPCS; StringRef ABIStr = getTarget().getABI(); if (ABIStr == "apcs-gnu") Kind = ARMABIInfo::APCS; else if (ABIStr == "aapcs16") Kind = ARMABIInfo::AAPCS16_VFP; else if (CodeGenOpts.FloatABI == "hard" || (CodeGenOpts.FloatABI != "soft" && (Triple.getEnvironment() == llvm::Triple::GNUEABIHF || Triple.getEnvironment() == llvm::Triple::MuslEABIHF || Triple.getEnvironment() == llvm::Triple::EABIHF))) Kind = ARMABIInfo::AAPCS_VFP; return SetCGInfo(new ARMTargetCodeGenInfo(Types, Kind)); } case llvm::Triple::ppc: return SetCGInfo( new PPC32TargetCodeGenInfo(Types, CodeGenOpts.FloatABI == "soft")); case llvm::Triple::ppc64: if (Triple.isOSBinFormatELF()) { PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv1; if (getTarget().getABI() == "elfv2") Kind = PPC64_SVR4_ABIInfo::ELFv2; bool HasQPX = getTarget().getABI() == "elfv1-qpx"; + bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; - return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX)); + return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX, + IsSoftFloat)); } else return SetCGInfo(new PPC64TargetCodeGenInfo(Types)); case llvm::Triple::ppc64le: { assert(Triple.isOSBinFormatELF() && "PPC64 LE non-ELF not supported!"); PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv2; if (getTarget().getABI() == "elfv1" || getTarget().getABI() == "elfv1-qpx") Kind = PPC64_SVR4_ABIInfo::ELFv1; bool HasQPX = getTarget().getABI() == "elfv1-qpx"; + bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; - return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX)); + return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX, + IsSoftFloat)); } case llvm::Triple::nvptx: case llvm::Triple::nvptx64: return SetCGInfo(new NVPTXTargetCodeGenInfo(Types)); case llvm::Triple::msp430: return SetCGInfo(new MSP430TargetCodeGenInfo(Types)); case llvm::Triple::systemz: { bool HasVector = getTarget().getABI() == "vector"; return SetCGInfo(new SystemZTargetCodeGenInfo(Types, HasVector)); } case llvm::Triple::tce: return SetCGInfo(new TCETargetCodeGenInfo(Types)); case llvm::Triple::x86: { bool IsDarwinVectorABI = Triple.isOSDarwin(); bool RetSmallStructInRegABI = X86_32TargetCodeGenInfo::isStructReturnInRegABI(Triple, 
        CodeGenOpts);
    bool IsWin32FloatStructABI = Triple.isOSWindows() && !Triple.isOSCygMing();

    if (Triple.getOS() == llvm::Triple::Win32) {
      return SetCGInfo(new WinX86_32TargetCodeGenInfo(
          Types, IsDarwinVectorABI, RetSmallStructInRegABI,
          IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters));
    } else {
      return SetCGInfo(new X86_32TargetCodeGenInfo(
          Types, IsDarwinVectorABI, RetSmallStructInRegABI,
          IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters,
          CodeGenOpts.FloatABI == "soft"));
    }
  }
  case llvm::Triple::x86_64: {
    StringRef ABI = getTarget().getABI();
    X86AVXABILevel AVXLevel = (ABI == "avx512"
                                   ? X86AVXABILevel::AVX512
                                   : ABI == "avx" ? X86AVXABILevel::AVX
                                                  : X86AVXABILevel::None);

    switch (Triple.getOS()) {
    case llvm::Triple::Win32:
      return SetCGInfo(new WinX86_64TargetCodeGenInfo(Types, AVXLevel));
    case llvm::Triple::PS4:
      return SetCGInfo(new PS4TargetCodeGenInfo(Types, AVXLevel));
    default:
      return SetCGInfo(new X86_64TargetCodeGenInfo(Types, AVXLevel));
    }
  }
  case llvm::Triple::hexagon:
    return SetCGInfo(new HexagonTargetCodeGenInfo(Types));
  case llvm::Triple::lanai:
    return SetCGInfo(new LanaiTargetCodeGenInfo(Types));
  case llvm::Triple::r600:
    return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types));
  case llvm::Triple::amdgcn:
    return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types));
  case llvm::Triple::sparc:
    return SetCGInfo(new SparcV8TargetCodeGenInfo(Types));
  case llvm::Triple::sparcv9:
    return SetCGInfo(new SparcV9TargetCodeGenInfo(Types));
  case llvm::Triple::xcore:
    return SetCGInfo(new XCoreTargetCodeGenInfo(Types));
  case llvm::Triple::spir:
  case llvm::Triple::spir64:
    return SetCGInfo(new SPIRTargetCodeGenInfo(Types));
  }
}
Index: head/contrib/llvm/tools/clang/lib/Driver/Tools.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Driver/Tools.cpp	(revision 309148)
+++ head/contrib/llvm/tools/clang/lib/Driver/Tools.cpp	(revision 309149)
@@ -1,11356 +1,11349 @@
//===--- Tools.cpp - Tools Implementations ----------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "Tools.h"
#include "InputInfo.h"
#include "ToolChains.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/Version.h"
#include "clang/Config/config.h"
#include "clang/Driver/Action.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/Job.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "clang/Driver/ToolChain.h"
#include "clang/Driver/Util.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetParser.h"

#ifdef LLVM_ON_UNIX
#include <unistd.h> // For getuid().
#endif

using namespace clang::driver;
using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;

static void handleTargetFeaturesGroup(const ArgList &Args,
                                      std::vector<const char *> &Features,
                                      OptSpecifier Group) {
  for (const Arg *A : Args.filtered(Group)) {
    StringRef Name = A->getOption().getName();
    A->claim();

    // Skip over "-m".
    assert(Name.startswith("m") && "Invalid feature name.");
    Name = Name.substr(1);

    bool IsNegative = Name.startswith("no-");
    if (IsNegative)
      Name = Name.substr(3);
    Features.push_back(Args.MakeArgString((IsNegative ? "-" : "+") + Name));
  }
}

static const char *getSparcAsmModeForCPU(StringRef Name,
                                         const llvm::Triple &Triple) {
  if (Triple.getArch() == llvm::Triple::sparcv9) {
    return llvm::StringSwitch<const char *>(Name)
        .Case("niagara", "-Av9b")
        .Case("niagara2", "-Av9b")
        .Case("niagara3", "-Av9d")
        .Case("niagara4", "-Av9d")
        .Default("-Av9");
  } else {
    return llvm::StringSwitch<const char *>(Name)
        .Case("v8", "-Av8")
        .Case("supersparc", "-Av8")
        .Case("sparclite", "-Asparclite")
        .Case("f934", "-Asparclite")
        .Case("hypersparc", "-Av8")
        .Case("sparclite86x", "-Asparclite")
        .Case("sparclet", "-Asparclet")
        .Case("tsc701", "-Asparclet")
        .Case("v9", "-Av8plus")
        .Case("ultrasparc", "-Av8plus")
        .Case("ultrasparc3", "-Av8plus")
        .Case("niagara", "-Av8plusb")
        .Case("niagara2", "-Av8plusb")
        .Case("niagara3", "-Av8plusd")
        .Case("niagara4", "-Av8plusd")
        .Case("leon2", "-Av8")
        .Case("at697e", "-Av8")
        .Case("at697f", "-Av8")
        .Case("leon3", "-Av8")
        .Case("ut699", "-Av8")
        .Case("gr712rc", "-Av8")
        .Case("leon4", "-Av8")
        .Case("gr740", "-Av8")
        .Default("-Av8");
  }
}

/// CheckPreprocessingOptions - Perform some validation of preprocessing
/// arguments that is shared with gcc.
static void CheckPreprocessingOptions(const Driver &D, const ArgList &Args) {
  if (Arg *A = Args.getLastArg(options::OPT_C, options::OPT_CC)) {
    if (!Args.hasArg(options::OPT_E) && !Args.hasArg(options::OPT__SLASH_P) &&
        !Args.hasArg(options::OPT__SLASH_EP) && !D.CCCIsCPP()) {
      D.Diag(diag::err_drv_argument_only_allowed_with)
          << A->getBaseArg().getAsString(Args)
          << (D.IsCLMode() ? "/E, /P or /EP" : "-E");
    }
  }
}

/// CheckCodeGenerationOptions - Perform some validation of code generation
/// arguments that is shared with gcc.
static void CheckCodeGenerationOptions(const Driver &D, const ArgList &Args) {
  // In gcc, only ARM checks this, but it seems reasonable to check universally.
  if (Args.hasArg(options::OPT_static))
    if (const Arg *A =
            Args.getLastArg(options::OPT_dynamic, options::OPT_mdynamic_no_pic))
      D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args)
                                                      << "-static";
}

// Add backslashes to escape spaces and other backslashes.
// This is used for the space-separated argument list specified with
// the -dwarf-debug-flags option.
static void EscapeSpacesAndBackslashes(const char *Arg,
                                       SmallVectorImpl<char> &Res) {
  for (; *Arg; ++Arg) {
    switch (*Arg) {
    default:
      break;
    case ' ':
    case '\\':
      Res.push_back('\\');
      break;
    }
    Res.push_back(*Arg);
  }
}

// Quote target names for inclusion in GNU Make dependency files.
// Only the characters '$', '#', ' ', '\t' are quoted.
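// For example (illustrative): QuoteTarget turns "foo bar.o" into
// "foo\ bar.o", "$(OBJ)" into "$$(OBJ)", and "#1.o" into "\#1.o", so GNU
// Make reads the quoted target back as the original file name:
//
//   SmallString<256> Quoted;
//   QuoteTarget("foo bar.o", Quoted); // Quoted == "foo\ bar.o"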
static void QuoteTarget(StringRef Target, SmallVectorImpl &Res) { for (unsigned i = 0, e = Target.size(); i != e; ++i) { switch (Target[i]) { case ' ': case '\t': // Escape the preceding backslashes for (int j = i - 1; j >= 0 && Target[j] == '\\'; --j) Res.push_back('\\'); // Escape the space/tab Res.push_back('\\'); break; case '$': Res.push_back('$'); break; case '#': Res.push_back('\\'); break; default: break; } Res.push_back(Target[i]); } } static void addDirectoryList(const ArgList &Args, ArgStringList &CmdArgs, const char *ArgName, const char *EnvVar) { const char *DirList = ::getenv(EnvVar); bool CombinedArg = false; if (!DirList) return; // Nothing to do. StringRef Name(ArgName); if (Name.equals("-I") || Name.equals("-L")) CombinedArg = true; StringRef Dirs(DirList); if (Dirs.empty()) // Empty string should not add '.'. return; StringRef::size_type Delim; while ((Delim = Dirs.find(llvm::sys::EnvPathSeparator)) != StringRef::npos) { if (Delim == 0) { // Leading colon. if (CombinedArg) { CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + ".")); } else { CmdArgs.push_back(ArgName); CmdArgs.push_back("."); } } else { if (CombinedArg) { CmdArgs.push_back( Args.MakeArgString(std::string(ArgName) + Dirs.substr(0, Delim))); } else { CmdArgs.push_back(ArgName); CmdArgs.push_back(Args.MakeArgString(Dirs.substr(0, Delim))); } } Dirs = Dirs.substr(Delim + 1); } if (Dirs.empty()) { // Trailing colon. if (CombinedArg) { CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + ".")); } else { CmdArgs.push_back(ArgName); CmdArgs.push_back("."); } } else { // Add the last path. if (CombinedArg) { CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + Dirs)); } else { CmdArgs.push_back(ArgName); CmdArgs.push_back(Args.MakeArgString(Dirs)); } } } static void AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs, const ArgList &Args, ArgStringList &CmdArgs) { const Driver &D = TC.getDriver(); // Add extra linker input arguments which are not treated as inputs // (constructed via -Xarch_). Args.AddAllArgValues(CmdArgs, options::OPT_Zlinker_input); for (const auto &II : Inputs) { if (!TC.HasNativeLLVMSupport() && types::isLLVMIR(II.getType())) // Don't try to pass LLVM inputs unless we have native support. D.Diag(diag::err_drv_no_linker_llvm_support) << TC.getTripleString(); // Add filenames immediately. if (II.isFilename()) { CmdArgs.push_back(II.getFilename()); continue; } // Otherwise, this is a linker input argument. const Arg &A = II.getInputArg(); // Handle reserved library options. if (A.getOption().matches(options::OPT_Z_reserved_lib_stdcxx)) TC.AddCXXStdlibLibArgs(Args, CmdArgs); else if (A.getOption().matches(options::OPT_Z_reserved_lib_cckext)) TC.AddCCKextLibArgs(Args, CmdArgs); else if (A.getOption().matches(options::OPT_z)) { // Pass -z prefix for gcc linker compatibility. A.claim(); A.render(Args, CmdArgs); } else { A.renderAsInput(Args, CmdArgs); } } // LIBRARY_PATH - included following the user specified library paths. // and only supported on native toolchains. if (!TC.isCrossCompiling()) addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH"); } /// \brief Determine whether Objective-C automated reference counting is /// enabled. static bool isObjCAutoRefCount(const ArgList &Args) { return Args.hasFlag(options::OPT_fobjc_arc, options::OPT_fno_objc_arc, false); } /// \brief Determine whether we are linking the ObjC runtime. 
static bool isObjCRuntimeLinked(const ArgList &Args) {
  if (isObjCAutoRefCount(Args)) {
    Args.ClaimAllArgs(options::OPT_fobjc_link_runtime);
    return true;
  }
  return Args.hasArg(options::OPT_fobjc_link_runtime);
}

static bool forwardToGCC(const Option &O) {
  // Don't forward inputs from the original command line. They are added from
  // InputInfoList.
  return O.getKind() != Option::InputClass &&
         !O.hasFlag(options::DriverOption) && !O.hasFlag(options::LinkerInput);
}

/// Add the C++ include args of other offloading toolchains. If this is a host
/// job, the device toolchains are added. If this is a device job, the host
/// toolchains will be added.
static void addExtraOffloadCXXStdlibIncludeArgs(Compilation &C,
                                                const JobAction &JA,
                                                const ArgList &Args,
                                                ArgStringList &CmdArgs) {
  if (JA.isHostOffloading(Action::OFK_Cuda))
    C.getSingleOffloadToolChain<Action::OFK_Cuda>()
        ->AddClangCXXStdlibIncludeArgs(Args, CmdArgs);
  else if (JA.isDeviceOffloading(Action::OFK_Cuda))
    C.getSingleOffloadToolChain<Action::OFK_Host>()
        ->AddClangCXXStdlibIncludeArgs(Args, CmdArgs);

  // TODO: Add support for other programming models here.
}

/// Add the include args that are specific of each offloading programming model.
static void addExtraOffloadSpecificIncludeArgs(Compilation &C,
                                               const JobAction &JA,
                                               const ArgList &Args,
                                               ArgStringList &CmdArgs) {
  if (JA.isHostOffloading(Action::OFK_Cuda))
    C.getSingleOffloadToolChain<Action::OFK_Cuda>()->AddCudaIncludeArgs(
        Args, CmdArgs);
  else if (JA.isDeviceOffloading(Action::OFK_Cuda))
    C.getSingleOffloadToolChain<Action::OFK_Host>()->AddCudaIncludeArgs(
        Args, CmdArgs);

  // TODO: Add support for other programming models here.
}

void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
                                    const Driver &D, const ArgList &Args,
                                    ArgStringList &CmdArgs,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs) const {
  Arg *A;
  const bool IsIAMCU = getToolChain().getTriple().isOSIAMCU();

  CheckPreprocessingOptions(D, Args);

  Args.AddLastArg(CmdArgs, options::OPT_C);
  Args.AddLastArg(CmdArgs, options::OPT_CC);

  // Handle dependency file generation.
  if ((A = Args.getLastArg(options::OPT_M, options::OPT_MM)) ||
      (A = Args.getLastArg(options::OPT_MD)) ||
      (A = Args.getLastArg(options::OPT_MMD))) {
    // Determine the output location.
    const char *DepFile;
    if (Arg *MF = Args.getLastArg(options::OPT_MF)) {
      DepFile = MF->getValue();
      C.addFailureResultFile(DepFile, &JA);
    } else if (Output.getType() == types::TY_Dependencies) {
      DepFile = Output.getFilename();
    } else if (A->getOption().matches(options::OPT_M) ||
               A->getOption().matches(options::OPT_MM)) {
      DepFile = "-";
    } else {
      DepFile = getDependencyFileName(Args, Inputs);
      C.addFailureResultFile(DepFile, &JA);
    }
    CmdArgs.push_back("-dependency-file");
    CmdArgs.push_back(DepFile);

    // Add a default target if one wasn't specified.
    if (!Args.hasArg(options::OPT_MT) && !Args.hasArg(options::OPT_MQ)) {
      const char *DepTarget;

      // If user provided -o, that is the dependency target, except
      // when we are only generating a dependency file.
      Arg *OutputOpt = Args.getLastArg(options::OPT_o);
      if (OutputOpt && Output.getType() != types::TY_Dependencies) {
        DepTarget = OutputOpt->getValue();
      } else {
        // Otherwise derive from the base input.
        //
        // FIXME: This should use the computed output file location.
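        // For example (illustrative): a base input of "src/foo.c" with no -o
        // yields the dependency target "foo.o" -- replace_extension() gives
        // "src/foo.o", and path::filename() then drops the directory part.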
SmallString<128> P(Inputs[0].getBaseInput()); llvm::sys::path::replace_extension(P, "o"); DepTarget = Args.MakeArgString(llvm::sys::path::filename(P)); } CmdArgs.push_back("-MT"); SmallString<128> Quoted; QuoteTarget(DepTarget, Quoted); CmdArgs.push_back(Args.MakeArgString(Quoted)); } if (A->getOption().matches(options::OPT_M) || A->getOption().matches(options::OPT_MD)) CmdArgs.push_back("-sys-header-deps"); if ((isa(JA) && !Args.hasArg(options::OPT_fno_module_file_deps)) || Args.hasArg(options::OPT_fmodule_file_deps)) CmdArgs.push_back("-module-file-deps"); } if (Args.hasArg(options::OPT_MG)) { if (!A || A->getOption().matches(options::OPT_MD) || A->getOption().matches(options::OPT_MMD)) D.Diag(diag::err_drv_mg_requires_m_or_mm); CmdArgs.push_back("-MG"); } Args.AddLastArg(CmdArgs, options::OPT_MP); Args.AddLastArg(CmdArgs, options::OPT_MV); // Convert all -MQ args to -MT for (const Arg *A : Args.filtered(options::OPT_MT, options::OPT_MQ)) { A->claim(); if (A->getOption().matches(options::OPT_MQ)) { CmdArgs.push_back("-MT"); SmallString<128> Quoted; QuoteTarget(A->getValue(), Quoted); CmdArgs.push_back(Args.MakeArgString(Quoted)); // -MT flag - no change } else { A->render(Args, CmdArgs); } } // Add -i* options, and automatically translate to // -include-pch/-include-pth for transparent PCH support. It's // wonky, but we include looking for .gch so we can support seamless // replacement into a build system already set up to be generating // .gch files. int YcIndex = -1, YuIndex = -1; { int AI = -1; const Arg *YcArg = Args.getLastArg(options::OPT__SLASH_Yc); const Arg *YuArg = Args.getLastArg(options::OPT__SLASH_Yu); for (const Arg *A : Args.filtered(options::OPT_clang_i_Group)) { // Walk the whole i_Group and skip non "-include" flags so that the index // here matches the index in the next loop below. ++AI; if (!A->getOption().matches(options::OPT_include)) continue; if (YcArg && strcmp(A->getValue(), YcArg->getValue()) == 0) YcIndex = AI; if (YuArg && strcmp(A->getValue(), YuArg->getValue()) == 0) YuIndex = AI; } } if (isa(JA) && YcIndex != -1) { Driver::InputList Inputs; D.BuildInputs(getToolChain(), C.getArgs(), Inputs); assert(Inputs.size() == 1 && "Need one input when building pch"); CmdArgs.push_back(Args.MakeArgString(Twine("-find-pch-source=") + Inputs[0].second->getValue())); } bool RenderedImplicitInclude = false; int AI = -1; for (const Arg *A : Args.filtered(options::OPT_clang_i_Group)) { ++AI; if (getToolChain().getDriver().IsCLMode() && A->getOption().matches(options::OPT_include)) { // In clang-cl mode, /Ycfoo.h means that all code up to a foo.h // include is compiled into foo.h, and everything after goes into // the .obj file. /Yufoo.h means that all includes prior to and including // foo.h are completely skipped and replaced with a use of the pch file // for foo.h. (Each flag can have at most one value, multiple /Yc flags // just mean that the last one wins.) If /Yc and /Yu are both present // and refer to the same file, /Yc wins. // Note that OPT__SLASH_FI gets mapped to OPT_include. // FIXME: The code here assumes that /Yc and /Yu refer to the same file. // cl.exe seems to support both flags with different values, but that // seems strange (which flag does /Fp now refer to?), so don't implement // that until someone needs it. int PchIndex = YcIndex != -1 ? YcIndex : YuIndex; if (PchIndex != -1) { if (isa(JA)) { // When building the pch, skip all includes after the pch. 
assert(YcIndex != -1 && PchIndex == YcIndex); if (AI >= YcIndex) continue; } else { // When using the pch, skip all includes prior to the pch. if (AI < PchIndex) { A->claim(); continue; } if (AI == PchIndex) { A->claim(); CmdArgs.push_back("-include-pch"); CmdArgs.push_back( Args.MakeArgString(D.GetClPchPath(C, A->getValue()))); continue; } } } } else if (A->getOption().matches(options::OPT_include)) { // Handling of gcc-style gch precompiled headers. bool IsFirstImplicitInclude = !RenderedImplicitInclude; RenderedImplicitInclude = true; // Use PCH if the user requested it. bool UsePCH = D.CCCUsePCH; bool FoundPTH = false; bool FoundPCH = false; SmallString<128> P(A->getValue()); // We want the files to have a name like foo.h.pch. Add a dummy extension // so that replace_extension does the right thing. P += ".dummy"; if (UsePCH) { llvm::sys::path::replace_extension(P, "pch"); if (llvm::sys::fs::exists(P)) FoundPCH = true; } if (!FoundPCH) { llvm::sys::path::replace_extension(P, "pth"); if (llvm::sys::fs::exists(P)) FoundPTH = true; } if (!FoundPCH && !FoundPTH) { llvm::sys::path::replace_extension(P, "gch"); if (llvm::sys::fs::exists(P)) { FoundPCH = UsePCH; FoundPTH = !UsePCH; } } if (FoundPCH || FoundPTH) { if (IsFirstImplicitInclude) { A->claim(); if (UsePCH) CmdArgs.push_back("-include-pch"); else CmdArgs.push_back("-include-pth"); CmdArgs.push_back(Args.MakeArgString(P)); continue; } else { // Ignore the PCH if not first on command line and emit warning. D.Diag(diag::warn_drv_pch_not_first_include) << P << A->getAsString(Args); } } } else if (A->getOption().matches(options::OPT_isystem_after)) { // Handling of paths which must come late. These entries are handled by // the toolchain itself after the resource dir is inserted in the right // search order. // Do not claim the argument so that the use of the argument does not // silently go unnoticed on toolchains which do not honour the option. continue; } // Not translated, render as usual. A->claim(); A->render(Args, CmdArgs); } Args.AddAllArgs(CmdArgs, {options::OPT_D, options::OPT_U, options::OPT_I_Group, options::OPT_F, options::OPT_index_header_map}); // Add -Wp, and -Xpreprocessor if using the preprocessor. // FIXME: There is a very unfortunate problem here, some troubled // souls abuse -Wp, to pass preprocessor options in gcc syntax. To // really support that we would have to parse and then translate // those options. :( Args.AddAllArgValues(CmdArgs, options::OPT_Wp_COMMA, options::OPT_Xpreprocessor); // -I- is a deprecated GCC feature, reject it. if (Arg *A = Args.getLastArg(options::OPT_I_)) D.Diag(diag::err_drv_I_dash_not_supported) << A->getAsString(Args); // If we have a --sysroot, and don't have an explicit -isysroot flag, add an // -isysroot to the CC1 invocation. StringRef sysroot = C.getSysRoot(); if (sysroot != "") { if (!Args.hasArg(options::OPT_isysroot)) { CmdArgs.push_back("-isysroot"); CmdArgs.push_back(C.getArgs().MakeArgString(sysroot)); } } // Parse additional include paths from environment variables. // FIXME: We should probably sink the logic for handling these from the // frontend into the driver. It will allow deleting 4 otherwise unused flags. // CPATH - included following the user specified includes (but prior to // builtin and standard includes). addDirectoryList(Args, CmdArgs, "-I", "CPATH"); // C_INCLUDE_PATH - system includes enabled when compiling C. addDirectoryList(Args, CmdArgs, "-c-isystem", "C_INCLUDE_PATH"); // CPLUS_INCLUDE_PATH - system includes enabled when compiling C++. 
addDirectoryList(Args, CmdArgs, "-cxx-isystem", "CPLUS_INCLUDE_PATH"); // OBJC_INCLUDE_PATH - system includes enabled when compiling ObjC. addDirectoryList(Args, CmdArgs, "-objc-isystem", "OBJC_INCLUDE_PATH"); // OBJCPLUS_INCLUDE_PATH - system includes enabled when compiling ObjC++. addDirectoryList(Args, CmdArgs, "-objcxx-isystem", "OBJCPLUS_INCLUDE_PATH"); // While adding the include arguments, we also attempt to retrieve the // arguments of related offloading toolchains or arguments that are specific // of an offloading programming model. // Add C++ include arguments, if needed. if (types::isCXX(Inputs[0].getType())) { getToolChain().AddClangCXXStdlibIncludeArgs(Args, CmdArgs); addExtraOffloadCXXStdlibIncludeArgs(C, JA, Args, CmdArgs); } // Add system include arguments for all targets but IAMCU. if (!IsIAMCU) { getToolChain().AddClangSystemIncludeArgs(Args, CmdArgs); addExtraOffloadCXXStdlibIncludeArgs(C, JA, Args, CmdArgs); } else { // For IAMCU add special include arguments. getToolChain().AddIAMCUIncludeArgs(Args, CmdArgs); } // Add offload include arguments, if needed. addExtraOffloadSpecificIncludeArgs(C, JA, Args, CmdArgs); } // FIXME: Move to target hook. static bool isSignedCharDefault(const llvm::Triple &Triple) { switch (Triple.getArch()) { default: return true; case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: if (Triple.isOSDarwin() || Triple.isOSWindows()) return true; return false; case llvm::Triple::ppc: case llvm::Triple::ppc64: if (Triple.isOSDarwin()) return true; return false; case llvm::Triple::hexagon: case llvm::Triple::ppc64le: case llvm::Triple::systemz: case llvm::Triple::xcore: return false; } } static bool isNoCommonDefault(const llvm::Triple &Triple) { switch (Triple.getArch()) { default: return false; case llvm::Triple::xcore: case llvm::Triple::wasm32: case llvm::Triple::wasm64: return true; } } // ARM tools start. // Get SubArch (vN). static int getARMSubArchVersionNumber(const llvm::Triple &Triple) { llvm::StringRef Arch = Triple.getArchName(); return llvm::ARM::parseArchVersion(Arch); } // True if M-profile. static bool isARMMProfile(const llvm::Triple &Triple) { llvm::StringRef Arch = Triple.getArchName(); unsigned Profile = llvm::ARM::parseArchProfile(Arch); return Profile == llvm::ARM::PK_M; } // Get Arch/CPU from args. static void getARMArchCPUFromArgs(const ArgList &Args, llvm::StringRef &Arch, llvm::StringRef &CPU, bool FromAs = false) { if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) CPU = A->getValue(); if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) Arch = A->getValue(); if (!FromAs) return; for (const Arg *A : Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler)) { StringRef Value = A->getValue(); if (Value.startswith("-mcpu=")) CPU = Value.substr(6); if (Value.startswith("-march=")) Arch = Value.substr(7); } } // Handle -mhwdiv=. // FIXME: Use ARMTargetParser. static void getARMHWDivFeatures(const Driver &D, const Arg *A, const ArgList &Args, StringRef HWDiv, std::vector &Features) { unsigned HWDivID = llvm::ARM::parseHWDiv(HWDiv); if (!llvm::ARM::getHWDivFeatures(HWDivID, Features)) D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args); } // Handle -mfpu=. 
static void getARMFPUFeatures(const Driver &D, const Arg *A,
                              const ArgList &Args, StringRef FPU,
                              std::vector<const char *> &Features) {
  unsigned FPUID = llvm::ARM::parseFPU(FPU);
  if (!llvm::ARM::getFPUFeatures(FPUID, Features))
    D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
}

// Decode ARM features from string like +[no]featureA+[no]featureB+...
static bool DecodeARMFeatures(const Driver &D, StringRef text,
                              std::vector<const char *> &Features) {
  SmallVector<StringRef, 8> Split;
  text.split(Split, StringRef("+"), -1, false);

  for (StringRef Feature : Split) {
    const char *FeatureName = llvm::ARM::getArchExtFeature(Feature);
    if (FeatureName)
      Features.push_back(FeatureName);
    else
      return false;
  }
  return true;
}

// Check if -march is valid by checking if it can be canonicalised and parsed.
// getARMArch is used here instead of just checking the -march value in order
// to handle -march=native correctly.
static void checkARMArchName(const Driver &D, const Arg *A, const ArgList &Args,
                             llvm::StringRef ArchName,
                             std::vector<const char *> &Features,
                             const llvm::Triple &Triple) {
  std::pair<StringRef, StringRef> Split = ArchName.split("+");

  std::string MArch = arm::getARMArch(ArchName, Triple);
  if (llvm::ARM::parseArch(MArch) == llvm::ARM::AK_INVALID ||
      (Split.second.size() && !DecodeARMFeatures(D, Split.second, Features)))
    D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
}

// Check -mcpu=. Needs ArchName to handle -mcpu=generic.
static void checkARMCPUName(const Driver &D, const Arg *A, const ArgList &Args,
                            llvm::StringRef CPUName, llvm::StringRef ArchName,
                            std::vector<const char *> &Features,
                            const llvm::Triple &Triple) {
  std::pair<StringRef, StringRef> Split = CPUName.split("+");

  std::string CPU = arm::getARMTargetCPU(CPUName, ArchName, Triple);
  if (arm::getLLVMArchSuffixForARM(CPU, ArchName, Triple).empty() ||
      (Split.second.size() && !DecodeARMFeatures(D, Split.second, Features)))
    D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
}

static bool useAAPCSForMachO(const llvm::Triple &T) {
  // The backend is hardwired to assume AAPCS for M-class processors, ensure
  // the frontend matches that.
  return T.getEnvironment() == llvm::Triple::EABI ||
         T.getOS() == llvm::Triple::UnknownOS || isARMMProfile(T);
}

// Select the float ABI as determined by -msoft-float, -mhard-float, and
// -mfloat-abi=.
arm::FloatABI arm::getARMFloatABI(const ToolChain &TC, const ArgList &Args) {
  const Driver &D = TC.getDriver();
  const llvm::Triple Triple(TC.ComputeEffectiveClangTriple(Args));
  auto SubArch = getARMSubArchVersionNumber(Triple);
  arm::FloatABI ABI = FloatABI::Invalid;
  if (Arg *A =
          Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float,
                          options::OPT_mfloat_abi_EQ)) {
    if (A->getOption().matches(options::OPT_msoft_float)) {
      ABI = FloatABI::Soft;
    } else if (A->getOption().matches(options::OPT_mhard_float)) {
      ABI = FloatABI::Hard;
    } else {
      ABI = llvm::StringSwitch<arm::FloatABI>(A->getValue())
                .Case("soft", FloatABI::Soft)
                .Case("softfp", FloatABI::SoftFP)
                .Case("hard", FloatABI::Hard)
                .Default(FloatABI::Invalid);
      if (ABI == FloatABI::Invalid && !StringRef(A->getValue()).empty()) {
        D.Diag(diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args);
        ABI = FloatABI::Soft;
      }
    }

    // It is incorrect to select hard float ABI on MachO platforms if the ABI is
    // "apcs-gnu".
    if (Triple.isOSBinFormatMachO() && !useAAPCSForMachO(Triple) &&
        ABI == FloatABI::Hard) {
      D.Diag(diag::err_drv_unsupported_opt_for_target) << A->getAsString(Args)
                                                       << Triple.getArchName();
    }
  }

  // If unspecified, choose the default based on the platform.
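  // For example (illustrative): armv7-unknown-linux-gnueabihf defaults to
  // FloatABI::Hard, armv6-unknown-freebsd to FloatABI::Soft, and a bare
  // armv7-none-eabi to FloatABI::SoftFP, matching the switch below.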
if (ABI == FloatABI::Invalid) { switch (Triple.getOS()) { case llvm::Triple::Darwin: case llvm::Triple::MacOSX: case llvm::Triple::IOS: case llvm::Triple::TvOS: { // Darwin defaults to "softfp" for v6 and v7. ABI = (SubArch == 6 || SubArch == 7) ? FloatABI::SoftFP : FloatABI::Soft; ABI = Triple.isWatchABI() ? FloatABI::Hard : ABI; break; } case llvm::Triple::WatchOS: ABI = FloatABI::Hard; break; // FIXME: this is invalid for WindowsCE case llvm::Triple::Win32: ABI = FloatABI::Hard; break; case llvm::Triple::FreeBSD: switch (Triple.getEnvironment()) { case llvm::Triple::GNUEABIHF: ABI = FloatABI::Hard; break; default: // FreeBSD defaults to soft float ABI = FloatABI::Soft; break; } break; default: switch (Triple.getEnvironment()) { case llvm::Triple::GNUEABIHF: case llvm::Triple::MuslEABIHF: case llvm::Triple::EABIHF: ABI = FloatABI::Hard; break; case llvm::Triple::GNUEABI: case llvm::Triple::MuslEABI: case llvm::Triple::EABI: // EABI is always AAPCS, and if it was not marked 'hard', it's softfp ABI = FloatABI::SoftFP; break; case llvm::Triple::Android: ABI = (SubArch == 7) ? FloatABI::SoftFP : FloatABI::Soft; break; default: // Assume "soft", but warn the user we are guessing. if (Triple.isOSBinFormatMachO() && Triple.getSubArch() == llvm::Triple::ARMSubArch_v7em) ABI = FloatABI::Hard; else ABI = FloatABI::Soft; if (Triple.getOS() != llvm::Triple::UnknownOS || !Triple.isOSBinFormatMachO()) D.Diag(diag::warn_drv_assuming_mfloat_abi_is) << "soft"; break; } } } assert(ABI != FloatABI::Invalid && "must select an ABI"); return ABI; } static void getARMTargetFeatures(const ToolChain &TC, const llvm::Triple &Triple, const ArgList &Args, std::vector &Features, bool ForAS) { const Driver &D = TC.getDriver(); bool KernelOrKext = Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext); arm::FloatABI ABI = arm::getARMFloatABI(TC, Args); const Arg *WaCPU = nullptr, *WaFPU = nullptr; const Arg *WaHDiv = nullptr, *WaArch = nullptr; if (!ForAS) { // FIXME: Note, this is a hack, the LLVM backend doesn't actually use these // yet (it uses the -mfloat-abi and -msoft-float options), and it is // stripped out by the ARM target. We should probably pass this a new // -target-option, which is handled by the -cc1/-cc1as invocation. // // FIXME2: For consistency, it would be ideal if we set up the target // machine state the same when using the frontend or the assembler. We don't // currently do that for the assembler, we pass the options directly to the // backend and never even instantiate the frontend TargetInfo. If we did, // and used its handleTargetFeatures hook, then we could ensure the // assembler and the frontend behave the same. // Use software floating point operations? if (ABI == arm::FloatABI::Soft) Features.push_back("+soft-float"); // Use software floating point argument passing? if (ABI != arm::FloatABI::Hard) Features.push_back("+soft-float-abi"); } else { // Here, we make sure that -Wa,-mfpu/cpu/arch/hwdiv will be passed down // to the assembler correctly. for (const Arg *A : Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler)) { StringRef Value = A->getValue(); if (Value.startswith("-mfpu=")) { WaFPU = A; } else if (Value.startswith("-mcpu=")) { WaCPU = A; } else if (Value.startswith("-mhwdiv=")) { WaHDiv = A; } else if (Value.startswith("-march=")) { WaArch = A; } } } // Check -march. ClangAs gives preference to -Wa,-march=. 
const Arg *ArchArg = Args.getLastArg(options::OPT_march_EQ); StringRef ArchName; if (WaArch) { if (ArchArg) D.Diag(clang::diag::warn_drv_unused_argument) << ArchArg->getAsString(Args); ArchName = StringRef(WaArch->getValue()).substr(7); checkARMArchName(D, WaArch, Args, ArchName, Features, Triple); // FIXME: Set Arch. D.Diag(clang::diag::warn_drv_unused_argument) << WaArch->getAsString(Args); } else if (ArchArg) { ArchName = ArchArg->getValue(); checkARMArchName(D, ArchArg, Args, ArchName, Features, Triple); } // Check -mcpu. ClangAs gives preference to -Wa,-mcpu=. const Arg *CPUArg = Args.getLastArg(options::OPT_mcpu_EQ); StringRef CPUName; if (WaCPU) { if (CPUArg) D.Diag(clang::diag::warn_drv_unused_argument) << CPUArg->getAsString(Args); CPUName = StringRef(WaCPU->getValue()).substr(6); checkARMCPUName(D, WaCPU, Args, CPUName, ArchName, Features, Triple); } else if (CPUArg) { CPUName = CPUArg->getValue(); checkARMCPUName(D, CPUArg, Args, CPUName, ArchName, Features, Triple); } // Add CPU features for generic CPUs if (CPUName == "native") { llvm::StringMap HostFeatures; if (llvm::sys::getHostCPUFeatures(HostFeatures)) for (auto &F : HostFeatures) Features.push_back( Args.MakeArgString((F.second ? "+" : "-") + F.first())); } // Honor -mfpu=. ClangAs gives preference to -Wa,-mfpu=. const Arg *FPUArg = Args.getLastArg(options::OPT_mfpu_EQ); if (WaFPU) { if (FPUArg) D.Diag(clang::diag::warn_drv_unused_argument) << FPUArg->getAsString(Args); getARMFPUFeatures(D, WaFPU, Args, StringRef(WaFPU->getValue()).substr(6), Features); } else if (FPUArg) { getARMFPUFeatures(D, FPUArg, Args, FPUArg->getValue(), Features); } // Honor -mhwdiv=. ClangAs gives preference to -Wa,-mhwdiv=. const Arg *HDivArg = Args.getLastArg(options::OPT_mhwdiv_EQ); if (WaHDiv) { if (HDivArg) D.Diag(clang::diag::warn_drv_unused_argument) << HDivArg->getAsString(Args); getARMHWDivFeatures(D, WaHDiv, Args, StringRef(WaHDiv->getValue()).substr(8), Features); } else if (HDivArg) getARMHWDivFeatures(D, HDivArg, Args, HDivArg->getValue(), Features); // Setting -msoft-float effectively disables NEON because of the GCC // implementation, although the same isn't true of VFP or VFP3. if (ABI == arm::FloatABI::Soft) { Features.push_back("-neon"); // Also need to explicitly disable features which imply NEON. Features.push_back("-crypto"); } // En/disable crc code generation. if (Arg *A = Args.getLastArg(options::OPT_mcrc, options::OPT_mnocrc)) { if (A->getOption().matches(options::OPT_mcrc)) Features.push_back("+crc"); else Features.push_back("-crc"); } // Look for the last occurrence of -mlong-calls or -mno-long-calls. If // neither options are specified, see if we are compiling for kernel/kext and // decide whether to pass "+long-calls" based on the OS and its version. if (Arg *A = Args.getLastArg(options::OPT_mlong_calls, options::OPT_mno_long_calls)) { if (A->getOption().matches(options::OPT_mlong_calls)) Features.push_back("+long-calls"); } else if (KernelOrKext && (!Triple.isiOS() || Triple.isOSVersionLT(6)) && !Triple.isWatchOS()) { Features.push_back("+long-calls"); } // Kernel code has more strict alignment requirements. if (KernelOrKext) Features.push_back("+strict-align"); else if (Arg *A = Args.getLastArg(options::OPT_mno_unaligned_access, options::OPT_munaligned_access)) { if (A->getOption().matches(options::OPT_munaligned_access)) { // No v6M core supports unaligned memory access (v6M ARM ARM A3.2). 
if (Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v6m) D.Diag(diag::err_target_unsupported_unaligned) << "v6m"; // v8M Baseline follows on from v6M, so doesn't support unaligned memory // access either. else if (Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v8m_baseline) D.Diag(diag::err_target_unsupported_unaligned) << "v8m.base"; } else Features.push_back("+strict-align"); } else { // Assume pre-ARMv6 doesn't support unaligned accesses. // // ARMv6 may or may not support unaligned accesses depending on the // SCTLR.U bit, which is architecture-specific. We assume ARMv6 // Darwin and NetBSD targets support unaligned accesses, and others don't. // // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit // which raises an alignment fault on unaligned accesses. Linux // defaults this bit to 0 and handles it as a system-wide (not // per-process) setting. It is therefore safe to assume that ARMv7+ // Linux targets support unaligned accesses. The same goes for NaCl. // // The above behavior is consistent with GCC. int VersionNum = getARMSubArchVersionNumber(Triple); if (Triple.isOSDarwin() || Triple.isOSNetBSD()) { if (VersionNum < 6 || Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v6m) Features.push_back("+strict-align"); } else if (Triple.isOSLinux() || Triple.isOSNaCl()) { if (VersionNum < 7) Features.push_back("+strict-align"); } else Features.push_back("+strict-align"); } // llvm does not support reserving registers in general. There is support // for reserving r9 on ARM though (defined as a platform-specific register // in ARM EABI). if (Args.hasArg(options::OPT_ffixed_r9)) Features.push_back("+reserve-r9"); // The kext linker doesn't know how to deal with movw/movt. if (KernelOrKext || Args.hasArg(options::OPT_mno_movt)) Features.push_back("+no-movt"); } void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args, ArgStringList &CmdArgs, bool KernelOrKext) const { // Select the ABI to use. // FIXME: Support -meabi. // FIXME: Parts of this are duplicated in the backend, unify this somehow. const char *ABIName = nullptr; if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) { ABIName = A->getValue(); } else if (Triple.isOSBinFormatMachO()) { if (useAAPCSForMachO(Triple)) { ABIName = "aapcs"; } else if (Triple.isWatchABI()) { ABIName = "aapcs16"; } else { ABIName = "apcs-gnu"; } } else if (Triple.isOSWindows()) { // FIXME: this is invalid for WindowsCE ABIName = "aapcs"; } else { // Select the default based on the platform. switch (Triple.getEnvironment()) { case llvm::Triple::Android: case llvm::Triple::GNUEABI: case llvm::Triple::GNUEABIHF: case llvm::Triple::MuslEABI: case llvm::Triple::MuslEABIHF: ABIName = "aapcs-linux"; break; case llvm::Triple::EABIHF: case llvm::Triple::EABI: ABIName = "aapcs"; break; default: if (Triple.getOS() == llvm::Triple::NetBSD) ABIName = "apcs-gnu"; else ABIName = "aapcs"; break; } } CmdArgs.push_back("-target-abi"); CmdArgs.push_back(ABIName); // Determine floating point ABI from the options & target defaults. arm::FloatABI ABI = arm::getARMFloatABI(getToolChain(), Args); if (ABI == arm::FloatABI::Soft) { // Floating point operations and argument passing are soft. // FIXME: This changes CPP defines, we need -target-soft-float. CmdArgs.push_back("-msoft-float"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("soft"); } else if (ABI == arm::FloatABI::SoftFP) { // Floating point operations are hard, but argument passing is soft. 
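    // In other words (illustrative): with "softfp", code such as
    //   double mul(double a, double b) { return a * b; }
    // may use VFP instructions for the multiply, but a and b still arrive
    // in core registers under the soft-float calling convention, so the
    // result links cleanly against soft-float libraries.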
CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("soft"); } else { // Floating point operations and argument passing are hard. assert(ABI == arm::FloatABI::Hard && "Invalid float abi!"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("hard"); } // Forward the -mglobal-merge option for explicit control over the pass. if (Arg *A = Args.getLastArg(options::OPT_mglobal_merge, options::OPT_mno_global_merge)) { CmdArgs.push_back("-backend-option"); if (A->getOption().matches(options::OPT_mno_global_merge)) CmdArgs.push_back("-arm-global-merge=false"); else CmdArgs.push_back("-arm-global-merge=true"); } if (!Args.hasFlag(options::OPT_mimplicit_float, options::OPT_mno_implicit_float, true)) CmdArgs.push_back("-no-implicit-float"); } // ARM tools end. /// getAArch64TargetCPU - Get the (LLVM) name of the AArch64 cpu we are /// targeting. static std::string getAArch64TargetCPU(const ArgList &Args) { Arg *A; std::string CPU; // If we have -mtune or -mcpu, use that. if ((A = Args.getLastArg(options::OPT_mtune_EQ))) { CPU = StringRef(A->getValue()).lower(); } else if ((A = Args.getLastArg(options::OPT_mcpu_EQ))) { StringRef Mcpu = A->getValue(); CPU = Mcpu.split("+").first.lower(); } // Handle CPU name is 'native'. if (CPU == "native") return llvm::sys::getHostCPUName(); else if (CPU.size()) return CPU; // Make sure we pick "cyclone" if -arch is used. // FIXME: Should this be picked by checking the target triple instead? if (Args.getLastArg(options::OPT_arch)) return "cyclone"; return "generic"; } void Clang::AddAArch64TargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { std::string TripleStr = getToolChain().ComputeEffectiveClangTriple(Args); llvm::Triple Triple(TripleStr); if (!Args.hasFlag(options::OPT_mred_zone, options::OPT_mno_red_zone, true) || Args.hasArg(options::OPT_mkernel) || Args.hasArg(options::OPT_fapple_kext)) CmdArgs.push_back("-disable-red-zone"); if (!Args.hasFlag(options::OPT_mimplicit_float, options::OPT_mno_implicit_float, true)) CmdArgs.push_back("-no-implicit-float"); const char *ABIName = nullptr; if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) ABIName = A->getValue(); else if (Triple.isOSDarwin()) ABIName = "darwinpcs"; else ABIName = "aapcs"; CmdArgs.push_back("-target-abi"); CmdArgs.push_back(ABIName); if (Arg *A = Args.getLastArg(options::OPT_mfix_cortex_a53_835769, options::OPT_mno_fix_cortex_a53_835769)) { CmdArgs.push_back("-backend-option"); if (A->getOption().matches(options::OPT_mfix_cortex_a53_835769)) CmdArgs.push_back("-aarch64-fix-cortex-a53-835769=1"); else CmdArgs.push_back("-aarch64-fix-cortex-a53-835769=0"); } else if (Triple.isAndroid()) { // Enabled A53 errata (835769) workaround by default on android CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-aarch64-fix-cortex-a53-835769=1"); } // Forward the -mglobal-merge option for explicit control over the pass. if (Arg *A = Args.getLastArg(options::OPT_mglobal_merge, options::OPT_mno_global_merge)) { CmdArgs.push_back("-backend-option"); if (A->getOption().matches(options::OPT_mno_global_merge)) CmdArgs.push_back("-aarch64-global-merge=false"); else CmdArgs.push_back("-aarch64-global-merge=true"); } } // Get CPU and ABI names. They are not independent // so we have to calculate them together. 
void mips::getMipsCPUAndABI(const ArgList &Args, const llvm::Triple &Triple, StringRef &CPUName, StringRef &ABIName) { const char *DefMips32CPU = "mips32r2"; const char *DefMips64CPU = "mips64r2"; // MIPS32r6 is the default for mips(el)?-img-linux-gnu and MIPS64r6 is the // default for mips64(el)?-img-linux-gnu. if (Triple.getVendor() == llvm::Triple::ImaginationTechnologies && Triple.getEnvironment() == llvm::Triple::GNU) { DefMips32CPU = "mips32r6"; DefMips64CPU = "mips64r6"; } // MIPS64r6 is the default for Android MIPS64 (mips64el-linux-android). if (Triple.isAndroid()) { DefMips32CPU = "mips32"; DefMips64CPU = "mips64r6"; } // MIPS3 is the default for mips64*-unknown-openbsd. if (Triple.getOS() == llvm::Triple::OpenBSD) DefMips64CPU = "mips3"; if (Arg *A = Args.getLastArg(options::OPT_march_EQ, options::OPT_mcpu_EQ)) CPUName = A->getValue(); if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) { ABIName = A->getValue(); // Convert a GNU style Mips ABI name to the name // accepted by LLVM Mips backend. ABIName = llvm::StringSwitch(ABIName) .Case("32", "o32") .Case("64", "n64") .Default(ABIName); } // Setup default CPU and ABI names. if (CPUName.empty() && ABIName.empty()) { switch (Triple.getArch()) { default: llvm_unreachable("Unexpected triple arch name"); case llvm::Triple::mips: case llvm::Triple::mipsel: CPUName = DefMips32CPU; break; case llvm::Triple::mips64: case llvm::Triple::mips64el: CPUName = DefMips64CPU; break; } } if (ABIName.empty() && (Triple.getVendor() == llvm::Triple::MipsTechnologies || Triple.getVendor() == llvm::Triple::ImaginationTechnologies)) { ABIName = llvm::StringSwitch(CPUName) .Case("mips1", "o32") .Case("mips2", "o32") .Case("mips3", "n64") .Case("mips4", "n64") .Case("mips5", "n64") .Case("mips32", "o32") .Case("mips32r2", "o32") .Case("mips32r3", "o32") .Case("mips32r5", "o32") .Case("mips32r6", "o32") .Case("mips64", "n64") .Case("mips64r2", "n64") .Case("mips64r3", "n64") .Case("mips64r5", "n64") .Case("mips64r6", "n64") .Case("octeon", "n64") .Case("p5600", "o32") .Default(""); } if (ABIName.empty()) { // Deduce ABI name from the target triple. if (Triple.getArch() == llvm::Triple::mips || Triple.getArch() == llvm::Triple::mipsel) ABIName = "o32"; else ABIName = "n64"; } if (CPUName.empty()) { // Deduce CPU name from ABI name. CPUName = llvm::StringSwitch(ABIName) .Case("o32", DefMips32CPU) .Cases("n32", "n64", DefMips64CPU) .Default(""); } // FIXME: Warn on inconsistent use of -march and -mabi. } std::string mips::getMipsABILibSuffix(const ArgList &Args, const llvm::Triple &Triple) { StringRef CPUName, ABIName; tools::mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName); return llvm::StringSwitch(ABIName) .Case("o32", "") .Case("n32", "32") .Case("n64", "64"); } // Convert ABI name to the GNU tools acceptable variant. static StringRef getGnuCompatibleMipsABIName(StringRef ABI) { return llvm::StringSwitch(ABI) .Case("o32", "32") .Case("n64", "64") .Default(ABI); } // Select the MIPS float ABI as determined by -msoft-float, -mhard-float, // and -mfloat-abi=. 
static mips::FloatABI getMipsFloatABI(const Driver &D, const ArgList &Args) {
  mips::FloatABI ABI = mips::FloatABI::Invalid;
  if (Arg *A =
          Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float,
                          options::OPT_mfloat_abi_EQ)) {
    if (A->getOption().matches(options::OPT_msoft_float))
      ABI = mips::FloatABI::Soft;
    else if (A->getOption().matches(options::OPT_mhard_float))
      ABI = mips::FloatABI::Hard;
    else {
      ABI = llvm::StringSwitch<mips::FloatABI>(A->getValue())
                .Case("soft", mips::FloatABI::Soft)
                .Case("hard", mips::FloatABI::Hard)
                .Default(mips::FloatABI::Invalid);
      if (ABI == mips::FloatABI::Invalid &&
          !StringRef(A->getValue()).empty()) {
        D.Diag(diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args);
        ABI = mips::FloatABI::Hard;
      }
    }
  }

  // If unspecified, choose the default based on the platform.
  if (ABI == mips::FloatABI::Invalid) {
    // Assume "hard", because it's a default value used by gcc.
    // When we start to recognize specific target MIPS processors,
    // we will be able to select the default more correctly.
    ABI = mips::FloatABI::Hard;
  }

  assert(ABI != mips::FloatABI::Invalid && "must select an ABI");
  return ABI;
}

static void AddTargetFeature(const ArgList &Args,
                             std::vector<const char *> &Features,
                             OptSpecifier OnOpt, OptSpecifier OffOpt,
                             StringRef FeatureName) {
  if (Arg *A = Args.getLastArg(OnOpt, OffOpt)) {
    if (A->getOption().matches(OnOpt))
      Features.push_back(Args.MakeArgString("+" + FeatureName));
    else
      Features.push_back(Args.MakeArgString("-" + FeatureName));
  }
}

static void getMIPSTargetFeatures(const Driver &D, const llvm::Triple &Triple,
                                  const ArgList &Args,
                                  std::vector<const char *> &Features) {
  StringRef CPUName;
  StringRef ABIName;
  mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName);
  ABIName = getGnuCompatibleMipsABIName(ABIName);

  AddTargetFeature(Args, Features, options::OPT_mno_abicalls,
                   options::OPT_mabicalls, "noabicalls");

  mips::FloatABI FloatABI = getMipsFloatABI(D, Args);
  if (FloatABI == mips::FloatABI::Soft) {
    // FIXME: Note, this is a hack. We need to pass the selected float
    // mode to the MipsTargetInfoBase to define appropriate macros there.
    // Now it is the only method.
    Features.push_back("+soft-float");
  }

  if (Arg *A = Args.getLastArg(options::OPT_mnan_EQ)) {
    StringRef Val = StringRef(A->getValue());
    if (Val == "2008") {
      if (mips::getSupportedNanEncoding(CPUName) & mips::Nan2008)
        Features.push_back("+nan2008");
      else {
        Features.push_back("-nan2008");
        D.Diag(diag::warn_target_unsupported_nan2008) << CPUName;
      }
    } else if (Val == "legacy") {
      if (mips::getSupportedNanEncoding(CPUName) & mips::NanLegacy)
        Features.push_back("-nan2008");
      else {
        Features.push_back("+nan2008");
        D.Diag(diag::warn_target_unsupported_nanlegacy) << CPUName;
      }
    } else
      D.Diag(diag::err_drv_unsupported_option_argument)
          << A->getOption().getName() << Val;
  }

  AddTargetFeature(Args, Features, options::OPT_msingle_float,
                   options::OPT_mdouble_float, "single-float");
  AddTargetFeature(Args, Features, options::OPT_mips16, options::OPT_mno_mips16,
                   "mips16");
  AddTargetFeature(Args, Features, options::OPT_mmicromips,
                   options::OPT_mno_micromips, "micromips");
  AddTargetFeature(Args, Features, options::OPT_mdsp, options::OPT_mno_dsp,
                   "dsp");
  AddTargetFeature(Args, Features, options::OPT_mdspr2, options::OPT_mno_dspr2,
                   "dspr2");
  AddTargetFeature(Args, Features, options::OPT_mmsa, options::OPT_mno_msa,
                   "msa");

  // Add the last -mfp32/-mfpxx/-mfp64, if none are given and the ABI is O32
  // pass -mfpxx, or if none are given and fp64a is default, pass fp64 and
  // nooddspreg.
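  // For example (illustrative): "-target mips-linux-gnu -mfpxx" yields the
  // features {"+fpxx", "+nooddspreg"}, i.e. FP-register-mode-agnostic code
  // that can run with either 32-bit or 64-bit FP registers.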
if (Arg *A = Args.getLastArg(options::OPT_mfp32, options::OPT_mfpxx, options::OPT_mfp64)) { if (A->getOption().matches(options::OPT_mfp32)) Features.push_back(Args.MakeArgString("-fp64")); else if (A->getOption().matches(options::OPT_mfpxx)) { Features.push_back(Args.MakeArgString("+fpxx")); Features.push_back(Args.MakeArgString("+nooddspreg")); } else Features.push_back(Args.MakeArgString("+fp64")); } else if (mips::shouldUseFPXX(Args, Triple, CPUName, ABIName, FloatABI)) { Features.push_back(Args.MakeArgString("+fpxx")); Features.push_back(Args.MakeArgString("+nooddspreg")); } else if (mips::isFP64ADefault(Triple, CPUName)) { Features.push_back(Args.MakeArgString("+fp64")); Features.push_back(Args.MakeArgString("+nooddspreg")); } AddTargetFeature(Args, Features, options::OPT_mno_odd_spreg, options::OPT_modd_spreg, "nooddspreg"); } void Clang::AddMIPSTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { const Driver &D = getToolChain().getDriver(); StringRef CPUName; StringRef ABIName; const llvm::Triple &Triple = getToolChain().getTriple(); mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName); CmdArgs.push_back("-target-abi"); CmdArgs.push_back(ABIName.data()); mips::FloatABI ABI = getMipsFloatABI(D, Args); if (ABI == mips::FloatABI::Soft) { // Floating point operations and argument passing are soft. CmdArgs.push_back("-msoft-float"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("soft"); } else { // Floating point operations and argument passing are hard. assert(ABI == mips::FloatABI::Hard && "Invalid float abi!"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("hard"); } if (Arg *A = Args.getLastArg(options::OPT_mxgot, options::OPT_mno_xgot)) { if (A->getOption().matches(options::OPT_mxgot)) { CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-mxgot"); } } if (Arg *A = Args.getLastArg(options::OPT_mldc1_sdc1, options::OPT_mno_ldc1_sdc1)) { if (A->getOption().matches(options::OPT_mno_ldc1_sdc1)) { CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-mno-ldc1-sdc1"); } } if (Arg *A = Args.getLastArg(options::OPT_mcheck_zero_division, options::OPT_mno_check_zero_division)) { if (A->getOption().matches(options::OPT_mno_check_zero_division)) { CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-mno-check-zero-division"); } } if (Arg *A = Args.getLastArg(options::OPT_G)) { StringRef v = A->getValue(); CmdArgs.push_back("-mllvm"); CmdArgs.push_back(Args.MakeArgString("-mips-ssection-threshold=" + v)); A->claim(); } if (Arg *A = Args.getLastArg(options::OPT_mcompact_branches_EQ)) { StringRef Val = StringRef(A->getValue()); if (mips::hasCompactBranches(CPUName)) { if (Val == "never" || Val == "always" || Val == "optimal") { CmdArgs.push_back("-mllvm"); CmdArgs.push_back(Args.MakeArgString("-mips-compact-branches=" + Val)); } else D.Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName() << Val; } else D.Diag(diag::warn_target_unsupported_compact_branches) << CPUName; } } /// getPPCTargetCPU - Get the (LLVM) name of the PowerPC cpu we are targeting. 
static std::string getPPCTargetCPU(const ArgList &Args) {
  if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
    StringRef CPUName = A->getValue();

    if (CPUName == "native") {
      std::string CPU = llvm::sys::getHostCPUName();
      if (!CPU.empty() && CPU != "generic")
        return CPU;
      else
        return "";
    }

    return llvm::StringSwitch<const char *>(CPUName)
        .Case("common", "generic")
        .Case("440", "440")
        .Case("440fp", "440")
        .Case("450", "450")
        .Case("601", "601")
        .Case("602", "602")
        .Case("603", "603")
        .Case("603e", "603e")
        .Case("603ev", "603ev")
        .Case("604", "604")
        .Case("604e", "604e")
        .Case("620", "620")
        .Case("630", "pwr3")
        .Case("G3", "g3")
        .Case("7400", "7400")
        .Case("G4", "g4")
        .Case("7450", "7450")
        .Case("G4+", "g4+")
        .Case("750", "750")
        .Case("970", "970")
        .Case("G5", "g5")
        .Case("a2", "a2")
        .Case("a2q", "a2q")
        .Case("e500mc", "e500mc")
        .Case("e5500", "e5500")
        .Case("power3", "pwr3")
        .Case("power4", "pwr4")
        .Case("power5", "pwr5")
        .Case("power5x", "pwr5x")
        .Case("power6", "pwr6")
        .Case("power6x", "pwr6x")
        .Case("power7", "pwr7")
        .Case("power8", "pwr8")
        .Case("power9", "pwr9")
        .Case("pwr3", "pwr3")
        .Case("pwr4", "pwr4")
        .Case("pwr5", "pwr5")
        .Case("pwr5x", "pwr5x")
        .Case("pwr6", "pwr6")
        .Case("pwr6x", "pwr6x")
        .Case("pwr7", "pwr7")
        .Case("pwr8", "pwr8")
        .Case("pwr9", "pwr9")
        .Case("powerpc", "ppc")
        .Case("powerpc64", "ppc64")
        .Case("powerpc64le", "ppc64le")
        .Default("");
  }

  return "";
}

static void getPPCTargetFeatures(const Driver &D, const llvm::Triple &Triple,
                                 const ArgList &Args,
                                 std::vector<const char *> &Features) {
  handleTargetFeaturesGroup(Args, Features, options::OPT_m_ppc_Features_Group);

  ppc::FloatABI FloatABI = ppc::getPPCFloatABI(D, Args);
-  if (FloatABI == ppc::FloatABI::Soft &&
-      !(Triple.getArch() == llvm::Triple::ppc64 ||
-        Triple.getArch() == llvm::Triple::ppc64le))
-    Features.push_back("+soft-float");
-  else if (FloatABI == ppc::FloatABI::Soft &&
-           (Triple.getArch() == llvm::Triple::ppc64 ||
-            Triple.getArch() == llvm::Triple::ppc64le))
-    D.Diag(diag::err_drv_invalid_mfloat_abi)
-        << "soft float is not supported for ppc64";
+  if (FloatABI == ppc::FloatABI::Soft)
+    Features.push_back("-hard-float");

  // Altivec is a bit weird, allow overriding of the Altivec feature here.
  AddTargetFeature(Args, Features, options::OPT_faltivec,
                   options::OPT_fno_altivec, "altivec");
}

ppc::FloatABI ppc::getPPCFloatABI(const Driver &D, const ArgList &Args) {
  ppc::FloatABI ABI = ppc::FloatABI::Invalid;
  if (Arg *A =
          Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float,
                          options::OPT_mfloat_abi_EQ)) {
    if (A->getOption().matches(options::OPT_msoft_float))
      ABI = ppc::FloatABI::Soft;
    else if (A->getOption().matches(options::OPT_mhard_float))
      ABI = ppc::FloatABI::Hard;
    else {
      ABI = llvm::StringSwitch<ppc::FloatABI>(A->getValue())
                .Case("soft", ppc::FloatABI::Soft)
                .Case("hard", ppc::FloatABI::Hard)
                .Default(ppc::FloatABI::Invalid);
      if (ABI == ppc::FloatABI::Invalid && !StringRef(A->getValue()).empty()) {
        D.Diag(diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args);
        ABI = ppc::FloatABI::Hard;
      }
    }
  }

  // If unspecified, choose the default based on the platform.
  if (ABI == ppc::FloatABI::Invalid) {
    ABI = ppc::FloatABI::Hard;
  }

  return ABI;
}

void Clang::AddPPCTargetArgs(const ArgList &Args,
                             ArgStringList &CmdArgs) const {
  // Select the ABI to use.
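  // For example (illustrative): "-target powerpc64-linux-gnu -mcpu=a2q"
  // selects ABIName "elfv1-qpx" below (unless -mno-qpx overrides it), while
  // a powerpc64le target defaults to "elfv2".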
const char *ABIName = nullptr; if (getToolChain().getTriple().isOSLinux()) switch (getToolChain().getArch()) { case llvm::Triple::ppc64: { // When targeting a processor that supports QPX, or if QPX is // specifically enabled, default to using the ABI that supports QPX (so // long as it is not specifically disabled). bool HasQPX = false; if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) HasQPX = A->getValue() == StringRef("a2q"); HasQPX = Args.hasFlag(options::OPT_mqpx, options::OPT_mno_qpx, HasQPX); if (HasQPX) { ABIName = "elfv1-qpx"; break; } ABIName = "elfv1"; break; } case llvm::Triple::ppc64le: ABIName = "elfv2"; break; default: break; } if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) // The ppc64 linux abis are all "altivec" abis by default. Accept and ignore // the option if given as we don't have backend support for any targets // that don't use the altivec abi. if (StringRef(A->getValue()) != "altivec") ABIName = A->getValue(); ppc::FloatABI FloatABI = ppc::getPPCFloatABI(getToolChain().getDriver(), Args); if (FloatABI == ppc::FloatABI::Soft) { // Floating point operations and argument passing are soft. CmdArgs.push_back("-msoft-float"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("soft"); } else { // Floating point operations and argument passing are hard. assert(FloatABI == ppc::FloatABI::Hard && "Invalid float abi!"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("hard"); } if (ABIName) { CmdArgs.push_back("-target-abi"); CmdArgs.push_back(ABIName); } } bool ppc::hasPPCAbiArg(const ArgList &Args, const char *Value) { Arg *A = Args.getLastArg(options::OPT_mabi_EQ); return A && (A->getValue() == StringRef(Value)); } /// Get the (LLVM) name of the R600 gpu we are targeting. static std::string getR600TargetGPU(const ArgList &Args) { if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { const char *GPUName = A->getValue(); return llvm::StringSwitch(GPUName) .Cases("rv630", "rv635", "r600") .Cases("rv610", "rv620", "rs780", "rs880") .Case("rv740", "rv770") .Case("palm", "cedar") .Cases("sumo", "sumo2", "sumo") .Case("hemlock", "cypress") .Case("aruba", "cayman") .Default(GPUName); } return ""; } static std::string getLanaiTargetCPU(const ArgList &Args) { if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { return A->getValue(); } return ""; } sparc::FloatABI sparc::getSparcFloatABI(const Driver &D, const ArgList &Args) { sparc::FloatABI ABI = sparc::FloatABI::Invalid; if (Arg *A = Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float, options::OPT_mfloat_abi_EQ)) { if (A->getOption().matches(options::OPT_msoft_float)) ABI = sparc::FloatABI::Soft; else if (A->getOption().matches(options::OPT_mhard_float)) ABI = sparc::FloatABI::Hard; else { ABI = llvm::StringSwitch(A->getValue()) .Case("soft", sparc::FloatABI::Soft) .Case("hard", sparc::FloatABI::Hard) .Default(sparc::FloatABI::Invalid); if (ABI == sparc::FloatABI::Invalid && !StringRef(A->getValue()).empty()) { D.Diag(diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args); ABI = sparc::FloatABI::Hard; } } } // If unspecified, choose the default based on the platform. // Only the hard-float ABI on Sparc is standardized, and it is the // default. GCC also supports a nonstandard soft-float ABI mode, also // implemented in LLVM. However as this is not standard we set the default // to be hard-float. 
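  // For illustration (hypothetical invocations): -msoft-float and
  // -mfloat-abi=soft both select FloatABI::Soft, which is lowered to the
  // "+soft-float" target feature below; with no flag given, the
  // standardized hard-float ABI is chosen.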
if (ABI == sparc::FloatABI::Invalid) { ABI = sparc::FloatABI::Hard; } return ABI; } static void getSparcTargetFeatures(const Driver &D, const ArgList &Args, std::vector &Features) { sparc::FloatABI FloatABI = sparc::getSparcFloatABI(D, Args); if (FloatABI == sparc::FloatABI::Soft) Features.push_back("+soft-float"); } void Clang::AddSparcTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { sparc::FloatABI FloatABI = sparc::getSparcFloatABI(getToolChain().getDriver(), Args); if (FloatABI == sparc::FloatABI::Soft) { // Floating point operations and argument passing are soft. CmdArgs.push_back("-msoft-float"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("soft"); } else { // Floating point operations and argument passing are hard. assert(FloatABI == sparc::FloatABI::Hard && "Invalid float abi!"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("hard"); } } void Clang::AddSystemZTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { if (Args.hasFlag(options::OPT_mbackchain, options::OPT_mno_backchain, false)) CmdArgs.push_back("-mbackchain"); } static const char *getSystemZTargetCPU(const ArgList &Args) { if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) return A->getValue(); return "z10"; } static void getSystemZTargetFeatures(const ArgList &Args, std::vector &Features) { // -m(no-)htm overrides use of the transactional-execution facility. if (Arg *A = Args.getLastArg(options::OPT_mhtm, options::OPT_mno_htm)) { if (A->getOption().matches(options::OPT_mhtm)) Features.push_back("+transactional-execution"); else Features.push_back("-transactional-execution"); } // -m(no-)vx overrides use of the vector facility. if (Arg *A = Args.getLastArg(options::OPT_mvx, options::OPT_mno_vx)) { if (A->getOption().matches(options::OPT_mvx)) Features.push_back("+vector"); else Features.push_back("-vector"); } } static const char *getX86TargetCPU(const ArgList &Args, const llvm::Triple &Triple) { if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) { if (StringRef(A->getValue()) != "native") { if (Triple.isOSDarwin() && Triple.getArchName() == "x86_64h") return "core-avx2"; return A->getValue(); } // FIXME: Reject attempts to use -march=native unless the target matches // the host. // // FIXME: We should also incorporate the detected target features for use // with -native. std::string CPU = llvm::sys::getHostCPUName(); if (!CPU.empty() && CPU != "generic") return Args.MakeArgString(CPU); } if (const Arg *A = Args.getLastArg(options::OPT__SLASH_arch)) { // Mapping built by referring to X86TargetInfo::getDefaultFeatures(). StringRef Arch = A->getValue(); const char *CPU; if (Triple.getArch() == llvm::Triple::x86) { CPU = llvm::StringSwitch(Arch) .Case("IA32", "i386") .Case("SSE", "pentium3") .Case("SSE2", "pentium4") .Case("AVX", "sandybridge") .Case("AVX2", "haswell") .Default(nullptr); } else { CPU = llvm::StringSwitch(Arch) .Case("AVX", "sandybridge") .Case("AVX2", "haswell") .Default(nullptr); } if (CPU) return CPU; } // Select the default CPU if none was given (or detection failed). if (Triple.getArch() != llvm::Triple::x86_64 && Triple.getArch() != llvm::Triple::x86) return nullptr; // This routine is only handling x86 targets. bool Is64Bit = Triple.getArch() == llvm::Triple::x86_64; // FIXME: Need target hooks. if (Triple.isOSDarwin()) { if (Triple.getArchName() == "x86_64h") return "core-avx2"; return Is64Bit ? "core2" : "yonah"; } // Set up default CPU name for PS4 compilers. 
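  // (e.g. an x86_64-scei-ps4 compile with no -march= should fall through
  // to the "btver2" default below; an illustrative note, not new
  // behavior.)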
if (Triple.isPS4CPU()) return "btver2"; // On Android use targets compatible with gcc if (Triple.isAndroid()) return Is64Bit ? "x86-64" : "i686"; // Everything else goes to x86-64 in 64-bit mode. if (Is64Bit) return "x86-64"; switch (Triple.getOS()) { case llvm::Triple::FreeBSD: case llvm::Triple::NetBSD: case llvm::Triple::OpenBSD: return "i486"; case llvm::Triple::Haiku: return "i586"; case llvm::Triple::Bitrig: return "i686"; default: // Fallback to p4. return "pentium4"; } } /// Get the (LLVM) name of the WebAssembly cpu we are targeting. static StringRef getWebAssemblyTargetCPU(const ArgList &Args) { // If we have -mcpu=, use that. if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { StringRef CPU = A->getValue(); #ifdef __wasm__ // Handle "native" by examining the host. "native" isn't meaningful when // cross compiling, so only support this when the host is also WebAssembly. if (CPU == "native") return llvm::sys::getHostCPUName(); #endif return CPU; } return "generic"; } static std::string getCPUName(const ArgList &Args, const llvm::Triple &T, bool FromAs = false) { switch (T.getArch()) { default: return ""; case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: return getAArch64TargetCPU(Args); case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: { StringRef MArch, MCPU; getARMArchCPUFromArgs(Args, MArch, MCPU, FromAs); return arm::getARMTargetCPU(MCPU, MArch, T); } case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: { StringRef CPUName; StringRef ABIName; mips::getMipsCPUAndABI(Args, T, CPUName, ABIName); return CPUName; } case llvm::Triple::nvptx: case llvm::Triple::nvptx64: if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) return A->getValue(); return ""; case llvm::Triple::ppc: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: { std::string TargetCPUName = getPPCTargetCPU(Args); // LLVM may default to generating code for the native CPU, // but, like gcc, we default to a more generic option for // each architecture. (except on Darwin) if (TargetCPUName.empty() && !T.isOSDarwin()) { if (T.getArch() == llvm::Triple::ppc64) TargetCPUName = "ppc64"; else if (T.getArch() == llvm::Triple::ppc64le) TargetCPUName = "ppc64le"; else TargetCPUName = "ppc"; } return TargetCPUName; } case llvm::Triple::sparc: case llvm::Triple::sparcel: case llvm::Triple::sparcv9: if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) return A->getValue(); return ""; case llvm::Triple::x86: case llvm::Triple::x86_64: return getX86TargetCPU(Args, T); case llvm::Triple::hexagon: return "hexagon" + toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str(); case llvm::Triple::lanai: return getLanaiTargetCPU(Args); case llvm::Triple::systemz: return getSystemZTargetCPU(Args); case llvm::Triple::r600: case llvm::Triple::amdgcn: return getR600TargetGPU(Args); case llvm::Triple::wasm32: case llvm::Triple::wasm64: return getWebAssemblyTargetCPU(Args); } } static void AddGoldPlugin(const ToolChain &ToolChain, const ArgList &Args, ArgStringList &CmdArgs, bool IsThinLTO) { // Tell the linker to load the plugin. This has to come before AddLinkerInputs // as gold requires -plugin to come before any -plugin-opt that -Wl might // forward. CmdArgs.push_back("-plugin"); std::string Plugin = ToolChain.getDriver().Dir + "/../lib" CLANG_LIBDIR_SUFFIX "/LLVMgold.so"; CmdArgs.push_back(Args.MakeArgString(Plugin)); // Try to pass driver level flags relevant to LTO code generation down to // the plugin. 
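  // For illustration (hypothetical invocation): "clang -flto -O2
  // -mcpu=pwr8" should reach gold as "-plugin-opt=mcpu=pwr8
  // -plugin-opt=O2"; -O4 and -Ofast collapse to O3, and ThinLTO adds
  // "-plugin-opt=thinlto".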
// Handle flags for selecting CPU variants. std::string CPU = getCPUName(Args, ToolChain.getTriple()); if (!CPU.empty()) CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=mcpu=") + CPU)); if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { StringRef OOpt; if (A->getOption().matches(options::OPT_O4) || A->getOption().matches(options::OPT_Ofast)) OOpt = "3"; else if (A->getOption().matches(options::OPT_O)) OOpt = A->getValue(); else if (A->getOption().matches(options::OPT_O0)) OOpt = "0"; if (!OOpt.empty()) CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=O") + OOpt)); } if (IsThinLTO) CmdArgs.push_back("-plugin-opt=thinlto"); // If an explicit debugger tuning argument appeared, pass it along. if (Arg *A = Args.getLastArg(options::OPT_gTune_Group, options::OPT_ggdbN_Group)) { if (A->getOption().matches(options::OPT_glldb)) CmdArgs.push_back("-plugin-opt=-debugger-tune=lldb"); else if (A->getOption().matches(options::OPT_gsce)) CmdArgs.push_back("-plugin-opt=-debugger-tune=sce"); else CmdArgs.push_back("-plugin-opt=-debugger-tune=gdb"); } } /// This is a helper function for validating the optional refinement step /// parameter in reciprocal argument strings. Return false if there is an error /// parsing the refinement step. Otherwise, return true and set the Position /// of the refinement step in the input string. static bool getRefinementStep(StringRef In, const Driver &D, const Arg &A, size_t &Position) { const char RefinementStepToken = ':'; Position = In.find(RefinementStepToken); if (Position != StringRef::npos) { StringRef Option = A.getOption().getName(); StringRef RefStep = In.substr(Position + 1); // Allow exactly one numeric character for the additional refinement // step parameter. This is reasonable for all currently-supported // operations and architectures because we would expect that a larger value // of refinement steps would cause the estimate "optimization" to // under-perform the native operation. Also, if the estimate does not // converge quickly, it probably will not ever converge, so further // refinement steps will not produce a better answer. if (RefStep.size() != 1) { D.Diag(diag::err_drv_invalid_value) << Option << RefStep; return false; } char RefStepChar = RefStep[0]; if (RefStepChar < '0' || RefStepChar > '9') { D.Diag(diag::err_drv_invalid_value) << Option << RefStep; return false; } } return true; } /// The -mrecip flag requires processing of many optional parameters. static void ParseMRecip(const Driver &D, const ArgList &Args, ArgStringList &OutStrings) { StringRef DisabledPrefixIn = "!"; StringRef DisabledPrefixOut = "!"; StringRef EnabledPrefixOut = ""; StringRef Out = "-mrecip="; Arg *A = Args.getLastArg(options::OPT_mrecip, options::OPT_mrecip_EQ); if (!A) return; unsigned NumOptions = A->getNumValues(); if (NumOptions == 0) { // No option is the same as "all". OutStrings.push_back(Args.MakeArgString(Out + "all")); return; } // Pass through "all", "none", or "default" with an optional refinement step. if (NumOptions == 1) { StringRef Val = A->getValue(0); size_t RefStepLoc; if (!getRefinementStep(Val, D, *A, RefStepLoc)) return; StringRef ValBase = Val.slice(0, RefStepLoc); if (ValBase == "all" || ValBase == "none" || ValBase == "default") { OutStrings.push_back(Args.MakeArgString(Out + Val)); return; } } // Each reciprocal type may be enabled or disabled individually. // Check each input value for validity, concatenate them all back together, // and pass through. 
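  // For illustration (hypothetical values): "-mrecip=vec-sqrtd:2,!divf"
  // validates and is forwarded unchanged, while the unsuffixed
  // "-mrecip=sqrt" marks both sqrtf and sqrtd as specified, so a later
  // duplicate such as "sqrtd" is rejected with err_drv_invalid_value.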
llvm::StringMap OptionStrings; OptionStrings.insert(std::make_pair("divd", false)); OptionStrings.insert(std::make_pair("divf", false)); OptionStrings.insert(std::make_pair("vec-divd", false)); OptionStrings.insert(std::make_pair("vec-divf", false)); OptionStrings.insert(std::make_pair("sqrtd", false)); OptionStrings.insert(std::make_pair("sqrtf", false)); OptionStrings.insert(std::make_pair("vec-sqrtd", false)); OptionStrings.insert(std::make_pair("vec-sqrtf", false)); for (unsigned i = 0; i != NumOptions; ++i) { StringRef Val = A->getValue(i); bool IsDisabled = Val.startswith(DisabledPrefixIn); // Ignore the disablement token for string matching. if (IsDisabled) Val = Val.substr(1); size_t RefStep; if (!getRefinementStep(Val, D, *A, RefStep)) return; StringRef ValBase = Val.slice(0, RefStep); llvm::StringMap::iterator OptionIter = OptionStrings.find(ValBase); if (OptionIter == OptionStrings.end()) { // Try again specifying float suffix. OptionIter = OptionStrings.find(ValBase.str() + 'f'); if (OptionIter == OptionStrings.end()) { // The input name did not match any known option string. D.Diag(diag::err_drv_unknown_argument) << Val; return; } // The option was specified without a float or double suffix. // Make sure that the double entry was not already specified. // The float entry will be checked below. if (OptionStrings[ValBase.str() + 'd']) { D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Val; return; } } if (OptionIter->second == true) { // Duplicate option specified. D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Val; return; } // Mark the matched option as found. Do not allow duplicate specifiers. OptionIter->second = true; // If the precision was not specified, also mark the double entry as found. if (ValBase.back() != 'f' && ValBase.back() != 'd') OptionStrings[ValBase.str() + 'd'] = true; // Build the output string. StringRef Prefix = IsDisabled ? DisabledPrefixOut : EnabledPrefixOut; Out = Args.MakeArgString(Out + Prefix + Val); if (i != NumOptions - 1) Out = Args.MakeArgString(Out + ","); } OutStrings.push_back(Args.MakeArgString(Out)); } static void getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, const ArgList &Args, std::vector &Features) { // If -march=native, autodetect the feature list. if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) { if (StringRef(A->getValue()) == "native") { llvm::StringMap HostFeatures; if (llvm::sys::getHostCPUFeatures(HostFeatures)) for (auto &F : HostFeatures) Features.push_back( Args.MakeArgString((F.second ? "+" : "-") + F.first())); } } if (Triple.getArchName() == "x86_64h") { // x86_64h implies quite a few of the more modern subtarget features // for Haswell class CPUs, but not all of them. Opt-out of a few. Features.push_back("-rdrnd"); Features.push_back("-aes"); Features.push_back("-pclmul"); Features.push_back("-rtm"); Features.push_back("-hle"); Features.push_back("-fsgsbase"); } const llvm::Triple::ArchType ArchType = Triple.getArch(); // Add features to be compatible with gcc for Android. if (Triple.isAndroid()) { if (ArchType == llvm::Triple::x86_64) { Features.push_back("+sse4.2"); Features.push_back("+popcnt"); } else Features.push_back("+ssse3"); } // Set features according to the -arch flag on MSVC. if (Arg *A = Args.getLastArg(options::OPT__SLASH_arch)) { StringRef Arch = A->getValue(); bool ArchUsed = false; // First, look for flags that are shared in x86 and x86-64. 
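    // (e.g. the MSVC-style "/arch:AVX2" should yield the "+avx2" feature
    // on both x86 and x86-64, while "/arch:SSE2" is x86-only and
    // "/arch:IA32" adds no extra feature.)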
if (ArchType == llvm::Triple::x86_64 || ArchType == llvm::Triple::x86) { if (Arch == "AVX" || Arch == "AVX2") { ArchUsed = true; Features.push_back(Args.MakeArgString("+" + Arch.lower())); } } // Then, look for x86-specific flags. if (ArchType == llvm::Triple::x86) { if (Arch == "IA32") { ArchUsed = true; } else if (Arch == "SSE" || Arch == "SSE2") { ArchUsed = true; Features.push_back(Args.MakeArgString("+" + Arch.lower())); } } if (!ArchUsed) D.Diag(clang::diag::warn_drv_unused_argument) << A->getAsString(Args); } // Now add any that the user explicitly requested on the command line, // which may override the defaults. handleTargetFeaturesGroup(Args, Features, options::OPT_m_x86_Features_Group); } void Clang::AddX86TargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { if (!Args.hasFlag(options::OPT_mred_zone, options::OPT_mno_red_zone, true) || Args.hasArg(options::OPT_mkernel) || Args.hasArg(options::OPT_fapple_kext)) CmdArgs.push_back("-disable-red-zone"); // Default to avoid implicit floating-point for kernel/kext code, but allow // that to be overridden with -mno-soft-float. bool NoImplicitFloat = (Args.hasArg(options::OPT_mkernel) || Args.hasArg(options::OPT_fapple_kext)); if (Arg *A = Args.getLastArg( options::OPT_msoft_float, options::OPT_mno_soft_float, options::OPT_mimplicit_float, options::OPT_mno_implicit_float)) { const Option &O = A->getOption(); NoImplicitFloat = (O.matches(options::OPT_mno_implicit_float) || O.matches(options::OPT_msoft_float)); } if (NoImplicitFloat) CmdArgs.push_back("-no-implicit-float"); if (Arg *A = Args.getLastArg(options::OPT_masm_EQ)) { StringRef Value = A->getValue(); if (Value == "intel" || Value == "att") { CmdArgs.push_back("-mllvm"); CmdArgs.push_back(Args.MakeArgString("-x86-asm-syntax=" + Value)); } else { getToolChain().getDriver().Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName() << Value; } } // Set flags to support MCU ABI. if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) { CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("soft"); CmdArgs.push_back("-mstack-alignment=4"); } } void Clang::AddHexagonTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { CmdArgs.push_back("-mqdsp6-compat"); CmdArgs.push_back("-Wreturn-type"); if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) { std::string N = llvm::utostr(G.getValue()); std::string Opt = std::string("-hexagon-small-data-threshold=") + N; CmdArgs.push_back("-mllvm"); CmdArgs.push_back(Args.MakeArgString(Opt)); } if (!Args.hasArg(options::OPT_fno_short_enums)) CmdArgs.push_back("-fshort-enums"); if (Args.getLastArg(options::OPT_mieee_rnd_near)) { CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-enable-hexagon-ieee-rnd-near"); } CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-machine-sink-split=0"); } void Clang::AddLanaiTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { StringRef CPUName = A->getValue(); CmdArgs.push_back("-target-cpu"); CmdArgs.push_back(Args.MakeArgString(CPUName)); } if (Arg *A = Args.getLastArg(options::OPT_mregparm_EQ)) { StringRef Value = A->getValue(); // Only support mregparm=4 to support old usage. Report error for all other // cases. 
int Mregparm; if (Value.getAsInteger(10, Mregparm)) { if (Mregparm != 4) { getToolChain().getDriver().Diag( diag::err_drv_unsupported_option_argument) << A->getOption().getName() << Value; } } } } void Clang::AddWebAssemblyTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { // Default to "hidden" visibility. if (!Args.hasArg(options::OPT_fvisibility_EQ, options::OPT_fvisibility_ms_compat)) { CmdArgs.push_back("-fvisibility"); CmdArgs.push_back("hidden"); } } // Decode AArch64 features from string like +[no]featureA+[no]featureB+... static bool DecodeAArch64Features(const Driver &D, StringRef text, std::vector &Features) { SmallVector Split; text.split(Split, StringRef("+"), -1, false); for (StringRef Feature : Split) { const char *result = llvm::StringSwitch(Feature) .Case("fp", "+fp-armv8") .Case("simd", "+neon") .Case("crc", "+crc") .Case("crypto", "+crypto") .Case("fp16", "+fullfp16") .Case("profile", "+spe") .Case("ras", "+ras") .Case("nofp", "-fp-armv8") .Case("nosimd", "-neon") .Case("nocrc", "-crc") .Case("nocrypto", "-crypto") .Case("nofp16", "-fullfp16") .Case("noprofile", "-spe") .Case("noras", "-ras") .Default(nullptr); if (result) Features.push_back(result); else if (Feature == "neon" || Feature == "noneon") D.Diag(diag::err_drv_no_neon_modifier); else return false; } return true; } // Check if the CPU name and feature modifiers in -mcpu are legal. If yes, // decode CPU and feature. static bool DecodeAArch64Mcpu(const Driver &D, StringRef Mcpu, StringRef &CPU, std::vector &Features) { std::pair Split = Mcpu.split("+"); CPU = Split.first; if (CPU == "cortex-a53" || CPU == "cortex-a57" || CPU == "cortex-a72" || CPU == "cortex-a35" || CPU == "exynos-m1" || CPU == "kryo" || CPU == "cortex-a73" || CPU == "vulcan") { Features.push_back("+neon"); Features.push_back("+crc"); Features.push_back("+crypto"); } else if (CPU == "cyclone") { Features.push_back("+neon"); Features.push_back("+crypto"); } else if (CPU == "generic") { Features.push_back("+neon"); } else { return false; } if (Split.second.size() && !DecodeAArch64Features(D, Split.second, Features)) return false; return true; } static bool getAArch64ArchFeaturesFromMarch(const Driver &D, StringRef March, const ArgList &Args, std::vector &Features) { std::string MarchLowerCase = March.lower(); std::pair Split = StringRef(MarchLowerCase).split("+"); if (Split.first == "armv8-a" || Split.first == "armv8a") { // ok, no additional features. } else if (Split.first == "armv8.1-a" || Split.first == "armv8.1a") { Features.push_back("+v8.1a"); } else if (Split.first == "armv8.2-a" || Split.first == "armv8.2a" ) { Features.push_back("+v8.2a"); } else { return false; } if (Split.second.size() && !DecodeAArch64Features(D, Split.second, Features)) return false; return true; } static bool getAArch64ArchFeaturesFromMcpu(const Driver &D, StringRef Mcpu, const ArgList &Args, std::vector &Features) { StringRef CPU; std::string McpuLowerCase = Mcpu.lower(); if (!DecodeAArch64Mcpu(D, McpuLowerCase, CPU, Features)) return false; return true; } static bool getAArch64MicroArchFeaturesFromMtune(const Driver &D, StringRef Mtune, const ArgList &Args, std::vector &Features) { std::string MtuneLowerCase = Mtune.lower(); // Handle CPU name is 'native'. 
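  // (e.g. -mtune=native on a host that reports "cyclone" picks up the
  // +zcm/+zcz zero-cycle move and zeroing features below; illustrative
  // only, assuming that host CPU name.)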
if (MtuneLowerCase == "native") MtuneLowerCase = llvm::sys::getHostCPUName(); if (MtuneLowerCase == "cyclone") { Features.push_back("+zcm"); Features.push_back("+zcz"); } return true; } static bool getAArch64MicroArchFeaturesFromMcpu(const Driver &D, StringRef Mcpu, const ArgList &Args, std::vector &Features) { StringRef CPU; std::vector DecodedFeature; std::string McpuLowerCase = Mcpu.lower(); if (!DecodeAArch64Mcpu(D, McpuLowerCase, CPU, DecodedFeature)) return false; return getAArch64MicroArchFeaturesFromMtune(D, CPU, Args, Features); } static void getAArch64TargetFeatures(const Driver &D, const ArgList &Args, std::vector &Features) { Arg *A; bool success = true; // Enable NEON by default. Features.push_back("+neon"); if ((A = Args.getLastArg(options::OPT_march_EQ))) success = getAArch64ArchFeaturesFromMarch(D, A->getValue(), Args, Features); else if ((A = Args.getLastArg(options::OPT_mcpu_EQ))) success = getAArch64ArchFeaturesFromMcpu(D, A->getValue(), Args, Features); else if (Args.hasArg(options::OPT_arch)) success = getAArch64ArchFeaturesFromMcpu(D, getAArch64TargetCPU(Args), Args, Features); if (success && (A = Args.getLastArg(options::OPT_mtune_EQ))) success = getAArch64MicroArchFeaturesFromMtune(D, A->getValue(), Args, Features); else if (success && (A = Args.getLastArg(options::OPT_mcpu_EQ))) success = getAArch64MicroArchFeaturesFromMcpu(D, A->getValue(), Args, Features); else if (Args.hasArg(options::OPT_arch)) success = getAArch64MicroArchFeaturesFromMcpu(D, getAArch64TargetCPU(Args), Args, Features); if (!success) D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args); if (Args.getLastArg(options::OPT_mgeneral_regs_only)) { Features.push_back("-fp-armv8"); Features.push_back("-crypto"); Features.push_back("-neon"); } // En/disable crc if (Arg *A = Args.getLastArg(options::OPT_mcrc, options::OPT_mnocrc)) { if (A->getOption().matches(options::OPT_mcrc)) Features.push_back("+crc"); else Features.push_back("-crc"); } if (Arg *A = Args.getLastArg(options::OPT_mno_unaligned_access, options::OPT_munaligned_access)) if (A->getOption().matches(options::OPT_mno_unaligned_access)) Features.push_back("+strict-align"); if (Args.hasArg(options::OPT_ffixed_x18)) Features.push_back("+reserve-x18"); } static void getHexagonTargetFeatures(const ArgList &Args, std::vector &Features) { bool HasHVX = false, HasHVXD = false; // FIXME: This should be able to use handleTargetFeaturesGroup except it is // doing dependent option handling here rather than in initFeatureMap or a // similar handler. for (auto &A : Args) { auto &Opt = A->getOption(); if (Opt.matches(options::OPT_mhexagon_hvx)) HasHVX = true; else if (Opt.matches(options::OPT_mno_hexagon_hvx)) HasHVXD = HasHVX = false; else if (Opt.matches(options::OPT_mhexagon_hvx_double)) HasHVXD = HasHVX = true; else if (Opt.matches(options::OPT_mno_hexagon_hvx_double)) HasHVXD = false; else continue; A->claim(); } Features.push_back(HasHVX ? "+hvx" : "-hvx"); Features.push_back(HasHVXD ? 
"+hvx-double" : "-hvx-double"); } static void getWebAssemblyTargetFeatures(const ArgList &Args, std::vector &Features) { handleTargetFeaturesGroup(Args, Features, options::OPT_m_wasm_Features_Group); } static void getAMDGPUTargetFeatures(const Driver &D, const ArgList &Args, std::vector &Features) { if (const Arg *dAbi = Args.getLastArg(options::OPT_mamdgpu_debugger_abi)) { StringRef value = dAbi->getValue(); if (value == "1.0") { Features.push_back("+amdgpu-debugger-insert-nops"); Features.push_back("+amdgpu-debugger-reserve-regs"); Features.push_back("+amdgpu-debugger-emit-prologue"); } else { D.Diag(diag::err_drv_clang_unsupported) << dAbi->getAsString(Args); } } handleTargetFeaturesGroup( Args, Features, options::OPT_m_amdgpu_Features_Group); } static void getTargetFeatures(const ToolChain &TC, const llvm::Triple &Triple, const ArgList &Args, ArgStringList &CmdArgs, bool ForAS) { const Driver &D = TC.getDriver(); std::vector Features; switch (Triple.getArch()) { default: break; case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: getMIPSTargetFeatures(D, Triple, Args, Features); break; case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: getARMTargetFeatures(TC, Triple, Args, Features, ForAS); break; case llvm::Triple::ppc: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: getPPCTargetFeatures(D, Triple, Args, Features); break; case llvm::Triple::systemz: getSystemZTargetFeatures(Args, Features); break; case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: getAArch64TargetFeatures(D, Args, Features); break; case llvm::Triple::x86: case llvm::Triple::x86_64: getX86TargetFeatures(D, Triple, Args, Features); break; case llvm::Triple::hexagon: getHexagonTargetFeatures(Args, Features); break; case llvm::Triple::wasm32: case llvm::Triple::wasm64: getWebAssemblyTargetFeatures(Args, Features); break; case llvm::Triple::sparc: case llvm::Triple::sparcel: case llvm::Triple::sparcv9: getSparcTargetFeatures(D, Args, Features); break; case llvm::Triple::r600: case llvm::Triple::amdgcn: getAMDGPUTargetFeatures(D, Args, Features); break; } // Find the last of each feature. llvm::StringMap LastOpt; for (unsigned I = 0, N = Features.size(); I < N; ++I) { const char *Name = Features[I]; assert(Name[0] == '-' || Name[0] == '+'); LastOpt[Name + 1] = I; } for (unsigned I = 0, N = Features.size(); I < N; ++I) { // If this feature was overridden, ignore it. const char *Name = Features[I]; llvm::StringMap::iterator LastI = LastOpt.find(Name + 1); assert(LastI != LastOpt.end()); unsigned Last = LastI->second; if (Last != I) continue; CmdArgs.push_back("-target-feature"); CmdArgs.push_back(Name); } } static bool shouldUseExceptionTablesForObjCExceptions(const ObjCRuntime &runtime, const llvm::Triple &Triple) { // We use the zero-cost exception tables for Objective-C if the non-fragile // ABI is enabled or when compiling for x86_64 and ARM on Snow Leopard and // later. if (runtime.isNonFragile()) return true; if (!Triple.isMacOSX()) return false; return (!Triple.isMacOSXVersionLT(10, 5) && (Triple.getArch() == llvm::Triple::x86_64 || Triple.getArch() == llvm::Triple::arm)); } /// Adds exception related arguments to the driver command arguments. There's a /// master flag, -fexceptions and also language specific flags to enable/disable /// C++ and Objective-C exceptions. This makes it possible to for example /// disable C++ exceptions but enable Objective-C exceptions. 
static void addExceptionArgs(const ArgList &Args, types::ID InputType, const ToolChain &TC, bool KernelOrKext, const ObjCRuntime &objcRuntime, ArgStringList &CmdArgs) { const Driver &D = TC.getDriver(); const llvm::Triple &Triple = TC.getTriple(); if (KernelOrKext) { // -mkernel and -fapple-kext imply no exceptions, so claim exception related // arguments now to avoid warnings about unused arguments. Args.ClaimAllArgs(options::OPT_fexceptions); Args.ClaimAllArgs(options::OPT_fno_exceptions); Args.ClaimAllArgs(options::OPT_fobjc_exceptions); Args.ClaimAllArgs(options::OPT_fno_objc_exceptions); Args.ClaimAllArgs(options::OPT_fcxx_exceptions); Args.ClaimAllArgs(options::OPT_fno_cxx_exceptions); return; } // See if the user explicitly enabled exceptions. bool EH = Args.hasFlag(options::OPT_fexceptions, options::OPT_fno_exceptions, false); // Obj-C exceptions are enabled by default, regardless of -fexceptions. This // is not necessarily sensible, but follows GCC. if (types::isObjC(InputType) && Args.hasFlag(options::OPT_fobjc_exceptions, options::OPT_fno_objc_exceptions, true)) { CmdArgs.push_back("-fobjc-exceptions"); EH |= shouldUseExceptionTablesForObjCExceptions(objcRuntime, Triple); } if (types::isCXX(InputType)) { // Disable C++ EH by default on XCore and PS4. bool CXXExceptionsEnabled = Triple.getArch() != llvm::Triple::xcore && !Triple.isPS4CPU(); Arg *ExceptionArg = Args.getLastArg( options::OPT_fcxx_exceptions, options::OPT_fno_cxx_exceptions, options::OPT_fexceptions, options::OPT_fno_exceptions); if (ExceptionArg) CXXExceptionsEnabled = ExceptionArg->getOption().matches(options::OPT_fcxx_exceptions) || ExceptionArg->getOption().matches(options::OPT_fexceptions); if (CXXExceptionsEnabled) { if (Triple.isPS4CPU()) { ToolChain::RTTIMode RTTIMode = TC.getRTTIMode(); assert(ExceptionArg && "On the PS4 exceptions should only be enabled if passing " "an argument"); if (RTTIMode == ToolChain::RM_DisabledExplicitly) { const Arg *RTTIArg = TC.getRTTIArg(); assert(RTTIArg && "RTTI disabled explicitly but no RTTIArg!"); D.Diag(diag::err_drv_argument_not_allowed_with) << RTTIArg->getAsString(Args) << ExceptionArg->getAsString(Args); } else if (RTTIMode == ToolChain::RM_EnabledImplicitly) D.Diag(diag::warn_drv_enabling_rtti_with_exceptions); } else assert(TC.getRTTIMode() != ToolChain::RM_DisabledImplicitly); CmdArgs.push_back("-fcxx-exceptions"); EH = true; } } if (EH) CmdArgs.push_back("-fexceptions"); } static bool ShouldDisableAutolink(const ArgList &Args, const ToolChain &TC) { bool Default = true; if (TC.getTriple().isOSDarwin()) { // The native darwin assembler doesn't support the linker_option directives, // so we disable them if we think the .s file will be passed to it. Default = TC.useIntegratedAs(); } return !Args.hasFlag(options::OPT_fautolink, options::OPT_fno_autolink, Default); } static bool ShouldDisableDwarfDirectory(const ArgList &Args, const ToolChain &TC) { bool UseDwarfDirectory = Args.hasFlag(options::OPT_fdwarf_directory_asm, options::OPT_fno_dwarf_directory_asm, TC.useIntegratedAs()); return !UseDwarfDirectory; } /// \brief Check whether the given input tree contains any compilation actions. static bool ContainsCompileAction(const Action *A) { if (isa(A) || isa(A)) return true; for (const auto &AI : A->inputs()) if (ContainsCompileAction(AI)) return true; return false; } /// \brief Check if -relax-all should be passed to the internal assembler. /// This is done by default when compiling non-assembler source with -O0. 
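/// (e.g. "clang -O0 -c foo.c" should pass -mrelax-all to cc1, while an
/// -O2 compile, or assembling a plain .s file with no compile action,
/// should not unless -mrelax-all is given explicitly.)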
static bool UseRelaxAll(Compilation &C, const ArgList &Args) { bool RelaxDefault = true; if (Arg *A = Args.getLastArg(options::OPT_O_Group)) RelaxDefault = A->getOption().matches(options::OPT_O0); if (RelaxDefault) { RelaxDefault = false; for (const auto &Act : C.getActions()) { if (ContainsCompileAction(Act)) { RelaxDefault = true; break; } } } return Args.hasFlag(options::OPT_mrelax_all, options::OPT_mno_relax_all, RelaxDefault); } // Convert an arg of the form "-gN" or "-ggdbN" or one of their aliases // to the corresponding DebugInfoKind. static codegenoptions::DebugInfoKind DebugLevelToInfoKind(const Arg &A) { assert(A.getOption().matches(options::OPT_gN_Group) && "Not a -g option that specifies a debug-info level"); if (A.getOption().matches(options::OPT_g0) || A.getOption().matches(options::OPT_ggdb0)) return codegenoptions::NoDebugInfo; if (A.getOption().matches(options::OPT_gline_tables_only) || A.getOption().matches(options::OPT_ggdb1)) return codegenoptions::DebugLineTablesOnly; return codegenoptions::LimitedDebugInfo; } // Extract the integer N from a string spelled "-dwarf-N", returning 0 // on mismatch. The StringRef input (rather than an Arg) allows // for use by the "-Xassembler" option parser. static unsigned DwarfVersionNum(StringRef ArgValue) { return llvm::StringSwitch(ArgValue) .Case("-gdwarf-2", 2) .Case("-gdwarf-3", 3) .Case("-gdwarf-4", 4) .Case("-gdwarf-5", 5) .Default(0); } static void RenderDebugEnablingArgs(const ArgList &Args, ArgStringList &CmdArgs, codegenoptions::DebugInfoKind DebugInfoKind, unsigned DwarfVersion, llvm::DebuggerKind DebuggerTuning) { switch (DebugInfoKind) { case codegenoptions::DebugLineTablesOnly: CmdArgs.push_back("-debug-info-kind=line-tables-only"); break; case codegenoptions::LimitedDebugInfo: CmdArgs.push_back("-debug-info-kind=limited"); break; case codegenoptions::FullDebugInfo: CmdArgs.push_back("-debug-info-kind=standalone"); break; default: break; } if (DwarfVersion > 0) CmdArgs.push_back( Args.MakeArgString("-dwarf-version=" + Twine(DwarfVersion))); switch (DebuggerTuning) { case llvm::DebuggerKind::GDB: CmdArgs.push_back("-debugger-tuning=gdb"); break; case llvm::DebuggerKind::LLDB: CmdArgs.push_back("-debugger-tuning=lldb"); break; case llvm::DebuggerKind::SCE: CmdArgs.push_back("-debugger-tuning=sce"); break; default: break; } } static void CollectArgsForIntegratedAssembler(Compilation &C, const ArgList &Args, ArgStringList &CmdArgs, const Driver &D) { if (UseRelaxAll(C, Args)) CmdArgs.push_back("-mrelax-all"); // Only default to -mincremental-linker-compatible if we think we are // targeting the MSVC linker. bool DefaultIncrementalLinkerCompatible = C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment(); if (Args.hasFlag(options::OPT_mincremental_linker_compatible, options::OPT_mno_incremental_linker_compatible, DefaultIncrementalLinkerCompatible)) CmdArgs.push_back("-mincremental-linker-compatible"); // When passing -I arguments to the assembler we sometimes need to // unconditionally take the next argument. For example, when parsing // '-Wa,-I -Wa,foo' we need to accept the -Wa,foo arg after seeing the // -Wa,-I arg and when parsing '-Wa,-I,foo' we need to accept the 'foo' // arg after parsing the '-I' arg. bool TakeNextArg = false; // When using an integrated assembler, translate -Wa, and -Xassembler // options. 
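  // For illustration (hypothetical values): "-Wa,--noexecstack" becomes
  // cc1as's -mnoexecstack, "-Wa,-mips32r2" becomes the "+mips32r2" target
  // feature on MIPS targets, and "-Wa,-gdwarf-3" is routed through
  // RenderDebugEnablingArgs instead of being passed through verbatim.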
bool CompressDebugSections = false; bool UseRelaxRelocations = ENABLE_X86_RELAX_RELOCATIONS; const char *MipsTargetFeature = nullptr; for (const Arg *A : Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler)) { A->claim(); for (StringRef Value : A->getValues()) { if (TakeNextArg) { CmdArgs.push_back(Value.data()); TakeNextArg = false; continue; } switch (C.getDefaultToolChain().getArch()) { default: break; case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: if (Value == "--trap") { CmdArgs.push_back("-target-feature"); CmdArgs.push_back("+use-tcc-in-div"); continue; } if (Value == "--break") { CmdArgs.push_back("-target-feature"); CmdArgs.push_back("-use-tcc-in-div"); continue; } if (Value.startswith("-msoft-float")) { CmdArgs.push_back("-target-feature"); CmdArgs.push_back("+soft-float"); continue; } if (Value.startswith("-mhard-float")) { CmdArgs.push_back("-target-feature"); CmdArgs.push_back("-soft-float"); continue; } MipsTargetFeature = llvm::StringSwitch(Value) .Case("-mips1", "+mips1") .Case("-mips2", "+mips2") .Case("-mips3", "+mips3") .Case("-mips4", "+mips4") .Case("-mips5", "+mips5") .Case("-mips32", "+mips32") .Case("-mips32r2", "+mips32r2") .Case("-mips32r3", "+mips32r3") .Case("-mips32r5", "+mips32r5") .Case("-mips32r6", "+mips32r6") .Case("-mips64", "+mips64") .Case("-mips64r2", "+mips64r2") .Case("-mips64r3", "+mips64r3") .Case("-mips64r5", "+mips64r5") .Case("-mips64r6", "+mips64r6") .Default(nullptr); if (MipsTargetFeature) continue; } if (Value == "-force_cpusubtype_ALL") { // Do nothing, this is the default and we don't support anything else. } else if (Value == "-L") { CmdArgs.push_back("-msave-temp-labels"); } else if (Value == "--fatal-warnings") { CmdArgs.push_back("-massembler-fatal-warnings"); } else if (Value == "--noexecstack") { CmdArgs.push_back("-mnoexecstack"); } else if (Value == "-compress-debug-sections" || Value == "--compress-debug-sections") { CompressDebugSections = true; } else if (Value == "-nocompress-debug-sections" || Value == "--nocompress-debug-sections") { CompressDebugSections = false; } else if (Value == "-mrelax-relocations=yes" || Value == "--mrelax-relocations=yes") { UseRelaxRelocations = true; } else if (Value == "-mrelax-relocations=no" || Value == "--mrelax-relocations=no") { UseRelaxRelocations = false; } else if (Value.startswith("-I")) { CmdArgs.push_back(Value.data()); // We need to consume the next argument if the current arg is a plain // -I. The next arg will be the include directory. if (Value == "-I") TakeNextArg = true; } else if (Value.startswith("-gdwarf-")) { // "-gdwarf-N" options are not cc1as options. unsigned DwarfVersion = DwarfVersionNum(Value); if (DwarfVersion == 0) { // Send it onward, and let cc1as complain. CmdArgs.push_back(Value.data()); } else { RenderDebugEnablingArgs(Args, CmdArgs, codegenoptions::LimitedDebugInfo, DwarfVersion, llvm::DebuggerKind::Default); } } else if (Value.startswith("-mcpu") || Value.startswith("-mfpu") || Value.startswith("-mhwdiv") || Value.startswith("-march")) { // Do nothing, we'll validate it later. 
} else { D.Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName() << Value; } } } if (CompressDebugSections) { if (llvm::zlib::isAvailable()) CmdArgs.push_back("-compress-debug-sections"); else D.Diag(diag::warn_debug_compression_unavailable); } if (UseRelaxRelocations) CmdArgs.push_back("--mrelax-relocations"); if (MipsTargetFeature != nullptr) { CmdArgs.push_back("-target-feature"); CmdArgs.push_back(MipsTargetFeature); } } // This adds the static libclang_rt.builtins-arch.a directly to the command line // FIXME: Make sure we can also emit shared objects if they're requested // and available, check for possible errors, etc. static void addClangRT(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs) { CmdArgs.push_back(TC.getCompilerRTArgString(Args, "builtins")); } namespace { enum OpenMPRuntimeKind { /// An unknown OpenMP runtime. We can't generate effective OpenMP code /// without knowing what runtime to target. OMPRT_Unknown, /// The LLVM OpenMP runtime. When completed and integrated, this will become /// the default for Clang. OMPRT_OMP, /// The GNU OpenMP runtime. Clang doesn't support generating OpenMP code for /// this runtime but can swallow the pragmas, and find and link against the /// runtime library itself. OMPRT_GOMP, /// The legacy name for the LLVM OpenMP runtime from when it was the Intel /// OpenMP runtime. We support this mode for users with existing dependencies /// on this runtime library name. OMPRT_IOMP5 }; } /// Compute the desired OpenMP runtime from the flag provided. static OpenMPRuntimeKind getOpenMPRuntime(const ToolChain &TC, const ArgList &Args) { StringRef RuntimeName(CLANG_DEFAULT_OPENMP_RUNTIME); const Arg *A = Args.getLastArg(options::OPT_fopenmp_EQ); if (A) RuntimeName = A->getValue(); auto RT = llvm::StringSwitch(RuntimeName) .Case("libomp", OMPRT_OMP) .Case("libgomp", OMPRT_GOMP) .Case("libiomp5", OMPRT_IOMP5) .Default(OMPRT_Unknown); if (RT == OMPRT_Unknown) { if (A) TC.getDriver().Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName() << A->getValue(); else // FIXME: We could use a nicer diagnostic here. TC.getDriver().Diag(diag::err_drv_unsupported_opt) << "-fopenmp"; } return RT; } static void addOpenMPRuntime(ArgStringList &CmdArgs, const ToolChain &TC, const ArgList &Args) { if (!Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, options::OPT_fno_openmp, false)) return; switch (getOpenMPRuntime(TC, Args)) { case OMPRT_OMP: CmdArgs.push_back("-lomp"); break; case OMPRT_GOMP: CmdArgs.push_back("-lgomp"); break; case OMPRT_IOMP5: CmdArgs.push_back("-liomp5"); break; case OMPRT_Unknown: // Already diagnosed. break; } } static void addSanitizerRuntime(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs, StringRef Sanitizer, bool IsShared, bool IsWhole) { // Wrap any static runtimes that must be forced into executable in // whole-archive. if (IsWhole) CmdArgs.push_back("-whole-archive"); CmdArgs.push_back(TC.getCompilerRTArgString(Args, Sanitizer, IsShared)); if (IsWhole) CmdArgs.push_back("-no-whole-archive"); } // Tries to use a file with the list of dynamic symbols that need to be exported // from the runtime library. Returns true if the file was found. 
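// (e.g. when libclang_rt.asan-x86_64.a is accompanied by a
// libclang_rt.asan-x86_64.a.syms file, the linker is handed
// "--dynamic-list=<that path>"; otherwise the caller falls back to
// -export-dynamic. Illustrative file names, assuming an x86_64 target.)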
static bool addSanitizerDynamicList(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs, StringRef Sanitizer) { SmallString<128> SanRT(TC.getCompilerRT(Args, Sanitizer)); if (llvm::sys::fs::exists(SanRT + ".syms")) { CmdArgs.push_back(Args.MakeArgString("--dynamic-list=" + SanRT + ".syms")); return true; } return false; } static void linkSanitizerRuntimeDeps(const ToolChain &TC, ArgStringList &CmdArgs) { // Force linking against the system libraries sanitizers depends on // (see PR15823 why this is necessary). CmdArgs.push_back("--no-as-needed"); CmdArgs.push_back("-lpthread"); CmdArgs.push_back("-lrt"); CmdArgs.push_back("-lm"); // There's no libdl on FreeBSD. if (TC.getTriple().getOS() != llvm::Triple::FreeBSD) CmdArgs.push_back("-ldl"); } static void collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args, SmallVectorImpl &SharedRuntimes, SmallVectorImpl &StaticRuntimes, SmallVectorImpl &NonWholeStaticRuntimes, SmallVectorImpl &HelperStaticRuntimes, SmallVectorImpl &RequiredSymbols) { const SanitizerArgs &SanArgs = TC.getSanitizerArgs(); // Collect shared runtimes. if (SanArgs.needsAsanRt() && SanArgs.needsSharedAsanRt()) { SharedRuntimes.push_back("asan"); } // The stats_client library is also statically linked into DSOs. if (SanArgs.needsStatsRt()) StaticRuntimes.push_back("stats_client"); // Collect static runtimes. if (Args.hasArg(options::OPT_shared) || TC.getTriple().isAndroid()) { // Don't link static runtimes into DSOs or if compiling for Android. return; } if (SanArgs.needsAsanRt()) { if (SanArgs.needsSharedAsanRt()) { HelperStaticRuntimes.push_back("asan-preinit"); } else { StaticRuntimes.push_back("asan"); if (SanArgs.linkCXXRuntimes()) StaticRuntimes.push_back("asan_cxx"); } } if (SanArgs.needsDfsanRt()) StaticRuntimes.push_back("dfsan"); if (SanArgs.needsLsanRt()) StaticRuntimes.push_back("lsan"); if (SanArgs.needsMsanRt()) { StaticRuntimes.push_back("msan"); if (SanArgs.linkCXXRuntimes()) StaticRuntimes.push_back("msan_cxx"); } if (SanArgs.needsTsanRt()) { StaticRuntimes.push_back("tsan"); if (SanArgs.linkCXXRuntimes()) StaticRuntimes.push_back("tsan_cxx"); } if (SanArgs.needsUbsanRt()) { StaticRuntimes.push_back("ubsan_standalone"); if (SanArgs.linkCXXRuntimes()) StaticRuntimes.push_back("ubsan_standalone_cxx"); } if (SanArgs.needsSafeStackRt()) StaticRuntimes.push_back("safestack"); if (SanArgs.needsCfiRt()) StaticRuntimes.push_back("cfi"); if (SanArgs.needsCfiDiagRt()) { StaticRuntimes.push_back("cfi_diag"); if (SanArgs.linkCXXRuntimes()) StaticRuntimes.push_back("ubsan_standalone_cxx"); } if (SanArgs.needsStatsRt()) { NonWholeStaticRuntimes.push_back("stats"); RequiredSymbols.push_back("__sanitizer_stats_register"); } if (SanArgs.needsEsanRt()) StaticRuntimes.push_back("esan"); } // Should be called before we add system libraries (C++ ABI, libstdc++/libc++, // C runtime, etc). Returns true if sanitizer system deps need to be linked in. 
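// For illustration (assuming a static link of -fsanitize=address on an
// ELF target): the asan runtime is wrapped in -whole-archive /
// -no-whole-archive so its initializers are kept, and the true return
// value tells the caller to also run linkSanitizerRuntimeDeps for
// -lpthread, -lrt, -lm and (outside FreeBSD) -ldl.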
static bool addSanitizerRuntimes(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs) { SmallVector SharedRuntimes, StaticRuntimes, NonWholeStaticRuntimes, HelperStaticRuntimes, RequiredSymbols; collectSanitizerRuntimes(TC, Args, SharedRuntimes, StaticRuntimes, NonWholeStaticRuntimes, HelperStaticRuntimes, RequiredSymbols); for (auto RT : SharedRuntimes) addSanitizerRuntime(TC, Args, CmdArgs, RT, true, false); for (auto RT : HelperStaticRuntimes) addSanitizerRuntime(TC, Args, CmdArgs, RT, false, true); bool AddExportDynamic = false; for (auto RT : StaticRuntimes) { addSanitizerRuntime(TC, Args, CmdArgs, RT, false, true); AddExportDynamic |= !addSanitizerDynamicList(TC, Args, CmdArgs, RT); } for (auto RT : NonWholeStaticRuntimes) { addSanitizerRuntime(TC, Args, CmdArgs, RT, false, false); AddExportDynamic |= !addSanitizerDynamicList(TC, Args, CmdArgs, RT); } for (auto S : RequiredSymbols) { CmdArgs.push_back("-u"); CmdArgs.push_back(Args.MakeArgString(S)); } // If there is a static runtime with no dynamic list, force all the symbols // to be dynamic to be sure we export sanitizer interface functions. if (AddExportDynamic) CmdArgs.push_back("-export-dynamic"); return !StaticRuntimes.empty(); } static bool addXRayRuntime(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs) { if (Args.hasFlag(options::OPT_fxray_instrument, options::OPT_fnoxray_instrument, false)) { CmdArgs.push_back("-whole-archive"); CmdArgs.push_back(TC.getCompilerRTArgString(Args, "xray", false)); CmdArgs.push_back("-no-whole-archive"); return true; } return false; } static void linkXRayRuntimeDeps(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs) { CmdArgs.push_back("--no-as-needed"); CmdArgs.push_back("-lpthread"); CmdArgs.push_back("-lrt"); CmdArgs.push_back("-lm"); CmdArgs.push_back("-latomic"); if (TC.GetCXXStdlibType(Args) == ToolChain::CST_Libcxx) CmdArgs.push_back("-lc++"); else CmdArgs.push_back("-lstdc++"); if (TC.getTriple().getOS() != llvm::Triple::FreeBSD) CmdArgs.push_back("-ldl"); } static bool areOptimizationsEnabled(const ArgList &Args) { // Find the last -O arg and see if it is non-zero. if (Arg *A = Args.getLastArg(options::OPT_O_Group)) return !A->getOption().matches(options::OPT_O0); // Defaults to -O0. return false; } static bool shouldUseFramePointerForTarget(const ArgList &Args, const llvm::Triple &Triple) { switch (Triple.getArch()) { case llvm::Triple::xcore: case llvm::Triple::wasm32: case llvm::Triple::wasm64: // XCore never wants frame pointers, regardless of OS. // WebAssembly never wants frame pointers. return false; default: break; } if (Triple.isOSLinux() || Triple.getOS() == llvm::Triple::CloudABI) { switch (Triple.getArch()) { // Don't use a frame pointer on linux if optimizing for certain targets. case llvm::Triple::mips64: case llvm::Triple::mips64el: case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::systemz: case llvm::Triple::x86: case llvm::Triple::x86_64: return !areOptimizationsEnabled(Args); default: return true; } } if (Triple.isOSWindows()) { switch (Triple.getArch()) { case llvm::Triple::x86: return !areOptimizationsEnabled(Args); case llvm::Triple::x86_64: return Triple.isOSBinFormatMachO(); case llvm::Triple::arm: case llvm::Triple::thumb: // Windows on ARM builds with FPO disabled to aid fast stack walking return true; default: // All other supported Windows ISAs use xdata unwind information, so frame // pointers are not generally useful. 
return false; } } return true; } static bool shouldUseFramePointer(const ArgList &Args, const llvm::Triple &Triple) { if (Arg *A = Args.getLastArg(options::OPT_fno_omit_frame_pointer, options::OPT_fomit_frame_pointer)) return A->getOption().matches(options::OPT_fno_omit_frame_pointer); if (Args.hasArg(options::OPT_pg)) return true; return shouldUseFramePointerForTarget(Args, Triple); } static bool shouldUseLeafFramePointer(const ArgList &Args, const llvm::Triple &Triple) { if (Arg *A = Args.getLastArg(options::OPT_mno_omit_leaf_frame_pointer, options::OPT_momit_leaf_frame_pointer)) return A->getOption().matches(options::OPT_mno_omit_leaf_frame_pointer); if (Args.hasArg(options::OPT_pg)) return true; if (Triple.isPS4CPU()) return false; return shouldUseFramePointerForTarget(Args, Triple); } /// Add a CC1 option to specify the debug compilation directory. static void addDebugCompDirArg(const ArgList &Args, ArgStringList &CmdArgs) { SmallString<128> cwd; if (!llvm::sys::fs::current_path(cwd)) { CmdArgs.push_back("-fdebug-compilation-dir"); CmdArgs.push_back(Args.MakeArgString(cwd)); } } static const char *SplitDebugName(const ArgList &Args, const InputInfo &Input) { Arg *FinalOutput = Args.getLastArg(options::OPT_o); if (FinalOutput && Args.hasArg(options::OPT_c)) { SmallString<128> T(FinalOutput->getValue()); llvm::sys::path::replace_extension(T, "dwo"); return Args.MakeArgString(T); } else { // Use the compilation dir. SmallString<128> T( Args.getLastArgValue(options::OPT_fdebug_compilation_dir)); SmallString<128> F(llvm::sys::path::stem(Input.getBaseInput())); llvm::sys::path::replace_extension(F, "dwo"); T += F; return Args.MakeArgString(F); } } static void SplitDebugInfo(const ToolChain &TC, Compilation &C, const Tool &T, const JobAction &JA, const ArgList &Args, const InputInfo &Output, const char *OutFile) { ArgStringList ExtractArgs; ExtractArgs.push_back("--extract-dwo"); ArgStringList StripArgs; StripArgs.push_back("--strip-dwo"); // Grabbing the output of the earlier compile step. StripArgs.push_back(Output.getFilename()); ExtractArgs.push_back(Output.getFilename()); ExtractArgs.push_back(OutFile); const char *Exec = Args.MakeArgString(TC.GetProgramPath("objcopy")); InputInfo II(types::TY_Object, Output.getFilename(), Output.getFilename()); // First extract the dwo sections. C.addCommand(llvm::make_unique(JA, T, Exec, ExtractArgs, II)); // Then remove them from the original .o file. C.addCommand(llvm::make_unique(JA, T, Exec, StripArgs, II)); } /// \brief Vectorize at all optimization levels greater than 1 except for -Oz. /// For -Oz the loop vectorizer is disable, while the slp vectorizer is enabled. static bool shouldEnableVectorizerAtOLevel(const ArgList &Args, bool isSlpVec) { if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { if (A->getOption().matches(options::OPT_O4) || A->getOption().matches(options::OPT_Ofast)) return true; if (A->getOption().matches(options::OPT_O0)) return false; assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag"); // Vectorize -Os. StringRef S(A->getValue()); if (S == "s") return true; // Don't vectorize -Oz, unless it's the slp vectorizer. if (S == "z") return isSlpVec; unsigned OptLevel = 0; if (S.getAsInteger(10, OptLevel)) return false; return OptLevel > 1; } return false; } /// Add -x lang to \p CmdArgs for \p Input. 
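/// (e.g. preprocessed C input is announced as "-x cpp-output", and
/// -rewrite-objc forces "-x objective-c++-cpp-output"; under -verify-pch
/// a PCH input deliberately receives no -x at all. Type names assumed
/// from Types.def.)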
static void addDashXForInput(const ArgList &Args, const InputInfo &Input, ArgStringList &CmdArgs) { // When using -verify-pch, we don't want to provide the type // 'precompiled-header' if it was inferred from the file extension if (Args.hasArg(options::OPT_verify_pch) && Input.getType() == types::TY_PCH) return; CmdArgs.push_back("-x"); if (Args.hasArg(options::OPT_rewrite_objc)) CmdArgs.push_back(types::getTypeName(types::TY_PP_ObjCXX)); else CmdArgs.push_back(types::getTypeName(Input.getType())); } static VersionTuple getMSCompatibilityVersion(unsigned Version) { if (Version < 100) return VersionTuple(Version); if (Version < 10000) return VersionTuple(Version / 100, Version % 100); unsigned Build = 0, Factor = 1; for (; Version > 10000; Version = Version / 10, Factor = Factor * 10) Build = Build + (Version % 10) * Factor; return VersionTuple(Version / 100, Version % 100, Build); } // Claim options we don't want to warn if they are unused. We do this for // options that build systems might add but are unused when assembling or only // running the preprocessor for example. static void claimNoWarnArgs(const ArgList &Args) { // Don't warn about unused -f(no-)?lto. This can happen when we're // preprocessing, precompiling or assembling. Args.ClaimAllArgs(options::OPT_flto_EQ); Args.ClaimAllArgs(options::OPT_flto); Args.ClaimAllArgs(options::OPT_fno_lto); } static void appendUserToPath(SmallVectorImpl &Result) { #ifdef LLVM_ON_UNIX const char *Username = getenv("LOGNAME"); #else const char *Username = getenv("USERNAME"); #endif if (Username) { // Validate that LoginName can be used in a path, and get its length. size_t Len = 0; for (const char *P = Username; *P; ++P, ++Len) { if (!isAlphanumeric(*P) && *P != '_') { Username = nullptr; break; } } if (Username && Len > 0) { Result.append(Username, Username + Len); return; } } // Fallback to user id. #ifdef LLVM_ON_UNIX std::string UID = llvm::utostr(getuid()); #else // FIXME: Windows seems to have an 'SID' that might work. 
std::string UID = "9999"; #endif Result.append(UID.begin(), UID.end()); } VersionTuple visualstudio::getMSVCVersion(const Driver *D, const ToolChain &TC, const llvm::Triple &Triple, const llvm::opt::ArgList &Args, bool IsWindowsMSVC) { if (Args.hasFlag(options::OPT_fms_extensions, options::OPT_fno_ms_extensions, IsWindowsMSVC) || Args.hasArg(options::OPT_fmsc_version) || Args.hasArg(options::OPT_fms_compatibility_version)) { const Arg *MSCVersion = Args.getLastArg(options::OPT_fmsc_version); const Arg *MSCompatibilityVersion = Args.getLastArg(options::OPT_fms_compatibility_version); if (MSCVersion && MSCompatibilityVersion) { if (D) D->Diag(diag::err_drv_argument_not_allowed_with) << MSCVersion->getAsString(Args) << MSCompatibilityVersion->getAsString(Args); return VersionTuple(); } if (MSCompatibilityVersion) { VersionTuple MSVT; if (MSVT.tryParse(MSCompatibilityVersion->getValue()) && D) D->Diag(diag::err_drv_invalid_value) << MSCompatibilityVersion->getAsString(Args) << MSCompatibilityVersion->getValue(); return MSVT; } if (MSCVersion) { unsigned Version = 0; if (StringRef(MSCVersion->getValue()).getAsInteger(10, Version) && D) D->Diag(diag::err_drv_invalid_value) << MSCVersion->getAsString(Args) << MSCVersion->getValue(); return getMSCompatibilityVersion(Version); } unsigned Major, Minor, Micro; Triple.getEnvironmentVersion(Major, Minor, Micro); if (Major || Minor || Micro) return VersionTuple(Major, Minor, Micro); if (IsWindowsMSVC) { VersionTuple MSVT = TC.getMSVCVersionFromExe(); if (!MSVT.empty()) return MSVT; // FIXME: Consider bumping this to 19 (MSVC2015) soon. return VersionTuple(18); } } return VersionTuple(); } static void addPGOAndCoverageFlags(Compilation &C, const Driver &D, const InputInfo &Output, const ArgList &Args, ArgStringList &CmdArgs) { auto *PGOGenerateArg = Args.getLastArg(options::OPT_fprofile_generate, options::OPT_fprofile_generate_EQ, options::OPT_fno_profile_generate); if (PGOGenerateArg && PGOGenerateArg->getOption().matches(options::OPT_fno_profile_generate)) PGOGenerateArg = nullptr; auto *ProfileGenerateArg = Args.getLastArg( options::OPT_fprofile_instr_generate, options::OPT_fprofile_instr_generate_EQ, options::OPT_fno_profile_instr_generate); if (ProfileGenerateArg && ProfileGenerateArg->getOption().matches( options::OPT_fno_profile_instr_generate)) ProfileGenerateArg = nullptr; if (PGOGenerateArg && ProfileGenerateArg) D.Diag(diag::err_drv_argument_not_allowed_with) << PGOGenerateArg->getSpelling() << ProfileGenerateArg->getSpelling(); auto *ProfileUseArg = Args.getLastArg( options::OPT_fprofile_instr_use, options::OPT_fprofile_instr_use_EQ, options::OPT_fprofile_use, options::OPT_fprofile_use_EQ, options::OPT_fno_profile_instr_use); if (ProfileUseArg && ProfileUseArg->getOption().matches(options::OPT_fno_profile_instr_use)) ProfileUseArg = nullptr; if (PGOGenerateArg && ProfileUseArg) D.Diag(diag::err_drv_argument_not_allowed_with) << ProfileUseArg->getSpelling() << PGOGenerateArg->getSpelling(); if (ProfileGenerateArg && ProfileUseArg) D.Diag(diag::err_drv_argument_not_allowed_with) << ProfileGenerateArg->getSpelling() << ProfileUseArg->getSpelling(); if (ProfileGenerateArg) { if (ProfileGenerateArg->getOption().matches( options::OPT_fprofile_instr_generate_EQ)) CmdArgs.push_back(Args.MakeArgString(Twine("-fprofile-instrument-path=") + ProfileGenerateArg->getValue())); // The default is to use Clang Instrumentation. 
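    // (e.g. -fprofile-instr-generate=/tmp/foo.profraw emits the
    // "-fprofile-instrument-path=/tmp/foo.profraw" argument above plus
    // "-fprofile-instrument=clang" below, whereas -fprofile-generate=<dir>
    // selects IR instrumentation and appends "default.profraw" to the
    // directory; hypothetical paths for illustration.)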
CmdArgs.push_back("-fprofile-instrument=clang"); } if (PGOGenerateArg) { CmdArgs.push_back("-fprofile-instrument=llvm"); if (PGOGenerateArg->getOption().matches( options::OPT_fprofile_generate_EQ)) { SmallString<128> Path(PGOGenerateArg->getValue()); llvm::sys::path::append(Path, "default.profraw"); CmdArgs.push_back( Args.MakeArgString(Twine("-fprofile-instrument-path=") + Path)); } } if (ProfileUseArg) { if (ProfileUseArg->getOption().matches(options::OPT_fprofile_instr_use_EQ)) CmdArgs.push_back(Args.MakeArgString( Twine("-fprofile-instrument-use-path=") + ProfileUseArg->getValue())); else if ((ProfileUseArg->getOption().matches( options::OPT_fprofile_use_EQ) || ProfileUseArg->getOption().matches( options::OPT_fprofile_instr_use))) { SmallString<128> Path( ProfileUseArg->getNumValues() == 0 ? "" : ProfileUseArg->getValue()); if (Path.empty() || llvm::sys::fs::is_directory(Path)) llvm::sys::path::append(Path, "default.profdata"); CmdArgs.push_back( Args.MakeArgString(Twine("-fprofile-instrument-use-path=") + Path)); } } if (Args.hasArg(options::OPT_ftest_coverage) || Args.hasArg(options::OPT_coverage)) CmdArgs.push_back("-femit-coverage-notes"); if (Args.hasFlag(options::OPT_fprofile_arcs, options::OPT_fno_profile_arcs, false) || Args.hasArg(options::OPT_coverage)) CmdArgs.push_back("-femit-coverage-data"); if (Args.hasFlag(options::OPT_fcoverage_mapping, options::OPT_fno_coverage_mapping, false) && !ProfileGenerateArg) D.Diag(diag::err_drv_argument_only_allowed_with) << "-fcoverage-mapping" << "-fprofile-instr-generate"; if (Args.hasFlag(options::OPT_fcoverage_mapping, options::OPT_fno_coverage_mapping, false)) CmdArgs.push_back("-fcoverage-mapping"); if (C.getArgs().hasArg(options::OPT_c) || C.getArgs().hasArg(options::OPT_S)) { if (Output.isFilename()) { CmdArgs.push_back("-coverage-file"); SmallString<128> CoverageFilename; if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o)) { CoverageFilename = FinalOutput->getValue(); } else { CoverageFilename = llvm::sys::path::filename(Output.getBaseInput()); } if (llvm::sys::path::is_relative(CoverageFilename)) { SmallString<128> Pwd; if (!llvm::sys::fs::current_path(Pwd)) { llvm::sys::path::append(Pwd, CoverageFilename); CoverageFilename.swap(Pwd); } } CmdArgs.push_back(Args.MakeArgString(CoverageFilename)); } } } static void addPS4ProfileRTArgs(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs) { if ((Args.hasFlag(options::OPT_fprofile_arcs, options::OPT_fno_profile_arcs, false) || Args.hasFlag(options::OPT_fprofile_generate, options::OPT_fno_profile_instr_generate, false) || Args.hasFlag(options::OPT_fprofile_generate_EQ, options::OPT_fno_profile_instr_generate, false) || Args.hasFlag(options::OPT_fprofile_instr_generate, options::OPT_fno_profile_instr_generate, false) || Args.hasFlag(options::OPT_fprofile_instr_generate_EQ, options::OPT_fno_profile_instr_generate, false) || Args.hasArg(options::OPT_fcreate_profile) || Args.hasArg(options::OPT_coverage))) CmdArgs.push_back("--dependent-lib=libclang_rt.profile-x86_64.a"); } /// Parses the various -fpic/-fPIC/-fpie/-fPIE arguments. Then, /// smooshes them together with platform defaults, to decide whether /// this compile should be using PIC mode or not. Returns a tuple of /// (RelocationModel, PICLevel, IsPIE). static std::tuple ParsePICArgs(const ToolChain &ToolChain, const llvm::Triple &Triple, const ArgList &Args) { // FIXME: why does this code...and so much everywhere else, use both // ToolChain.getTriple() and Triple? 
bool PIE = ToolChain.isPIEDefault(); bool PIC = PIE || ToolChain.isPICDefault(); // The Darwin/MachO default to use PIC does not apply when using -static. if (ToolChain.getTriple().isOSBinFormatMachO() && Args.hasArg(options::OPT_static)) PIE = PIC = false; bool IsPICLevelTwo = PIC; bool KernelOrKext = Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext); // Android-specific defaults for PIC/PIE if (ToolChain.getTriple().isAndroid()) { switch (ToolChain.getArch()) { case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: case llvm::Triple::aarch64: case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: PIC = true; // "-fpic" break; case llvm::Triple::x86: case llvm::Triple::x86_64: PIC = true; // "-fPIC" IsPICLevelTwo = true; break; default: break; } } // OpenBSD-specific defaults for PIE if (ToolChain.getTriple().getOS() == llvm::Triple::OpenBSD) { switch (ToolChain.getArch()) { case llvm::Triple::mips64: case llvm::Triple::mips64el: case llvm::Triple::sparcel: case llvm::Triple::x86: case llvm::Triple::x86_64: IsPICLevelTwo = false; // "-fpie" break; case llvm::Triple::ppc: case llvm::Triple::sparc: case llvm::Triple::sparcv9: IsPICLevelTwo = true; // "-fPIE" break; default: break; } } // The last argument relating to either PIC or PIE wins, and no // other argument is used. If the last argument is any flavor of the // '-fno-...' arguments, both PIC and PIE are disabled. Any PIE // option implicitly enables PIC at the same level. Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC, options::OPT_fpic, options::OPT_fno_pic, options::OPT_fPIE, options::OPT_fno_PIE, options::OPT_fpie, options::OPT_fno_pie); // Check whether the tool chain trumps the PIC-ness decision. If the PIC-ness // is forced, then neither PIC nor PIE flags will have any effect. if (!ToolChain.isPICDefaultForced()) { if (LastPICArg) { Option O = LastPICArg->getOption(); if (O.matches(options::OPT_fPIC) || O.matches(options::OPT_fpic) || O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie)) { PIE = O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie); PIC = PIE || O.matches(options::OPT_fPIC) || O.matches(options::OPT_fpic); IsPICLevelTwo = O.matches(options::OPT_fPIE) || O.matches(options::OPT_fPIC); } else { PIE = PIC = false; if (Triple.isPS4CPU()) { Arg *ModelArg = Args.getLastArg(options::OPT_mcmodel_EQ); StringRef Model = ModelArg ? ModelArg->getValue() : ""; if (Model != "kernel") { PIC = true; ToolChain.getDriver().Diag(diag::warn_drv_ps4_force_pic) << LastPICArg->getSpelling(); } } } } } // Introduce a Darwin and PS4-specific hack. If the default is PIC, but the // PIC level would've been set to level 1, force it back to level 2 PIC // instead. if (PIC && (ToolChain.getTriple().isOSDarwin() || Triple.isPS4CPU())) IsPICLevelTwo |= ToolChain.isPICDefault(); // These kernel flags are a trump card: they will disable PIC/PIE // generation, independent of the argument order. if (KernelOrKext && ((!Triple.isiOS() || Triple.isOSVersionLT(6)) && !Triple.isWatchOS())) PIC = PIE = false; if (Arg *A = Args.getLastArg(options::OPT_mdynamic_no_pic)) { // This is a very special mode. It trumps the other modes, almost no one // uses it, and it isn't even valid on any OS but Darwin.
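// (Illustration: on Darwin this maps to the DynamicNoPIC relocation model
// returned below; on any other target the driver diagnoses the flag as
// unsupported.)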
if (!ToolChain.getTriple().isOSDarwin()) ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target) << A->getSpelling() << ToolChain.getTriple().str(); // FIXME: Warn when this flag trumps some other PIC or PIE flag. // Only a forced PIC mode can cause the actual compile to have PIC defines // etc., no flags are sufficient. This behavior was selected to closely // match that of llvm-gcc and Apple GCC before that. PIC = ToolChain.isPICDefault() && ToolChain.isPICDefaultForced(); return std::make_tuple(llvm::Reloc::DynamicNoPIC, PIC ? 2 : 0, false); } if (PIC) return std::make_tuple(llvm::Reloc::PIC_, IsPICLevelTwo ? 2 : 1, PIE); return std::make_tuple(llvm::Reloc::Static, 0, false); } static const char *RelocationModelName(llvm::Reloc::Model Model) { switch (Model) { case llvm::Reloc::Static: return "static"; case llvm::Reloc::PIC_: return "pic"; case llvm::Reloc::DynamicNoPIC: return "dynamic-no-pic"; } llvm_unreachable("Unknown Reloc::Model kind"); } static void AddAssemblerKPIC(const ToolChain &ToolChain, const ArgList &Args, ArgStringList &CmdArgs) { llvm::Reloc::Model RelocationModel; unsigned PICLevel; bool IsPIE; std::tie(RelocationModel, PICLevel, IsPIE) = ParsePICArgs(ToolChain, ToolChain.getTriple(), Args); if (RelocationModel != llvm::Reloc::Static) CmdArgs.push_back("-KPIC"); } void Clang::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { std::string TripleStr = getToolChain().ComputeEffectiveClangTriple(Args); const llvm::Triple Triple(TripleStr); bool KernelOrKext = Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext); const Driver &D = getToolChain().getDriver(); ArgStringList CmdArgs; bool IsWindowsGNU = getToolChain().getTriple().isWindowsGNUEnvironment(); bool IsWindowsCygnus = getToolChain().getTriple().isWindowsCygwinEnvironment(); bool IsWindowsMSVC = getToolChain().getTriple().isWindowsMSVCEnvironment(); bool IsPS4CPU = getToolChain().getTriple().isPS4CPU(); bool IsIAMCU = getToolChain().getTriple().isOSIAMCU(); // Check number of inputs for sanity. We need at least one input. assert(Inputs.size() >= 1 && "Must have at least one input."); const InputInfo &Input = Inputs[0]; // CUDA compilation may have multiple inputs (source file + results of // device-side compilations). All other jobs are expected to have exactly one // input. bool IsCuda = JA.isOffloading(Action::OFK_Cuda); assert((IsCuda || Inputs.size() == 1) && "Unable to handle multiple inputs."); // C++ is not supported for IAMCU. if (IsIAMCU && types::isCXX(Input.getType())) D.Diag(diag::err_drv_clang_unsupported) << "C++ for IAMCU"; // Invoke ourselves in -cc1 mode. // // FIXME: Implement custom jobs for internal actions. CmdArgs.push_back("-cc1"); // Add the "effective" target triple. CmdArgs.push_back("-triple"); CmdArgs.push_back(Args.MakeArgString(TripleStr)); if (IsCuda) { // We have to pass the triple of the host if compiling for a CUDA device and // vice versa. std::string NormalizedTriple; if (JA.isDeviceOffloading(Action::OFK_Cuda)) NormalizedTriple = C.getSingleOffloadToolChain<Action::OFK_Host>() ->getTriple() .normalize(); else NormalizedTriple = C.getSingleOffloadToolChain<Action::OFK_Cuda>() ->getTriple() .normalize(); CmdArgs.push_back("-aux-triple"); CmdArgs.push_back(Args.MakeArgString(NormalizedTriple)); } if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm || Triple.getArch() == llvm::Triple::thumb)) { unsigned Offset = Triple.getArch() == llvm::Triple::arm ?
4 : 6; unsigned Version; Triple.getArchName().substr(Offset).getAsInteger(10, Version); if (Version < 7) D.Diag(diag::err_target_unsupported_arch) << Triple.getArchName() << TripleStr; } // Push all default warning arguments that are specific to // the given target. These come before any user-provided warning options. getToolChain().addClangWarningOptions(CmdArgs); // Select the appropriate action. RewriteKind rewriteKind = RK_None; if (isa<AnalyzeJobAction>(JA)) { assert(JA.getType() == types::TY_Plist && "Invalid output type."); CmdArgs.push_back("-analyze"); } else if (isa<MigrateJobAction>(JA)) { CmdArgs.push_back("-migrate"); } else if (isa<PreprocessJobAction>(JA)) { if (Output.getType() == types::TY_Dependencies) CmdArgs.push_back("-Eonly"); else { CmdArgs.push_back("-E"); if (Args.hasArg(options::OPT_rewrite_objc) && !Args.hasArg(options::OPT_g_Group)) CmdArgs.push_back("-P"); } } else if (isa<AssembleJobAction>(JA)) { CmdArgs.push_back("-emit-obj"); CollectArgsForIntegratedAssembler(C, Args, CmdArgs, D); // Also ignore the explicit -force_cpusubtype_ALL option. (void)Args.hasArg(options::OPT_force__cpusubtype__ALL); } else if (isa<PrecompileJobAction>(JA)) { // Use PCH if the user requested it. bool UsePCH = D.CCCUsePCH; if (JA.getType() == types::TY_Nothing) CmdArgs.push_back("-fsyntax-only"); else if (UsePCH) CmdArgs.push_back("-emit-pch"); else CmdArgs.push_back("-emit-pth"); } else if (isa<VerifyPCHJobAction>(JA)) { CmdArgs.push_back("-verify-pch"); } else { assert((isa<CompileJobAction>(JA) || isa<BackendJobAction>(JA)) && "Invalid action for clang tool."); if (JA.getType() == types::TY_Nothing) { CmdArgs.push_back("-fsyntax-only"); } else if (JA.getType() == types::TY_LLVM_IR || JA.getType() == types::TY_LTO_IR) { CmdArgs.push_back("-emit-llvm"); } else if (JA.getType() == types::TY_LLVM_BC || JA.getType() == types::TY_LTO_BC) { CmdArgs.push_back("-emit-llvm-bc"); } else if (JA.getType() == types::TY_PP_Asm) { CmdArgs.push_back("-S"); } else if (JA.getType() == types::TY_AST) { CmdArgs.push_back("-emit-pch"); } else if (JA.getType() == types::TY_ModuleFile) { CmdArgs.push_back("-module-file-info"); } else if (JA.getType() == types::TY_RewrittenObjC) { CmdArgs.push_back("-rewrite-objc"); rewriteKind = RK_NonFragile; } else if (JA.getType() == types::TY_RewrittenLegacyObjC) { CmdArgs.push_back("-rewrite-objc"); rewriteKind = RK_Fragile; } else { assert(JA.getType() == types::TY_PP_Asm && "Unexpected output type!"); } // Preserve use-list order by default when emitting bitcode, so that // loading the bitcode up in 'opt' or 'llc' and running passes gives the // same result as running passes here. For LTO, we don't need to preserve // the use-list order, since serialization to bitcode is part of the flow. if (JA.getType() == types::TY_LLVM_BC) CmdArgs.push_back("-emit-llvm-uselists"); if (D.isUsingLTO()) Args.AddLastArg(CmdArgs, options::OPT_flto, options::OPT_flto_EQ); } if (const Arg *A = Args.getLastArg(options::OPT_fthinlto_index_EQ)) { if (!types::isLLVMIR(Input.getType())) D.Diag(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args) << "-x ir"; Args.AddLastArg(CmdArgs, options::OPT_fthinlto_index_EQ); } // Embed-bitcode option. if (C.getDriver().embedBitcodeEnabled() && (isa<BackendJobAction>(JA) || isa<AssembleJobAction>(JA))) { // Add flags implied by -fembed-bitcode. Args.AddLastArg(CmdArgs, options::OPT_fembed_bitcode_EQ); // Disable all llvm IR level optimizations.
CmdArgs.push_back("-disable-llvm-optzns"); } if (C.getDriver().embedBitcodeMarkerOnly()) CmdArgs.push_back("-fembed-bitcode=marker"); // We normally speed up the clang process a bit by skipping destructors at // exit, but when we're generating diagnostics we can rely on some of the // cleanup. if (!C.isForDiagnostics()) CmdArgs.push_back("-disable-free"); // Disable the verification pass in -asserts builds. #ifdef NDEBUG CmdArgs.push_back("-disable-llvm-verifier"); // Discard LLVM value names in -asserts builds. CmdArgs.push_back("-discard-value-names"); #endif // Set the main file name, so that debug info works even with // -save-temps. CmdArgs.push_back("-main-file-name"); CmdArgs.push_back(getBaseInputName(Args, Input)); // Some flags which affect the language (via preprocessor // defines). if (Args.hasArg(options::OPT_static)) CmdArgs.push_back("-static-define"); if (isa(JA)) { // Enable region store model by default. CmdArgs.push_back("-analyzer-store=region"); // Treat blocks as analysis entry points. CmdArgs.push_back("-analyzer-opt-analyze-nested-blocks"); CmdArgs.push_back("-analyzer-eagerly-assume"); // Add default argument set. if (!Args.hasArg(options::OPT__analyzer_no_default_checks)) { CmdArgs.push_back("-analyzer-checker=core"); if (!IsWindowsMSVC) { CmdArgs.push_back("-analyzer-checker=unix"); } else { // Enable "unix" checkers that also work on Windows. CmdArgs.push_back("-analyzer-checker=unix.API"); CmdArgs.push_back("-analyzer-checker=unix.Malloc"); CmdArgs.push_back("-analyzer-checker=unix.MallocSizeof"); CmdArgs.push_back("-analyzer-checker=unix.MismatchedDeallocator"); CmdArgs.push_back("-analyzer-checker=unix.cstring.BadSizeArg"); CmdArgs.push_back("-analyzer-checker=unix.cstring.NullArg"); } // Disable some unix checkers for PS4. if (IsPS4CPU) { CmdArgs.push_back("-analyzer-disable-checker=unix.API"); CmdArgs.push_back("-analyzer-disable-checker=unix.Vfork"); } if (getToolChain().getTriple().getVendor() == llvm::Triple::Apple) CmdArgs.push_back("-analyzer-checker=osx"); CmdArgs.push_back("-analyzer-checker=deadcode"); if (types::isCXX(Input.getType())) CmdArgs.push_back("-analyzer-checker=cplusplus"); if (!IsPS4CPU) { CmdArgs.push_back( "-analyzer-checker=security.insecureAPI.UncheckedReturn"); CmdArgs.push_back("-analyzer-checker=security.insecureAPI.getpw"); CmdArgs.push_back("-analyzer-checker=security.insecureAPI.gets"); CmdArgs.push_back("-analyzer-checker=security.insecureAPI.mktemp"); CmdArgs.push_back("-analyzer-checker=security.insecureAPI.mkstemp"); CmdArgs.push_back("-analyzer-checker=security.insecureAPI.vfork"); } // Default nullability checks. CmdArgs.push_back("-analyzer-checker=nullability.NullPassedToNonnull"); CmdArgs.push_back( "-analyzer-checker=nullability.NullReturnedFromNonnull"); } // Set the output format. The default is plist, for (lame) historical // reasons. CmdArgs.push_back("-analyzer-output"); if (Arg *A = Args.getLastArg(options::OPT__analyzer_output)) CmdArgs.push_back(A->getValue()); else CmdArgs.push_back("plist"); // Disable the presentation of standard compiler warnings when // using --analyze. We only want to show static analyzer diagnostics // or frontend errors. CmdArgs.push_back("-w"); // Add -Xanalyzer arguments when running as analyzer. 
Args.AddAllArgValues(CmdArgs, options::OPT_Xanalyzer); } CheckCodeGenerationOptions(D, Args); llvm::Reloc::Model RelocationModel; unsigned PICLevel; bool IsPIE; std::tie(RelocationModel, PICLevel, IsPIE) = ParsePICArgs(getToolChain(), Triple, Args); const char *RMName = RelocationModelName(RelocationModel); if (RMName) { CmdArgs.push_back("-mrelocation-model"); CmdArgs.push_back(RMName); } if (PICLevel > 0) { CmdArgs.push_back("-pic-level"); CmdArgs.push_back(PICLevel == 1 ? "1" : "2"); if (IsPIE) CmdArgs.push_back("-pic-is-pie"); } if (Arg *A = Args.getLastArg(options::OPT_meabi)) { CmdArgs.push_back("-meabi"); CmdArgs.push_back(A->getValue()); } CmdArgs.push_back("-mthread-model"); if (Arg *A = Args.getLastArg(options::OPT_mthread_model)) CmdArgs.push_back(A->getValue()); else CmdArgs.push_back(Args.MakeArgString(getToolChain().getThreadModel())); Args.AddLastArg(CmdArgs, options::OPT_fveclib); if (!Args.hasFlag(options::OPT_fmerge_all_constants, options::OPT_fno_merge_all_constants)) CmdArgs.push_back("-fno-merge-all-constants"); // LLVM Code Generator Options. if (Args.hasArg(options::OPT_frewrite_map_file) || Args.hasArg(options::OPT_frewrite_map_file_EQ)) { for (const Arg *A : Args.filtered(options::OPT_frewrite_map_file, options::OPT_frewrite_map_file_EQ)) { CmdArgs.push_back("-frewrite-map-file"); CmdArgs.push_back(A->getValue()); A->claim(); } } if (Arg *A = Args.getLastArg(options::OPT_Wframe_larger_than_EQ)) { StringRef v = A->getValue(); CmdArgs.push_back("-mllvm"); CmdArgs.push_back(Args.MakeArgString("-warn-stack-size=" + v)); A->claim(); } if (!Args.hasFlag(options::OPT_fjump_tables, options::OPT_fno_jump_tables, true)) CmdArgs.push_back("-fno-jump-tables"); if (Arg *A = Args.getLastArg(options::OPT_mregparm_EQ)) { CmdArgs.push_back("-mregparm"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_fpcc_struct_return, options::OPT_freg_struct_return)) { if (getToolChain().getArch() != llvm::Triple::x86) { D.Diag(diag::err_drv_unsupported_opt_for_target) << A->getSpelling() << getToolChain().getTriple().str(); } else if (A->getOption().matches(options::OPT_fpcc_struct_return)) { CmdArgs.push_back("-fpcc-struct-return"); } else { assert(A->getOption().matches(options::OPT_freg_struct_return)); CmdArgs.push_back("-freg-struct-return"); } } if (Args.hasFlag(options::OPT_mrtd, options::OPT_mno_rtd, false)) CmdArgs.push_back("-fdefault-calling-conv=stdcall"); if (shouldUseFramePointer(Args, getToolChain().getTriple())) CmdArgs.push_back("-mdisable-fp-elim"); if (!Args.hasFlag(options::OPT_fzero_initialized_in_bss, options::OPT_fno_zero_initialized_in_bss)) CmdArgs.push_back("-mno-zero-initialized-in-bss"); bool OFastEnabled = isOptimizationLevelFast(Args); // If -Ofast is the optimization level, then -fstrict-aliasing should be // enabled. This alias option is being used to simplify the hasFlag logic. OptSpecifier StrictAliasingAliasOption = OFastEnabled ? options::OPT_Ofast : options::OPT_fstrict_aliasing; // We turn strict aliasing off by default if we're in CL mode, since MSVC // doesn't do any TBAA. 
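// (Consequence: a plain `clang-cl /c file.c` build gets `-relaxed-aliasing`
// below unless strict aliasing is explicitly requested.)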
bool TBAAOnByDefault = !getToolChain().getDriver().IsCLMode(); if (!Args.hasFlag(options::OPT_fstrict_aliasing, StrictAliasingAliasOption, options::OPT_fno_strict_aliasing, TBAAOnByDefault)) CmdArgs.push_back("-relaxed-aliasing"); if (!Args.hasFlag(options::OPT_fstruct_path_tbaa, options::OPT_fno_struct_path_tbaa)) CmdArgs.push_back("-no-struct-path-tbaa"); if (Args.hasFlag(options::OPT_fstrict_enums, options::OPT_fno_strict_enums, false)) CmdArgs.push_back("-fstrict-enums"); if (Args.hasFlag(options::OPT_fstrict_vtable_pointers, options::OPT_fno_strict_vtable_pointers, false)) CmdArgs.push_back("-fstrict-vtable-pointers"); if (!Args.hasFlag(options::OPT_foptimize_sibling_calls, options::OPT_fno_optimize_sibling_calls)) CmdArgs.push_back("-mdisable-tail-calls"); // Handle segmented stacks. if (Args.hasArg(options::OPT_fsplit_stack)) CmdArgs.push_back("-split-stacks"); // If -Ofast is the optimization level, then -ffast-math should be enabled. // This alias option is being used to simplify the getLastArg logic. OptSpecifier FastMathAliasOption = OFastEnabled ? options::OPT_Ofast : options::OPT_ffast_math; // Handle various floating point optimization flags, mapping them to the // appropriate LLVM code generation flags. The pattern for all of these is to // default off the codegen optimizations, and if any flag enables them and no // flag disables them after the flag enabling them, enable the codegen // optimization. This is complicated by several "umbrella" flags. if (Arg *A = Args.getLastArg( options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math, options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, options::OPT_fhonor_infinities, options::OPT_fno_honor_infinities)) if (A->getOption().getID() != options::OPT_fno_fast_math && A->getOption().getID() != options::OPT_fno_finite_math_only && A->getOption().getID() != options::OPT_fhonor_infinities) CmdArgs.push_back("-menable-no-infs"); if (Arg *A = Args.getLastArg( options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math, options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, options::OPT_fhonor_nans, options::OPT_fno_honor_nans)) if (A->getOption().getID() != options::OPT_fno_fast_math && A->getOption().getID() != options::OPT_fno_finite_math_only && A->getOption().getID() != options::OPT_fhonor_nans) CmdArgs.push_back("-menable-no-nans"); // -fmath-errno is the default on some platforms, e.g. BSD-derived OSes. bool MathErrno = getToolChain().IsMathErrnoDefault(); if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math, options::OPT_fmath_errno, options::OPT_fno_math_errno)) { // Turning on -ffast-math (with either flag) removes the need for MathErrno. // However, turning *off* -ffast-math merely restores the toolchain default // (which may be false). if (A->getOption().getID() == options::OPT_fno_math_errno || A->getOption().getID() == options::OPT_ffast_math || A->getOption().getID() == options::OPT_Ofast) MathErrno = false; else if (A->getOption().getID() == options::OPT_fmath_errno) MathErrno = true; } if (MathErrno) CmdArgs.push_back("-fmath-errno"); // There are several flags which require disabling very specific // optimizations. Any of these being disabled forces us to turn off the // entire set of LLVM optimizations, so collect them through all the flag // madness.
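// Worked example (hypothetical flags): `-ffast-math -fno-associative-math`
// leaves AssociativeMath false below, which alone is enough to keep
// -menable-unsafe-fp-math off even though the other fast-math bits are set.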
bool AssociativeMath = false; if (Arg *A = Args.getLastArg( options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math, options::OPT_funsafe_math_optimizations, options::OPT_fno_unsafe_math_optimizations, options::OPT_fassociative_math, options::OPT_fno_associative_math)) if (A->getOption().getID() != options::OPT_fno_fast_math && A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations && A->getOption().getID() != options::OPT_fno_associative_math) AssociativeMath = true; bool ReciprocalMath = false; if (Arg *A = Args.getLastArg( options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math, options::OPT_funsafe_math_optimizations, options::OPT_fno_unsafe_math_optimizations, options::OPT_freciprocal_math, options::OPT_fno_reciprocal_math)) if (A->getOption().getID() != options::OPT_fno_fast_math && A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations && A->getOption().getID() != options::OPT_fno_reciprocal_math) ReciprocalMath = true; bool SignedZeros = true; if (Arg *A = Args.getLastArg( options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math, options::OPT_funsafe_math_optimizations, options::OPT_fno_unsafe_math_optimizations, options::OPT_fsigned_zeros, options::OPT_fno_signed_zeros)) if (A->getOption().getID() != options::OPT_fno_fast_math && A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations && A->getOption().getID() != options::OPT_fsigned_zeros) SignedZeros = false; bool TrappingMath = true; if (Arg *A = Args.getLastArg( options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math, options::OPT_funsafe_math_optimizations, options::OPT_fno_unsafe_math_optimizations, options::OPT_ftrapping_math, options::OPT_fno_trapping_math)) if (A->getOption().getID() != options::OPT_fno_fast_math && A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations && A->getOption().getID() != options::OPT_ftrapping_math) TrappingMath = false; if (!MathErrno && AssociativeMath && ReciprocalMath && !SignedZeros && !TrappingMath) CmdArgs.push_back("-menable-unsafe-fp-math"); if (!SignedZeros) CmdArgs.push_back("-fno-signed-zeros"); if (ReciprocalMath) CmdArgs.push_back("-freciprocal-math"); // Validate and pass through -fp-contract option. if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math, options::OPT_ffp_contract)) { if (A->getOption().getID() == options::OPT_ffp_contract) { StringRef Val = A->getValue(); if (Val == "fast" || Val == "on" || Val == "off") { CmdArgs.push_back(Args.MakeArgString("-ffp-contract=" + Val)); } else { D.Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName() << Val; } } else if (A->getOption().matches(options::OPT_ffast_math) || (OFastEnabled && A->getOption().matches(options::OPT_Ofast))) { // If fast-math is set then set the fp-contract mode to fast. CmdArgs.push_back(Args.MakeArgString("-ffp-contract=fast")); } } ParseMRecip(getToolChain().getDriver(), Args, CmdArgs); // We separately look for the '-ffast-math' and '-ffinite-math-only' flags, // and if we find them, tell the frontend to provide the appropriate // preprocessor macros. This is distinct from enabling any optimizations as // these options induce language changes which must survive serialization // and deserialization, etc. 
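// (For instance, `-ffast-math` must reach cc1 itself so that predefines
// such as __FAST_MATH__ are set; the macro is cited here as a known example
// of such a language-level effect.)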
if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math)) if (!A->getOption().matches(options::OPT_fno_fast_math)) CmdArgs.push_back("-ffast-math"); if (Arg *A = Args.getLastArg(options::OPT_ffinite_math_only, options::OPT_fno_fast_math)) if (A->getOption().matches(options::OPT_ffinite_math_only)) CmdArgs.push_back("-ffinite-math-only"); // Decide whether to use verbose asm. Verbose assembly is the default on // toolchains which have the integrated assembler on by default. bool IsIntegratedAssemblerDefault = getToolChain().IsIntegratedAssemblerDefault(); if (Args.hasFlag(options::OPT_fverbose_asm, options::OPT_fno_verbose_asm, IsIntegratedAssemblerDefault) || Args.hasArg(options::OPT_dA)) CmdArgs.push_back("-masm-verbose"); if (!Args.hasFlag(options::OPT_fintegrated_as, options::OPT_fno_integrated_as, IsIntegratedAssemblerDefault)) CmdArgs.push_back("-no-integrated-as"); if (Args.hasArg(options::OPT_fdebug_pass_structure)) { CmdArgs.push_back("-mdebug-pass"); CmdArgs.push_back("Structure"); } if (Args.hasArg(options::OPT_fdebug_pass_arguments)) { CmdArgs.push_back("-mdebug-pass"); CmdArgs.push_back("Arguments"); } // Enable -mconstructor-aliases except on darwin, where we have to work around // a linker bug (see ), and CUDA device code, where // aliases aren't supported. if (!getToolChain().getTriple().isOSDarwin() && !getToolChain().getTriple().isNVPTX()) CmdArgs.push_back("-mconstructor-aliases"); // Darwin's kernel doesn't support guard variables; just die if we // try to use them. if (KernelOrKext && getToolChain().getTriple().isOSDarwin()) CmdArgs.push_back("-fforbid-guard-variables"); if (Args.hasFlag(options::OPT_mms_bitfields, options::OPT_mno_ms_bitfields, false)) { CmdArgs.push_back("-mms-bitfields"); } // This is a coarse approximation of what llvm-gcc actually does, both // -fasynchronous-unwind-tables and -fnon-call-exceptions interact in more // complicated ways. bool AsynchronousUnwindTables = Args.hasFlag(options::OPT_fasynchronous_unwind_tables, options::OPT_fno_asynchronous_unwind_tables, (getToolChain().IsUnwindTablesDefault() || getToolChain().getSanitizerArgs().needsUnwindTables()) && !KernelOrKext); if (Args.hasFlag(options::OPT_funwind_tables, options::OPT_fno_unwind_tables, AsynchronousUnwindTables)) CmdArgs.push_back("-munwind-tables"); getToolChain().addClangTargetOptions(Args, CmdArgs); if (Arg *A = Args.getLastArg(options::OPT_flimited_precision_EQ)) { CmdArgs.push_back("-mlimit-float-precision"); CmdArgs.push_back(A->getValue()); } // FIXME: Handle -mtune=. (void)Args.hasArg(options::OPT_mtune_EQ); if (Arg *A = Args.getLastArg(options::OPT_mcmodel_EQ)) { CmdArgs.push_back("-mcode-model"); CmdArgs.push_back(A->getValue()); } // Add the target cpu std::string CPU = getCPUName(Args, Triple, /*FromAs*/ false); if (!CPU.empty()) { CmdArgs.push_back("-target-cpu"); CmdArgs.push_back(Args.MakeArgString(CPU)); } if (const Arg *A = Args.getLastArg(options::OPT_mfpmath_EQ)) { CmdArgs.push_back("-mfpmath"); CmdArgs.push_back(A->getValue()); } // Add the target features getTargetFeatures(getToolChain(), Triple, Args, CmdArgs, false); // Add target specific flags. switch (getToolChain().getArch()) { default: break; case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: // Use the effective triple, which takes into account the deployment target. 
AddARMTargetArgs(Triple, Args, CmdArgs, KernelOrKext); break; case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: AddAArch64TargetArgs(Args, CmdArgs); break; case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: AddMIPSTargetArgs(Args, CmdArgs); break; case llvm::Triple::ppc: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: AddPPCTargetArgs(Args, CmdArgs); break; case llvm::Triple::sparc: case llvm::Triple::sparcel: case llvm::Triple::sparcv9: AddSparcTargetArgs(Args, CmdArgs); break; case llvm::Triple::systemz: AddSystemZTargetArgs(Args, CmdArgs); break; case llvm::Triple::x86: case llvm::Triple::x86_64: AddX86TargetArgs(Args, CmdArgs); break; case llvm::Triple::lanai: AddLanaiTargetArgs(Args, CmdArgs); break; case llvm::Triple::hexagon: AddHexagonTargetArgs(Args, CmdArgs); break; case llvm::Triple::wasm32: case llvm::Triple::wasm64: AddWebAssemblyTargetArgs(Args, CmdArgs); break; } // The 'g' group options involve a somewhat intricate sequence of decisions // about what to pass from the driver to the frontend, but by the time they // reach cc1 they've been factored into three well-defined orthogonal choices: // * what level of debug info to generate // * what dwarf version to write // * what debugger tuning to use // This avoids having to monkey around further in cc1 other than to disable // codeview if not running in a Windows environment. Perhaps even that // decision should be made in the driver as well though. unsigned DwarfVersion = 0; llvm::DebuggerKind DebuggerTuning = getToolChain().getDefaultDebuggerTuning(); // These two are potentially updated by AddClangCLArgs. codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo; bool EmitCodeView = false; // Add clang-cl arguments. types::ID InputType = Input.getType(); if (getToolChain().getDriver().IsCLMode()) AddClangCLArgs(Args, InputType, CmdArgs, &DebugInfoKind, &EmitCodeView); // Pass the linker version in use. if (Arg *A = Args.getLastArg(options::OPT_mlinker_version_EQ)) { CmdArgs.push_back("-target-linker-version"); CmdArgs.push_back(A->getValue()); } if (!shouldUseLeafFramePointer(Args, getToolChain().getTriple())) CmdArgs.push_back("-momit-leaf-frame-pointer"); // Explicitly error on some things we know we don't support and can't just // ignore. if (!Args.hasArg(options::OPT_fallow_unsupported)) { Arg *Unsupported; if (types::isCXX(InputType) && getToolChain().getTriple().isOSDarwin() && getToolChain().getArch() == llvm::Triple::x86) { if ((Unsupported = Args.getLastArg(options::OPT_fapple_kext)) || (Unsupported = Args.getLastArg(options::OPT_mkernel))) D.Diag(diag::err_drv_clang_unsupported_opt_cxx_darwin_i386) << Unsupported->getOption().getName(); } } Args.AddAllArgs(CmdArgs, options::OPT_v); Args.AddLastArg(CmdArgs, options::OPT_H); if (D.CCPrintHeaders && !D.CCGenDiagnostics) { CmdArgs.push_back("-header-include-file"); CmdArgs.push_back(D.CCPrintHeadersFilename ? D.CCPrintHeadersFilename : "-"); } Args.AddLastArg(CmdArgs, options::OPT_P); Args.AddLastArg(CmdArgs, options::OPT_print_ivar_layout); if (D.CCLogDiagnostics && !D.CCGenDiagnostics) { CmdArgs.push_back("-diagnostic-log-file"); CmdArgs.push_back(D.CCLogDiagnosticsFilename ? D.CCLogDiagnosticsFilename : "-"); } Args.ClaimAllArgs(options::OPT_g_Group); Arg *SplitDwarfArg = Args.getLastArg(options::OPT_gsplit_dwarf); if (Arg *A = Args.getLastArg(options::OPT_g_Group)) { // If the last option explicitly specified a debug-info level, use it.
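// (e.g. a hypothetical `-g -g0` command line ends with no debug info,
// since only the last g-group argument is inspected here.)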
if (A->getOption().matches(options::OPT_gN_Group)) { DebugInfoKind = DebugLevelToInfoKind(*A); // If you say "-gsplit-dwarf -gline-tables-only", -gsplit-dwarf loses. // But -gsplit-dwarf is not a g_group option, hence we have to check the // order explicitly. (If -gsplit-dwarf wins, we fix DebugInfoKind later.) if (SplitDwarfArg && DebugInfoKind < codegenoptions::LimitedDebugInfo && A->getIndex() > SplitDwarfArg->getIndex()) SplitDwarfArg = nullptr; } else // For any other 'g' option, use Limited. DebugInfoKind = codegenoptions::LimitedDebugInfo; } // If a debugger tuning argument appeared, remember it. if (Arg *A = Args.getLastArg(options::OPT_gTune_Group, options::OPT_ggdbN_Group)) { if (A->getOption().matches(options::OPT_glldb)) DebuggerTuning = llvm::DebuggerKind::LLDB; else if (A->getOption().matches(options::OPT_gsce)) DebuggerTuning = llvm::DebuggerKind::SCE; else DebuggerTuning = llvm::DebuggerKind::GDB; } // If a -gdwarf argument appeared, remember it. if (Arg *A = Args.getLastArg(options::OPT_gdwarf_2, options::OPT_gdwarf_3, options::OPT_gdwarf_4, options::OPT_gdwarf_5)) DwarfVersion = DwarfVersionNum(A->getSpelling()); // Forward -gcodeview. EmitCodeView might have been set by CL-compatibility // argument parsing. if (Args.hasArg(options::OPT_gcodeview) || EmitCodeView) { // DwarfVersion remains at 0 if no explicit choice was made. CmdArgs.push_back("-gcodeview"); } else if (DwarfVersion == 0 && DebugInfoKind != codegenoptions::NoDebugInfo) { DwarfVersion = getToolChain().GetDefaultDwarfVersion(); } // We ignore flags -gstrict-dwarf and -grecord-gcc-switches for now. Args.ClaimAllArgs(options::OPT_g_flags_Group); // PS4 defaults to no column info if (Args.hasFlag(options::OPT_gcolumn_info, options::OPT_gno_column_info, /*Default=*/ !IsPS4CPU)) CmdArgs.push_back("-dwarf-column-info"); // FIXME: Move backend command line options to the module. if (Args.hasArg(options::OPT_gmodules)) { DebugInfoKind = codegenoptions::LimitedDebugInfo; CmdArgs.push_back("-dwarf-ext-refs"); CmdArgs.push_back("-fmodule-format=obj"); } // -gsplit-dwarf should turn on -g and enable the backend dwarf // splitting and extraction. // FIXME: Currently only works on Linux. if (getToolChain().getTriple().isOSLinux() && SplitDwarfArg) { DebugInfoKind = codegenoptions::LimitedDebugInfo; CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-split-dwarf=Enable"); } // After we've dealt with all combinations of things that could // make DebugInfoKind be other than None or DebugLineTablesOnly, // figure out if we need to "upgrade" it to standalone debug info. // We parse these two '-f' options whether or not they will be used, // to claim them even if you wrote "-fstandalone-debug -gline-tables-only" bool NeedFullDebug = Args.hasFlag(options::OPT_fstandalone_debug, options::OPT_fno_standalone_debug, getToolChain().GetDefaultStandaloneDebug()); if (DebugInfoKind == codegenoptions::LimitedDebugInfo && NeedFullDebug) DebugInfoKind = codegenoptions::FullDebugInfo; RenderDebugEnablingArgs(Args, CmdArgs, DebugInfoKind, DwarfVersion, DebuggerTuning); // -ggnu-pubnames turns on gnu style pubnames in the backend. if (Args.hasArg(options::OPT_ggnu_pubnames)) { CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-generate-gnu-dwarf-pub-sections"); } // -gdwarf-aranges turns on the emission of the aranges section in the // backend. // Always enabled on the PS4. 
if (Args.hasArg(options::OPT_gdwarf_aranges) || IsPS4CPU) { CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-generate-arange-section"); } if (Args.hasFlag(options::OPT_fdebug_types_section, options::OPT_fno_debug_types_section, false)) { CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-generate-type-units"); } // CloudABI and WebAssembly use -ffunction-sections and -fdata-sections by // default. bool UseSeparateSections = Triple.getOS() == llvm::Triple::CloudABI || Triple.getArch() == llvm::Triple::wasm32 || Triple.getArch() == llvm::Triple::wasm64; if (Args.hasFlag(options::OPT_ffunction_sections, options::OPT_fno_function_sections, UseSeparateSections)) { CmdArgs.push_back("-ffunction-sections"); } if (Args.hasFlag(options::OPT_fdata_sections, options::OPT_fno_data_sections, UseSeparateSections)) { CmdArgs.push_back("-fdata-sections"); } if (!Args.hasFlag(options::OPT_funique_section_names, options::OPT_fno_unique_section_names, true)) CmdArgs.push_back("-fno-unique-section-names"); Args.AddAllArgs(CmdArgs, options::OPT_finstrument_functions); if (Args.hasFlag(options::OPT_fxray_instrument, options::OPT_fnoxray_instrument, false)) { CmdArgs.push_back("-fxray-instrument"); if (const Arg *A = Args.getLastArg(options::OPT_fxray_instruction_threshold_, options::OPT_fxray_instruction_threshold_EQ)) { CmdArgs.push_back("-fxray-instruction-threshold"); CmdArgs.push_back(A->getValue()); } } addPGOAndCoverageFlags(C, D, Output, Args, CmdArgs); // Add runtime flag for PS4 when PGO or Coverage are enabled. if (getToolChain().getTriple().isPS4CPU()) addPS4ProfileRTArgs(getToolChain(), Args, CmdArgs); // Pass options for controlling the default header search paths. if (Args.hasArg(options::OPT_nostdinc)) { CmdArgs.push_back("-nostdsysteminc"); CmdArgs.push_back("-nobuiltininc"); } else { if (Args.hasArg(options::OPT_nostdlibinc)) CmdArgs.push_back("-nostdsysteminc"); Args.AddLastArg(CmdArgs, options::OPT_nostdincxx); Args.AddLastArg(CmdArgs, options::OPT_nobuiltininc); } // Pass the path to compiler resource files. 
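// (Typically a directory such as <prefix>/lib/clang/<version>, from which
// cc1 resolves its built-in headers and runtime libraries; the exact layout
// is toolchain-dependent.)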
CmdArgs.push_back("-resource-dir"); CmdArgs.push_back(D.ResourceDir.c_str()); Args.AddLastArg(CmdArgs, options::OPT_working_directory); bool ARCMTEnabled = false; if (!Args.hasArg(options::OPT_fno_objc_arc, options::OPT_fobjc_arc)) { if (const Arg *A = Args.getLastArg(options::OPT_ccc_arcmt_check, options::OPT_ccc_arcmt_modify, options::OPT_ccc_arcmt_migrate)) { ARCMTEnabled = true; switch (A->getOption().getID()) { default: llvm_unreachable("missed a case"); case options::OPT_ccc_arcmt_check: CmdArgs.push_back("-arcmt-check"); break; case options::OPT_ccc_arcmt_modify: CmdArgs.push_back("-arcmt-modify"); break; case options::OPT_ccc_arcmt_migrate: CmdArgs.push_back("-arcmt-migrate"); CmdArgs.push_back("-mt-migrate-directory"); CmdArgs.push_back(A->getValue()); Args.AddLastArg(CmdArgs, options::OPT_arcmt_migrate_report_output); Args.AddLastArg(CmdArgs, options::OPT_arcmt_migrate_emit_arc_errors); break; } } } else { Args.ClaimAllArgs(options::OPT_ccc_arcmt_check); Args.ClaimAllArgs(options::OPT_ccc_arcmt_modify); Args.ClaimAllArgs(options::OPT_ccc_arcmt_migrate); } if (const Arg *A = Args.getLastArg(options::OPT_ccc_objcmt_migrate)) { if (ARCMTEnabled) { D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args) << "-ccc-arcmt-migrate"; } CmdArgs.push_back("-mt-migrate-directory"); CmdArgs.push_back(A->getValue()); if (!Args.hasArg(options::OPT_objcmt_migrate_literals, options::OPT_objcmt_migrate_subscripting, options::OPT_objcmt_migrate_property)) { // None specified, means enable them all. CmdArgs.push_back("-objcmt-migrate-literals"); CmdArgs.push_back("-objcmt-migrate-subscripting"); CmdArgs.push_back("-objcmt-migrate-property"); } else { Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_literals); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_subscripting); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property); } } else { Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_literals); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_subscripting); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_all); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_readonly_property); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_readwrite_property); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property_dot_syntax); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_annotation); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_instancetype); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_nsmacros); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_protocol_conformance); Args.AddLastArg(CmdArgs, options::OPT_objcmt_atomic_property); Args.AddLastArg(CmdArgs, options::OPT_objcmt_returns_innerpointer_property); Args.AddLastArg(CmdArgs, options::OPT_objcmt_ns_nonatomic_iosonly); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_designated_init); Args.AddLastArg(CmdArgs, options::OPT_objcmt_whitelist_dir_path); } // Add preprocessing options like -I, -D, etc. if we are using the // preprocessor. // // FIXME: Support -fpreprocessed if (types::getPreprocessedType(InputType) != types::TY_INVALID) AddPreprocessingOptions(C, JA, D, Args, CmdArgs, Output, Inputs); // Don't warn about "clang -c -DPIC -fPIC test.i" because libtool.m4 assumes // that "The compiler can only warn and ignore the option if not recognized". // When building with ccache, it will pass -D options to clang even on // preprocessed inputs and configure concludes that -fPIC is not supported. 
Args.ClaimAllArgs(options::OPT_D); // Manually translate -O4 to -O3; let clang reject others. if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { if (A->getOption().matches(options::OPT_O4)) { CmdArgs.push_back("-O3"); D.Diag(diag::warn_O4_is_O3); } else { A->render(Args, CmdArgs); } } // Warn about ignored options to clang. for (const Arg *A : Args.filtered(options::OPT_clang_ignored_gcc_optimization_f_Group)) { D.Diag(diag::warn_ignored_gcc_optimization) << A->getAsString(Args); A->claim(); } claimNoWarnArgs(Args); Args.AddAllArgs(CmdArgs, options::OPT_R_Group); Args.AddAllArgs(CmdArgs, options::OPT_W_Group); if (Args.hasFlag(options::OPT_pedantic, options::OPT_no_pedantic, false)) CmdArgs.push_back("-pedantic"); Args.AddLastArg(CmdArgs, options::OPT_pedantic_errors); Args.AddLastArg(CmdArgs, options::OPT_w); // Handle -{std, ansi, trigraphs} -- take the last of -{std, ansi} // (-ansi is equivalent to -std=c89 or -std=c++98). // // If a std is supplied, only add -trigraphs if it follows the // option. bool ImplyVCPPCXXVer = false; if (Arg *Std = Args.getLastArg(options::OPT_std_EQ, options::OPT_ansi)) { if (Std->getOption().matches(options::OPT_ansi)) if (types::isCXX(InputType)) CmdArgs.push_back("-std=c++98"); else CmdArgs.push_back("-std=c89"); else Std->render(Args, CmdArgs); // If -f(no-)trigraphs appears after the language standard flag, honor it. if (Arg *A = Args.getLastArg(options::OPT_std_EQ, options::OPT_ansi, options::OPT_ftrigraphs, options::OPT_fno_trigraphs)) if (A != Std) A->render(Args, CmdArgs); } else { // Honor -std-default. // // FIXME: Clang doesn't correctly handle -std= when the input language // doesn't match. For the time being just ignore this for C++ inputs; // eventually we want to do all the standard defaulting here instead of // splitting it between the driver and clang -cc1. if (!types::isCXX(InputType)) Args.AddAllArgsTranslated(CmdArgs, options::OPT_std_default_EQ, "-std=", /*Joined=*/true); else if (IsWindowsMSVC) ImplyVCPPCXXVer = true; Args.AddLastArg(CmdArgs, options::OPT_ftrigraphs, options::OPT_fno_trigraphs); } // GCC's behavior for -Wwrite-strings is a bit strange: // * In C, this "warning flag" changes the types of string literals from // 'char[N]' to 'const char[N]', and thus triggers an unrelated warning // for the discarded qualifier. // * In C++, this is just a normal warning flag. // // Implementing this warning correctly in C is hard, so we follow GCC's // behavior for now. FIXME: Directly diagnose uses of a string literal as // a non-const char* in C, rather than using this crude hack. if (!types::isCXX(InputType)) { // FIXME: This should behave just like a warning flag, and thus should also // respect -Weverything, -Wno-everything, -Werror=write-strings, and so on. Arg *WriteStrings = Args.getLastArg(options::OPT_Wwrite_strings, options::OPT_Wno_write_strings, options::OPT_w); if (WriteStrings && WriteStrings->getOption().matches(options::OPT_Wwrite_strings)) CmdArgs.push_back("-fconst-strings"); } // GCC provides a macro definition '__DEPRECATED' when -Wdeprecated is active // during C++ compilation, which it is by default. GCC keeps this define even // in the presence of '-w'; match this behavior bug-for-bug. if (types::isCXX(InputType) && Args.hasFlag(options::OPT_Wdeprecated, options::OPT_Wno_deprecated, true)) { CmdArgs.push_back("-fdeprecated-macro"); } // Translate GCC's misnamed '-fasm' arguments to '-fgnu-keywords'.
if (Arg *Asm = Args.getLastArg(options::OPT_fasm, options::OPT_fno_asm)) { if (Asm->getOption().matches(options::OPT_fasm)) CmdArgs.push_back("-fgnu-keywords"); else CmdArgs.push_back("-fno-gnu-keywords"); } if (ShouldDisableDwarfDirectory(Args, getToolChain())) CmdArgs.push_back("-fno-dwarf-directory-asm"); if (ShouldDisableAutolink(Args, getToolChain())) CmdArgs.push_back("-fno-autolink"); // Add in -fdebug-compilation-dir if necessary. addDebugCompDirArg(Args, CmdArgs); for (const Arg *A : Args.filtered(options::OPT_fdebug_prefix_map_EQ)) { StringRef Map = A->getValue(); if (Map.find('=') == StringRef::npos) D.Diag(diag::err_drv_invalid_argument_to_fdebug_prefix_map) << Map; else CmdArgs.push_back(Args.MakeArgString("-fdebug-prefix-map=" + Map)); A->claim(); } if (Arg *A = Args.getLastArg(options::OPT_ftemplate_depth_, options::OPT_ftemplate_depth_EQ)) { CmdArgs.push_back("-ftemplate-depth"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_foperator_arrow_depth_EQ)) { CmdArgs.push_back("-foperator-arrow-depth"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_depth_EQ)) { CmdArgs.push_back("-fconstexpr-depth"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_steps_EQ)) { CmdArgs.push_back("-fconstexpr-steps"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_fbracket_depth_EQ)) { CmdArgs.push_back("-fbracket-depth"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_Wlarge_by_value_copy_EQ, options::OPT_Wlarge_by_value_copy_def)) { if (A->getNumValues()) { StringRef bytes = A->getValue(); CmdArgs.push_back(Args.MakeArgString("-Wlarge-by-value-copy=" + bytes)); } else CmdArgs.push_back("-Wlarge-by-value-copy=64"); // default value } if (Args.hasArg(options::OPT_relocatable_pch)) CmdArgs.push_back("-relocatable-pch"); if (Arg *A = Args.getLastArg(options::OPT_fconstant_string_class_EQ)) { CmdArgs.push_back("-fconstant-string-class"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_ftabstop_EQ)) { CmdArgs.push_back("-ftabstop"); CmdArgs.push_back(A->getValue()); } CmdArgs.push_back("-ferror-limit"); if (Arg *A = Args.getLastArg(options::OPT_ferror_limit_EQ)) CmdArgs.push_back(A->getValue()); else CmdArgs.push_back("19"); if (Arg *A = Args.getLastArg(options::OPT_fmacro_backtrace_limit_EQ)) { CmdArgs.push_back("-fmacro-backtrace-limit"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_ftemplate_backtrace_limit_EQ)) { CmdArgs.push_back("-ftemplate-backtrace-limit"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_backtrace_limit_EQ)) { CmdArgs.push_back("-fconstexpr-backtrace-limit"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_fspell_checking_limit_EQ)) { CmdArgs.push_back("-fspell-checking-limit"); CmdArgs.push_back(A->getValue()); } // Pass -fmessage-length=. CmdArgs.push_back("-fmessage-length"); if (Arg *A = Args.getLastArg(options::OPT_fmessage_length_EQ)) { CmdArgs.push_back(A->getValue()); } else { // If -fmessage-length=N was not specified, determine whether this is a // terminal and, if so, implicitly define -fmessage-length appropriately. unsigned N = llvm::sys::Process::StandardErrColumns(); CmdArgs.push_back(Args.MakeArgString(Twine(N))); } // -fvisibility= and -fvisibility-ms-compat are of a piece. 
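// (That is, -fvisibility-ms-compat is treated as shorthand for hidden
// symbol visibility combined with default type visibility, as rendered
// below.)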
if (const Arg *A = Args.getLastArg(options::OPT_fvisibility_EQ, options::OPT_fvisibility_ms_compat)) { if (A->getOption().matches(options::OPT_fvisibility_EQ)) { CmdArgs.push_back("-fvisibility"); CmdArgs.push_back(A->getValue()); } else { assert(A->getOption().matches(options::OPT_fvisibility_ms_compat)); CmdArgs.push_back("-fvisibility"); CmdArgs.push_back("hidden"); CmdArgs.push_back("-ftype-visibility"); CmdArgs.push_back("default"); } } Args.AddLastArg(CmdArgs, options::OPT_fvisibility_inlines_hidden); Args.AddLastArg(CmdArgs, options::OPT_ftlsmodel_EQ); // -fhosted is default. if (Args.hasFlag(options::OPT_ffreestanding, options::OPT_fhosted, false) || KernelOrKext) CmdArgs.push_back("-ffreestanding"); // Forward -f (flag) options which we can pass directly. Args.AddLastArg(CmdArgs, options::OPT_femit_all_decls); Args.AddLastArg(CmdArgs, options::OPT_fheinous_gnu_extensions); Args.AddLastArg(CmdArgs, options::OPT_fno_operator_names); // Emulated TLS is enabled by default on Android, and can be enabled manually // with -femulated-tls. bool EmulatedTLSDefault = Triple.isAndroid() || Triple.isWindowsCygwinEnvironment(); if (Args.hasFlag(options::OPT_femulated_tls, options::OPT_fno_emulated_tls, EmulatedTLSDefault)) CmdArgs.push_back("-femulated-tls"); // AltiVec-like language extensions aren't relevant for assembling. if (!isa<PreprocessJobAction>(JA) || Output.getType() != types::TY_PP_Asm) { Args.AddLastArg(CmdArgs, options::OPT_faltivec); Args.AddLastArg(CmdArgs, options::OPT_fzvector); } Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_show_template_tree); Args.AddLastArg(CmdArgs, options::OPT_fno_elide_type); // Forward flags for OpenMP. We don't do this if the current action is a // device offloading action. // // TODO: Allow OpenMP offload actions when they become available. if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, options::OPT_fno_openmp, false) && JA.isDeviceOffloading(Action::OFK_None)) { switch (getOpenMPRuntime(getToolChain(), Args)) { case OMPRT_OMP: case OMPRT_IOMP5: // Clang can generate useful OpenMP code for these two runtime libraries. CmdArgs.push_back("-fopenmp"); // If no option regarding the use of TLS in OpenMP code generation is // given, decide a default based on the target. Otherwise rely on the // options and pass the right information to the frontend. if (!Args.hasFlag(options::OPT_fopenmp_use_tls, options::OPT_fnoopenmp_use_tls, /*Default=*/true)) CmdArgs.push_back("-fnoopenmp-use-tls"); Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ); break; default: // By default, if Clang doesn't know how to generate useful OpenMP code // for a specific runtime library, we just don't pass the '-fopenmp' flag // down to the actual compilation. // FIXME: It would be better to have a mode which *only* omits IR // generation based on the OpenMP support so that we get consistent // semantic analysis, etc. break; } } const SanitizerArgs &Sanitize = getToolChain().getSanitizerArgs(); Sanitize.addArgs(getToolChain(), Args, CmdArgs, InputType); // Report an error for -faltivec on anything other than PowerPC. if (const Arg *A = Args.getLastArg(options::OPT_faltivec)) { const llvm::Triple::ArchType Arch = getToolChain().getArch(); if (!(Arch == llvm::Triple::ppc || Arch == llvm::Triple::ppc64 || Arch == llvm::Triple::ppc64le)) D.Diag(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args) << "ppc/ppc64/ppc64le"; } // -fzvector is incompatible with -faltivec.
if (Arg *A = Args.getLastArg(options::OPT_fzvector)) if (Args.hasArg(options::OPT_faltivec)) D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args) << "-faltivec"; if (getToolChain().SupportsProfiling()) Args.AddLastArg(CmdArgs, options::OPT_pg); // -flax-vector-conversions is default. if (!Args.hasFlag(options::OPT_flax_vector_conversions, options::OPT_fno_lax_vector_conversions)) CmdArgs.push_back("-fno-lax-vector-conversions"); if (Args.getLastArg(options::OPT_fapple_kext) || (Args.hasArg(options::OPT_mkernel) && types::isCXX(InputType))) CmdArgs.push_back("-fapple-kext"); Args.AddLastArg(CmdArgs, options::OPT_fobjc_sender_dependent_dispatch); Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_print_source_range_info); Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_parseable_fixits); Args.AddLastArg(CmdArgs, options::OPT_ftime_report); Args.AddLastArg(CmdArgs, options::OPT_ftrapv); if (Arg *A = Args.getLastArg(options::OPT_ftrapv_handler_EQ)) { CmdArgs.push_back("-ftrapv-handler"); CmdArgs.push_back(A->getValue()); } Args.AddLastArg(CmdArgs, options::OPT_ftrap_function_EQ); // -fno-strict-overflow implies -fwrapv if it isn't disabled, but // -fstrict-overflow won't turn off an explicitly enabled -fwrapv. if (Arg *A = Args.getLastArg(options::OPT_fwrapv, options::OPT_fno_wrapv)) { if (A->getOption().matches(options::OPT_fwrapv)) CmdArgs.push_back("-fwrapv"); } else if (Arg *A = Args.getLastArg(options::OPT_fstrict_overflow, options::OPT_fno_strict_overflow)) { if (A->getOption().matches(options::OPT_fno_strict_overflow)) CmdArgs.push_back("-fwrapv"); } if (Arg *A = Args.getLastArg(options::OPT_freroll_loops, options::OPT_fno_reroll_loops)) if (A->getOption().matches(options::OPT_freroll_loops)) CmdArgs.push_back("-freroll-loops"); Args.AddLastArg(CmdArgs, options::OPT_fwritable_strings); Args.AddLastArg(CmdArgs, options::OPT_funroll_loops, options::OPT_fno_unroll_loops); Args.AddLastArg(CmdArgs, options::OPT_pthread); // -stack-protector=0 is default. unsigned StackProtectorLevel = 0; if (Arg *A = Args.getLastArg(options::OPT_fno_stack_protector, options::OPT_fstack_protector_all, options::OPT_fstack_protector_strong, options::OPT_fstack_protector)) { if (A->getOption().matches(options::OPT_fstack_protector)) { StackProtectorLevel = std::max<unsigned>( LangOptions::SSPOn, getToolChain().GetDefaultStackProtectorLevel(KernelOrKext)); } else if (A->getOption().matches(options::OPT_fstack_protector_strong)) StackProtectorLevel = LangOptions::SSPStrong; else if (A->getOption().matches(options::OPT_fstack_protector_all)) StackProtectorLevel = LangOptions::SSPReq; } else { StackProtectorLevel = getToolChain().GetDefaultStackProtectorLevel(KernelOrKext); } if (StackProtectorLevel) { CmdArgs.push_back("-stack-protector"); CmdArgs.push_back(Args.MakeArgString(Twine(StackProtectorLevel))); } // --param ssp-buffer-size= for (const Arg *A : Args.filtered(options::OPT__param)) { StringRef Str(A->getValue()); if (Str.startswith("ssp-buffer-size=")) { if (StackProtectorLevel) { CmdArgs.push_back("-stack-protector-buffer-size"); // FIXME: Verify the argument is a valid integer.
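// ("ssp-buffer-size=" is 16 characters, so the drop_front(16) below keeps
// only the value: e.g. `--param ssp-buffer-size=4` becomes
// `-stack-protector-buffer-size 4`.)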
CmdArgs.push_back(Args.MakeArgString(Str.drop_front(16))); } A->claim(); } } // Translate -mstackrealign if (Args.hasFlag(options::OPT_mstackrealign, options::OPT_mno_stackrealign, false)) CmdArgs.push_back(Args.MakeArgString("-mstackrealign")); if (Args.hasArg(options::OPT_mstack_alignment)) { StringRef alignment = Args.getLastArgValue(options::OPT_mstack_alignment); CmdArgs.push_back(Args.MakeArgString("-mstack-alignment=" + alignment)); } if (Args.hasArg(options::OPT_mstack_probe_size)) { StringRef Size = Args.getLastArgValue(options::OPT_mstack_probe_size); if (!Size.empty()) CmdArgs.push_back(Args.MakeArgString("-mstack-probe-size=" + Size)); else CmdArgs.push_back("-mstack-probe-size=0"); } switch (getToolChain().getArch()) { case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: CmdArgs.push_back("-fallow-half-arguments-and-returns"); break; default: break; } if (Arg *A = Args.getLastArg(options::OPT_mrestrict_it, options::OPT_mno_restrict_it)) { if (A->getOption().matches(options::OPT_mrestrict_it)) { CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-arm-restrict-it"); } else { CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-arm-no-restrict-it"); } } else if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm || Triple.getArch() == llvm::Triple::thumb)) { // Windows on ARM expects restricted IT blocks CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-arm-restrict-it"); } // Forward -cl options to -cc1 if (Args.getLastArg(options::OPT_cl_opt_disable)) { CmdArgs.push_back("-cl-opt-disable"); } if (Args.getLastArg(options::OPT_cl_strict_aliasing)) { CmdArgs.push_back("-cl-strict-aliasing"); } if (Args.getLastArg(options::OPT_cl_single_precision_constant)) { CmdArgs.push_back("-cl-single-precision-constant"); } if (Args.getLastArg(options::OPT_cl_finite_math_only)) { CmdArgs.push_back("-cl-finite-math-only"); } if (Args.getLastArg(options::OPT_cl_kernel_arg_info)) { CmdArgs.push_back("-cl-kernel-arg-info"); } if (Args.getLastArg(options::OPT_cl_unsafe_math_optimizations)) { CmdArgs.push_back("-cl-unsafe-math-optimizations"); } if (Args.getLastArg(options::OPT_cl_fast_relaxed_math)) { CmdArgs.push_back("-cl-fast-relaxed-math"); } if (Args.getLastArg(options::OPT_cl_mad_enable)) { CmdArgs.push_back("-cl-mad-enable"); } if (Args.getLastArg(options::OPT_cl_no_signed_zeros)) { CmdArgs.push_back("-cl-no-signed-zeros"); } if (Arg *A = Args.getLastArg(options::OPT_cl_std_EQ)) { std::string CLStdStr = "-cl-std="; CLStdStr += A->getValue(); CmdArgs.push_back(Args.MakeArgString(CLStdStr)); } if (Args.getLastArg(options::OPT_cl_denorms_are_zero)) { CmdArgs.push_back("-cl-denorms-are-zero"); } // Forward -f options with positive and negative forms; we translate // these by hand. if (Arg *A = Args.getLastArg(options::OPT_fprofile_sample_use_EQ)) { StringRef fname = A->getValue(); if (!llvm::sys::fs::exists(fname)) D.Diag(diag::err_drv_no_such_file) << fname; else A->render(Args, CmdArgs); } // -fbuiltin is default unless -mkernel is used. bool UseBuiltins = Args.hasFlag(options::OPT_fbuiltin, options::OPT_fno_builtin, !Args.hasArg(options::OPT_mkernel)); if (!UseBuiltins) CmdArgs.push_back("-fno-builtin"); // -ffreestanding implies -fno-builtin. if (Args.hasArg(options::OPT_ffreestanding)) UseBuiltins = false; // Process the -fno-builtin-* options. 
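// (For example, a hypothetical `-fno-builtin-memcpy` is forwarded so that
// only memcpy loses builtin treatment; under a blanket -fno-builtin the
// per-function spellings are claimed but not forwarded, as below.)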
for (const auto &Arg : Args) { const Option &O = Arg->getOption(); if (!O.matches(options::OPT_fno_builtin_)) continue; Arg->claim(); // If -fno-builtin is specified, then there's no need to pass the option to // the frontend. if (!UseBuiltins) continue; StringRef FuncName = Arg->getValue(); CmdArgs.push_back(Args.MakeArgString("-fno-builtin-" + FuncName)); } if (!Args.hasFlag(options::OPT_fassume_sane_operator_new, options::OPT_fno_assume_sane_operator_new)) CmdArgs.push_back("-fno-assume-sane-operator-new"); // -fblocks=0 is default. if (Args.hasFlag(options::OPT_fblocks, options::OPT_fno_blocks, getToolChain().IsBlocksDefault()) || (Args.hasArg(options::OPT_fgnu_runtime) && Args.hasArg(options::OPT_fobjc_nonfragile_abi) && !Args.hasArg(options::OPT_fno_blocks))) { CmdArgs.push_back("-fblocks"); if (!Args.hasArg(options::OPT_fgnu_runtime) && !getToolChain().hasBlocksRuntime()) CmdArgs.push_back("-fblocks-runtime-optional"); } // -fmodules enables the use of precompiled modules (off by default). // Users can pass -fno-cxx-modules to turn off modules support for // C++/Objective-C++ programs. bool HaveModules = false; if (Args.hasFlag(options::OPT_fmodules, options::OPT_fno_modules, false)) { bool AllowedInCXX = Args.hasFlag(options::OPT_fcxx_modules, options::OPT_fno_cxx_modules, true); if (AllowedInCXX || !types::isCXX(InputType)) { CmdArgs.push_back("-fmodules"); HaveModules = true; } } // -fmodule-maps enables implicit reading of module map files. By default, // this is enabled if we are using precompiled modules. if (Args.hasFlag(options::OPT_fimplicit_module_maps, options::OPT_fno_implicit_module_maps, HaveModules)) { CmdArgs.push_back("-fimplicit-module-maps"); } // -fmodules-decluse checks that modules used are declared so (off by // default). if (Args.hasFlag(options::OPT_fmodules_decluse, options::OPT_fno_modules_decluse, false)) { CmdArgs.push_back("-fmodules-decluse"); } // -fmodules-strict-decluse is like -fmodule-decluse, but also checks that // all #included headers are part of modules. if (Args.hasFlag(options::OPT_fmodules_strict_decluse, options::OPT_fno_modules_strict_decluse, false)) { CmdArgs.push_back("-fmodules-strict-decluse"); } // -fno-implicit-modules turns off implicitly compiling modules on demand. if (!Args.hasFlag(options::OPT_fimplicit_modules, options::OPT_fno_implicit_modules)) { CmdArgs.push_back("-fno-implicit-modules"); } else if (HaveModules) { // -fmodule-cache-path specifies where our implicitly-built module files // should be written. SmallString<128> Path; if (Arg *A = Args.getLastArg(options::OPT_fmodules_cache_path)) Path = A->getValue(); if (C.isForDiagnostics()) { // When generating crash reports, we want to emit the modules along with // the reproduction sources, so we ignore any provided module path. Path = Output.getFilename(); llvm::sys::path::replace_extension(Path, ".cache"); llvm::sys::path::append(Path, "modules"); } else if (Path.empty()) { // No module path was provided: use the default. llvm::sys::path::system_temp_directory(/*erasedOnReboot=*/false, Path); llvm::sys::path::append(Path, "org.llvm.clang."); appendUserToPath(Path); llvm::sys::path::append(Path, "ModuleCache"); } const char Arg[] = "-fmodules-cache-path="; Path.insert(Path.begin(), Arg, Arg + strlen(Arg)); CmdArgs.push_back(Args.MakeArgString(Path)); } // -fmodule-name specifies the module that is currently being built (or // used for header checking by -fmodule-maps). 
Args.AddLastArg(CmdArgs, options::OPT_fmodule_name_EQ); // -fmodule-map-file can be used to specify files containing module // definitions. Args.AddAllArgs(CmdArgs, options::OPT_fmodule_map_file); // -fmodule-file can be used to specify files containing precompiled modules. if (HaveModules) Args.AddAllArgs(CmdArgs, options::OPT_fmodule_file); else Args.ClaimAllArgs(options::OPT_fmodule_file); // When building modules and generating crashdumps, we need to dump a module // dependency VFS alongside the output. if (HaveModules && C.isForDiagnostics()) { SmallString<128> VFSDir(Output.getFilename()); llvm::sys::path::replace_extension(VFSDir, ".cache"); // Add the cache directory as a temp so the crash diagnostics pick it up. C.addTempFile(Args.MakeArgString(VFSDir)); llvm::sys::path::append(VFSDir, "vfs"); CmdArgs.push_back("-module-dependency-dir"); CmdArgs.push_back(Args.MakeArgString(VFSDir)); } if (HaveModules) Args.AddLastArg(CmdArgs, options::OPT_fmodules_user_build_path); // Pass through all -fmodules-ignore-macro arguments. Args.AddAllArgs(CmdArgs, options::OPT_fmodules_ignore_macro); Args.AddLastArg(CmdArgs, options::OPT_fmodules_prune_interval); Args.AddLastArg(CmdArgs, options::OPT_fmodules_prune_after); Args.AddLastArg(CmdArgs, options::OPT_fbuild_session_timestamp); if (Arg *A = Args.getLastArg(options::OPT_fbuild_session_file)) { if (Args.hasArg(options::OPT_fbuild_session_timestamp)) D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args) << "-fbuild-session-timestamp"; llvm::sys::fs::file_status Status; if (llvm::sys::fs::status(A->getValue(), Status)) D.Diag(diag::err_drv_no_such_file) << A->getValue(); CmdArgs.push_back(Args.MakeArgString( "-fbuild-session-timestamp=" + Twine((uint64_t)Status.getLastModificationTime().toEpochTime()))); } if (Args.getLastArg(options::OPT_fmodules_validate_once_per_build_session)) { if (!Args.getLastArg(options::OPT_fbuild_session_timestamp, options::OPT_fbuild_session_file)) D.Diag(diag::err_drv_modules_validate_once_requires_timestamp); Args.AddLastArg(CmdArgs, options::OPT_fmodules_validate_once_per_build_session); } Args.AddLastArg(CmdArgs, options::OPT_fmodules_validate_system_headers); // -faccess-control is default. if (Args.hasFlag(options::OPT_fno_access_control, options::OPT_faccess_control, false)) CmdArgs.push_back("-fno-access-control"); // -felide-constructors is the default. if (Args.hasFlag(options::OPT_fno_elide_constructors, options::OPT_felide_constructors, false)) CmdArgs.push_back("-fno-elide-constructors"); ToolChain::RTTIMode RTTIMode = getToolChain().getRTTIMode(); if (KernelOrKext || (types::isCXX(InputType) && (RTTIMode == ToolChain::RM_DisabledExplicitly || RTTIMode == ToolChain::RM_DisabledImplicitly))) CmdArgs.push_back("-fno-rtti"); // -fshort-enums=0 is default for all architectures except Hexagon. if (Args.hasFlag(options::OPT_fshort_enums, options::OPT_fno_short_enums, getToolChain().getArch() == llvm::Triple::hexagon)) CmdArgs.push_back("-fshort-enums"); // -fsigned-char is default. if (Arg *A = Args.getLastArg( options::OPT_fsigned_char, options::OPT_fno_signed_char, options::OPT_funsigned_char, options::OPT_fno_unsigned_char)) { if (A->getOption().matches(options::OPT_funsigned_char) || A->getOption().matches(options::OPT_fno_signed_char)) { CmdArgs.push_back("-fno-signed-char"); } } else if (!isSignedCharDefault(getToolChain().getTriple())) { CmdArgs.push_back("-fno-signed-char"); } // -fuse-cxa-atexit is default. 
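// (Concretely, the condition below re-enables atexit-based destructor // registration, i.e. passes -fno-use-cxa-atexit, for Cygwin/MinGW, Solaris, // hexagon, xcore, environment-less MipsTechnologies triples, and for // kernel/kext code. Illustrative summary of the expression that follows.)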
if (!Args.hasFlag( options::OPT_fuse_cxa_atexit, options::OPT_fno_use_cxa_atexit, !IsWindowsCygnus && !IsWindowsGNU && getToolChain().getTriple().getOS() != llvm::Triple::Solaris && getToolChain().getArch() != llvm::Triple::hexagon && getToolChain().getArch() != llvm::Triple::xcore && ((getToolChain().getTriple().getVendor() != llvm::Triple::MipsTechnologies) || getToolChain().getTriple().hasEnvironment())) || KernelOrKext) CmdArgs.push_back("-fno-use-cxa-atexit"); // -fms-extensions=0 is default. if (Args.hasFlag(options::OPT_fms_extensions, options::OPT_fno_ms_extensions, IsWindowsMSVC)) CmdArgs.push_back("-fms-extensions"); // -fno-use-line-directives is default. if (Args.hasFlag(options::OPT_fuse_line_directives, options::OPT_fno_use_line_directives, false)) CmdArgs.push_back("-fuse-line-directives"); // -fms-compatibility=0 is default. if (Args.hasFlag(options::OPT_fms_compatibility, options::OPT_fno_ms_compatibility, (IsWindowsMSVC && Args.hasFlag(options::OPT_fms_extensions, options::OPT_fno_ms_extensions, true)))) CmdArgs.push_back("-fms-compatibility"); // -fms-compatibility-version=18.00 is default. VersionTuple MSVT = visualstudio::getMSVCVersion( &D, getToolChain(), getToolChain().getTriple(), Args, IsWindowsMSVC); if (!MSVT.empty()) CmdArgs.push_back( Args.MakeArgString("-fms-compatibility-version=" + MSVT.getAsString())); bool IsMSVC2015Compatible = MSVT.getMajor() >= 19; if (ImplyVCPPCXXVer) { StringRef LanguageStandard; if (const Arg *StdArg = Args.getLastArg(options::OPT__SLASH_std)) { LanguageStandard = llvm::StringSwitch<StringRef>(StdArg->getValue()) .Case("c++14", "-std=c++14") .Case("c++latest", "-std=c++1z") .Default(""); if (LanguageStandard.empty()) D.Diag(clang::diag::warn_drv_unused_argument) << StdArg->getAsString(Args); } if (LanguageStandard.empty()) { if (IsMSVC2015Compatible) LanguageStandard = "-std=c++14"; else LanguageStandard = "-std=c++11"; } CmdArgs.push_back(LanguageStandard.data()); } // -fno-borland-extensions is default. if (Args.hasFlag(options::OPT_fborland_extensions, options::OPT_fno_borland_extensions, false)) CmdArgs.push_back("-fborland-extensions"); // -fno-declspec is default, except for PS4. if (Args.hasFlag(options::OPT_fdeclspec, options::OPT_fno_declspec, getToolChain().getTriple().isPS4())) CmdArgs.push_back("-fdeclspec"); else if (Args.hasArg(options::OPT_fno_declspec)) CmdArgs.push_back("-fno-declspec"); // Explicitly disabling __declspec. // -fthreadsafe-statics is default, except for MSVC compatibility versions less // than 19. if (!Args.hasFlag(options::OPT_fthreadsafe_statics, options::OPT_fno_threadsafe_statics, !IsWindowsMSVC || IsMSVC2015Compatible)) CmdArgs.push_back("-fno-threadsafe-statics"); // -fno-delayed-template-parsing is default, except for Windows where MSVC STL // needs it. if (Args.hasFlag(options::OPT_fdelayed_template_parsing, options::OPT_fno_delayed_template_parsing, IsWindowsMSVC)) CmdArgs.push_back("-fdelayed-template-parsing"); // -fgnu-keywords default varies depending on language; only pass if // specified.
if (Arg *A = Args.getLastArg(options::OPT_fgnu_keywords, options::OPT_fno_gnu_keywords)) A->render(Args, CmdArgs); if (Args.hasFlag(options::OPT_fgnu89_inline, options::OPT_fno_gnu89_inline, false)) CmdArgs.push_back("-fgnu89-inline"); if (Args.hasArg(options::OPT_fno_inline)) CmdArgs.push_back("-fno-inline"); if (Arg* InlineArg = Args.getLastArg(options::OPT_finline_functions, options::OPT_finline_hint_functions, options::OPT_fno_inline_functions)) InlineArg->render(Args, CmdArgs); ObjCRuntime objcRuntime = AddObjCRuntimeArgs(Args, CmdArgs, rewriteKind); // -fobjc-dispatch-method is only relevant with the nonfragile-abi, and // legacy is the default. Except for deployment target of 10.5, // next runtime is always legacy dispatch and -fno-objc-legacy-dispatch // gets ignored silently. if (objcRuntime.isNonFragile()) { if (!Args.hasFlag(options::OPT_fobjc_legacy_dispatch, options::OPT_fno_objc_legacy_dispatch, objcRuntime.isLegacyDispatchDefaultForArch( getToolChain().getArch()))) { if (getToolChain().UseObjCMixedDispatch()) CmdArgs.push_back("-fobjc-dispatch-method=mixed"); else CmdArgs.push_back("-fobjc-dispatch-method=non-legacy"); } } // When the Objective-C legacy runtime is in effect on MacOSX, // turn on the option to do Array/Dictionary subscripting // by default. if (getToolChain().getArch() == llvm::Triple::x86 && getToolChain().getTriple().isMacOSX() && !getToolChain().getTriple().isMacOSXVersionLT(10, 7) && objcRuntime.getKind() == ObjCRuntime::FragileMacOSX && objcRuntime.isNeXTFamily()) CmdArgs.push_back("-fobjc-subscripting-legacy-runtime"); // -fencode-extended-block-signature=1 is default. if (getToolChain().IsEncodeExtendedBlockSignatureDefault()) { CmdArgs.push_back("-fencode-extended-block-signature"); } // Allow -fno-objc-arr to trump -fobjc-arr/-fobjc-arc. // NOTE: This logic is duplicated in ToolChains.cpp. bool ARC = isObjCAutoRefCount(Args); if (ARC) { getToolChain().CheckObjCARC(); CmdArgs.push_back("-fobjc-arc"); // FIXME: It seems like this entire block, and several around it should be // wrapped in isObjC, but for now we just use it here as this is where it // was being used previously. if (types::isCXX(InputType) && types::isObjC(InputType)) { if (getToolChain().GetCXXStdlibType(Args) == ToolChain::CST_Libcxx) CmdArgs.push_back("-fobjc-arc-cxxlib=libc++"); else CmdArgs.push_back("-fobjc-arc-cxxlib=libstdc++"); } // Allow the user to enable full exceptions code emission. // We define off for Objective-C, on for Objective-C++. if (Args.hasFlag(options::OPT_fobjc_arc_exceptions, options::OPT_fno_objc_arc_exceptions, /*default*/ types::isCXX(InputType))) CmdArgs.push_back("-fobjc-arc-exceptions"); } // -fobjc-infer-related-result-type is the default, except in the Objective-C // rewriter. if (rewriteKind != RK_None) CmdArgs.push_back("-fno-objc-infer-related-result-type"); // Handle -fobjc-gc and -fobjc-gc-only. They are exclusive, and -fobjc-gc-only // takes precedence. const Arg *GCArg = Args.getLastArg(options::OPT_fobjc_gc_only); if (!GCArg) GCArg = Args.getLastArg(options::OPT_fobjc_gc); if (GCArg) { if (ARC) { D.Diag(diag::err_drv_objc_gc_arr) << GCArg->getAsString(Args); } else if (getToolChain().SupportsObjCGC()) { GCArg->render(Args, CmdArgs); } else { // FIXME: We should move this to a hard error. D.Diag(diag::warn_drv_objc_gc_unsupported) << GCArg->getAsString(Args); } } // Pass down -fobjc-weak or -fno-objc-weak if present.
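// (Illustrative: -fobjc-weak is rejected with a diagnostic when it is // combined with -fobjc-gc or when the selected runtime has no weak-reference // support; otherwise the flag is rendered through to cc1 unchanged, as the // block below implements.)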
if (types::isObjC(InputType)) { auto WeakArg = Args.getLastArg(options::OPT_fobjc_weak, options::OPT_fno_objc_weak); if (!WeakArg) { // nothing to do } else if (GCArg) { if (WeakArg->getOption().matches(options::OPT_fobjc_weak)) D.Diag(diag::err_objc_weak_with_gc); } else if (!objcRuntime.allowsWeak()) { if (WeakArg->getOption().matches(options::OPT_fobjc_weak)) D.Diag(diag::err_objc_weak_unsupported); } else { WeakArg->render(Args, CmdArgs); } } if (Args.hasFlag(options::OPT_fapplication_extension, options::OPT_fno_application_extension, false)) CmdArgs.push_back("-fapplication-extension"); // Handle GCC-style exception args. if (!C.getDriver().IsCLMode()) addExceptionArgs(Args, InputType, getToolChain(), KernelOrKext, objcRuntime, CmdArgs); if (Args.hasArg(options::OPT_fsjlj_exceptions) || getToolChain().UseSjLjExceptions(Args)) CmdArgs.push_back("-fsjlj-exceptions"); // C++ "sane" operator new. if (!Args.hasFlag(options::OPT_fassume_sane_operator_new, options::OPT_fno_assume_sane_operator_new)) CmdArgs.push_back("-fno-assume-sane-operator-new"); // -fsized-deallocation is off by default, as it is an ABI-breaking change for // most platforms. if (Args.hasFlag(options::OPT_fsized_deallocation, options::OPT_fno_sized_deallocation, false)) CmdArgs.push_back("-fsized-deallocation"); // -fconstant-cfstrings is default, and may be subject to argument translation // on Darwin. if (!Args.hasFlag(options::OPT_fconstant_cfstrings, options::OPT_fno_constant_cfstrings) || !Args.hasFlag(options::OPT_mconstant_cfstrings, options::OPT_mno_constant_cfstrings)) CmdArgs.push_back("-fno-constant-cfstrings"); // -fshort-wchar default varies depending on platform; only // pass if specified. if (Arg *A = Args.getLastArg(options::OPT_fshort_wchar, options::OPT_fno_short_wchar)) A->render(Args, CmdArgs); // -fno-pascal-strings is default, only pass non-default. if (Args.hasFlag(options::OPT_fpascal_strings, options::OPT_fno_pascal_strings, false)) CmdArgs.push_back("-fpascal-strings"); // Honor -fpack-struct= and -fpack-struct, if given. Note that // -fno-pack-struct doesn't apply to -fpack-struct=. if (Arg *A = Args.getLastArg(options::OPT_fpack_struct_EQ)) { std::string PackStructStr = "-fpack-struct="; PackStructStr += A->getValue(); CmdArgs.push_back(Args.MakeArgString(PackStructStr)); } else if (Args.hasFlag(options::OPT_fpack_struct, options::OPT_fno_pack_struct, false)) { CmdArgs.push_back("-fpack-struct=1"); } // Handle -fmax-type-align=N and -fno-type-align bool SkipMaxTypeAlign = Args.hasArg(options::OPT_fno_max_type_align); if (Arg *A = Args.getLastArg(options::OPT_fmax_type_align_EQ)) { if (!SkipMaxTypeAlign) { std::string MaxTypeAlignStr = "-fmax-type-align="; MaxTypeAlignStr += A->getValue(); CmdArgs.push_back(Args.MakeArgString(MaxTypeAlignStr)); } } else if (getToolChain().getTriple().isOSDarwin()) { if (!SkipMaxTypeAlign) { std::string MaxTypeAlignStr = "-fmax-type-align=16"; CmdArgs.push_back(Args.MakeArgString(MaxTypeAlignStr)); } } // -fcommon is the default unless compiling kernel code or the target says so bool NoCommonDefault = KernelOrKext || isNoCommonDefault(getToolChain().getTriple()); if (!Args.hasFlag(options::OPT_fcommon, options::OPT_fno_common, !NoCommonDefault)) CmdArgs.push_back("-fno-common"); // -fsigned-bitfields is default, and clang doesn't yet support // -funsigned-bitfields. 
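// (For instance, "clang -funsigned-bitfields foo.c" is expected to draw // warn_drv_clang_unsupported rather than change bit-field signedness. // Illustrative.)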
if (!Args.hasFlag(options::OPT_fsigned_bitfields, options::OPT_funsigned_bitfields)) D.Diag(diag::warn_drv_clang_unsupported) << Args.getLastArg(options::OPT_funsigned_bitfields)->getAsString(Args); // -ffor-scope is default, and clang doesn't support -fno-for-scope. if (!Args.hasFlag(options::OPT_ffor_scope, options::OPT_fno_for_scope)) D.Diag(diag::err_drv_clang_unsupported) << Args.getLastArg(options::OPT_fno_for_scope)->getAsString(Args); // -finput_charset=UTF-8 is default. Reject others. if (Arg *inputCharset = Args.getLastArg(options::OPT_finput_charset_EQ)) { StringRef value = inputCharset->getValue(); if (value != "UTF-8") D.Diag(diag::err_drv_invalid_value) << inputCharset->getAsString(Args) << value; } // -fexec_charset=UTF-8 is default. Reject others. if (Arg *execCharset = Args.getLastArg(options::OPT_fexec_charset_EQ)) { StringRef value = execCharset->getValue(); if (value != "UTF-8") D.Diag(diag::err_drv_invalid_value) << execCharset->getAsString(Args) << value; } // -fcaret-diagnostics is default. if (!Args.hasFlag(options::OPT_fcaret_diagnostics, options::OPT_fno_caret_diagnostics, true)) CmdArgs.push_back("-fno-caret-diagnostics"); // -fdiagnostics-fixit-info is default, only pass non-default. if (!Args.hasFlag(options::OPT_fdiagnostics_fixit_info, options::OPT_fno_diagnostics_fixit_info)) CmdArgs.push_back("-fno-diagnostics-fixit-info"); // Enable -fdiagnostics-show-option by default. if (Args.hasFlag(options::OPT_fdiagnostics_show_option, options::OPT_fno_diagnostics_show_option)) CmdArgs.push_back("-fdiagnostics-show-option"); if (const Arg *A = Args.getLastArg(options::OPT_fdiagnostics_show_category_EQ)) { CmdArgs.push_back("-fdiagnostics-show-category"); CmdArgs.push_back(A->getValue()); } if (const Arg *A = Args.getLastArg(options::OPT_fdiagnostics_format_EQ)) { CmdArgs.push_back("-fdiagnostics-format"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg( options::OPT_fdiagnostics_show_note_include_stack, options::OPT_fno_diagnostics_show_note_include_stack)) { if (A->getOption().matches( options::OPT_fdiagnostics_show_note_include_stack)) CmdArgs.push_back("-fdiagnostics-show-note-include-stack"); else CmdArgs.push_back("-fno-diagnostics-show-note-include-stack"); } // Color diagnostics are parsed by the driver directly from argv // and later re-parsed to construct this job; claim any possible // color diagnostic here to avoid warn_drv_unused_argument and // diagnose bad OPT_fdiagnostics_color_EQ values.
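// (Illustrative: "-fdiagnostics-color=sometimes" is rejected here, while // "always", "never", and "auto" are accepted and merely claimed; whether // -fcolor-diagnostics is re-emitted is decided below from the already-parsed // DiagnosticOptions.)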
for (Arg *A : Args) { const Option &O = A->getOption(); if (!O.matches(options::OPT_fcolor_diagnostics) && !O.matches(options::OPT_fdiagnostics_color) && !O.matches(options::OPT_fno_color_diagnostics) && !O.matches(options::OPT_fno_diagnostics_color) && !O.matches(options::OPT_fdiagnostics_color_EQ)) continue; if (O.matches(options::OPT_fdiagnostics_color_EQ)) { StringRef Value(A->getValue()); if (Value != "always" && Value != "never" && Value != "auto") getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported) << ("-fdiagnostics-color=" + Value).str(); } A->claim(); } if (D.getDiags().getDiagnosticOptions().ShowColors) CmdArgs.push_back("-fcolor-diagnostics"); if (Args.hasArg(options::OPT_fansi_escape_codes)) CmdArgs.push_back("-fansi-escape-codes"); if (!Args.hasFlag(options::OPT_fshow_source_location, options::OPT_fno_show_source_location)) CmdArgs.push_back("-fno-show-source-location"); if (!Args.hasFlag(options::OPT_fshow_column, options::OPT_fno_show_column, true)) CmdArgs.push_back("-fno-show-column"); if (!Args.hasFlag(options::OPT_fspell_checking, options::OPT_fno_spell_checking)) CmdArgs.push_back("-fno-spell-checking"); // -fno-asm-blocks is default. if (Args.hasFlag(options::OPT_fasm_blocks, options::OPT_fno_asm_blocks, false)) CmdArgs.push_back("-fasm-blocks"); // -fgnu-inline-asm is default. if (!Args.hasFlag(options::OPT_fgnu_inline_asm, options::OPT_fno_gnu_inline_asm, true)) CmdArgs.push_back("-fno-gnu-inline-asm"); // Enable vectorization per default according to the optimization level // selected. For optimization levels that want vectorization we use the alias // option to simplify the hasFlag logic. bool EnableVec = shouldEnableVectorizerAtOLevel(Args, false); OptSpecifier VectorizeAliasOption = EnableVec ? options::OPT_O_Group : options::OPT_fvectorize; if (Args.hasFlag(options::OPT_fvectorize, VectorizeAliasOption, options::OPT_fno_vectorize, EnableVec)) CmdArgs.push_back("-vectorize-loops"); // -fslp-vectorize is enabled based on the optimization level selected. bool EnableSLPVec = shouldEnableVectorizerAtOLevel(Args, true); OptSpecifier SLPVectAliasOption = EnableSLPVec ? options::OPT_O_Group : options::OPT_fslp_vectorize; if (Args.hasFlag(options::OPT_fslp_vectorize, SLPVectAliasOption, options::OPT_fno_slp_vectorize, EnableSLPVec)) CmdArgs.push_back("-vectorize-slp"); // -fno-slp-vectorize-aggressive is default. if (Args.hasFlag(options::OPT_fslp_vectorize_aggressive, options::OPT_fno_slp_vectorize_aggressive, false)) CmdArgs.push_back("-vectorize-slp-aggressive"); if (Arg *A = Args.getLastArg(options::OPT_fshow_overloads_EQ)) A->render(Args, CmdArgs); if (Arg *A = Args.getLastArg( options::OPT_fsanitize_undefined_strip_path_components_EQ)) A->render(Args, CmdArgs); // -fdollars-in-identifiers default varies depending on platform and // language; only pass if specified. if (Arg *A = Args.getLastArg(options::OPT_fdollars_in_identifiers, options::OPT_fno_dollars_in_identifiers)) { if (A->getOption().matches(options::OPT_fdollars_in_identifiers)) CmdArgs.push_back("-fdollars-in-identifiers"); else CmdArgs.push_back("-fno-dollars-in-identifiers"); } // -funit-at-a-time is default, and we don't support -fno-unit-at-a-time for // practical purposes. 
if (Arg *A = Args.getLastArg(options::OPT_funit_at_a_time, options::OPT_fno_unit_at_a_time)) { if (A->getOption().matches(options::OPT_fno_unit_at_a_time)) D.Diag(diag::warn_drv_clang_unsupported) << A->getAsString(Args); } if (Args.hasFlag(options::OPT_fapple_pragma_pack, options::OPT_fno_apple_pragma_pack, false)) CmdArgs.push_back("-fapple-pragma-pack"); // le32-specific flags: // -fno-math-builtin: clang should not convert math builtins to intrinsics // by default. if (getToolChain().getArch() == llvm::Triple::le32) { CmdArgs.push_back("-fno-math-builtin"); } // Default to -fno-builtin-str{cat,cpy} on Darwin for ARM. // // FIXME: Now that PR4941 has been fixed this can be enabled. #if 0 if (getToolChain().getTriple().isOSDarwin() && (getToolChain().getArch() == llvm::Triple::arm || getToolChain().getArch() == llvm::Triple::thumb)) { if (!Args.hasArg(options::OPT_fbuiltin_strcat)) CmdArgs.push_back("-fno-builtin-strcat"); if (!Args.hasArg(options::OPT_fbuiltin_strcpy)) CmdArgs.push_back("-fno-builtin-strcpy"); } #endif // Enable rewrite includes if the user's asked for it or if we're generating // diagnostics. // TODO: Once -module-dependency-dir works with -frewrite-includes it'd be // nice to enable this when doing a crashdump for modules as well. if (Args.hasFlag(options::OPT_frewrite_includes, options::OPT_fno_rewrite_includes, false) || (C.isForDiagnostics() && !HaveModules)) CmdArgs.push_back("-frewrite-includes"); // Only allow -traditional or -traditional-cpp in preprocessing modes. if (Arg *A = Args.getLastArg(options::OPT_traditional, options::OPT_traditional_cpp)) { if (isa<PreprocessJobAction>(JA)) CmdArgs.push_back("-traditional-cpp"); else D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args); } Args.AddLastArg(CmdArgs, options::OPT_dM); Args.AddLastArg(CmdArgs, options::OPT_dD); // Handle serialized diagnostics. if (Arg *A = Args.getLastArg(options::OPT__serialize_diags)) { CmdArgs.push_back("-serialize-diagnostic-file"); CmdArgs.push_back(Args.MakeArgString(A->getValue())); } if (Args.hasArg(options::OPT_fretain_comments_from_system_headers)) CmdArgs.push_back("-fretain-comments-from-system-headers"); // Forward -fcomment-block-commands to -cc1. Args.AddAllArgs(CmdArgs, options::OPT_fcomment_block_commands); // Forward -fparse-all-comments to -cc1. Args.AddAllArgs(CmdArgs, options::OPT_fparse_all_comments); // Turn -fplugin=name.so into -load name.so for (const Arg *A : Args.filtered(options::OPT_fplugin_EQ)) { CmdArgs.push_back("-load"); CmdArgs.push_back(A->getValue()); A->claim(); } // Forward -Xclang arguments to -cc1, and -mllvm arguments to the LLVM option // parser. Args.AddAllArgValues(CmdArgs, options::OPT_Xclang); for (const Arg *A : Args.filtered(options::OPT_mllvm)) { A->claim(); // We translate this by hand to the -cc1 argument, since nightly test uses // it and developers have been trained to spell it with -mllvm. if (StringRef(A->getValue(0)) == "-disable-llvm-optzns") { CmdArgs.push_back("-disable-llvm-optzns"); } else A->render(Args, CmdArgs); } // With -save-temps, we want to save the unoptimized bitcode output from the // CompileJobAction, use -disable-llvm-passes to get pristine IR generated // by the frontend. // When -fembed-bitcode is enabled, optimized bitcode is emitted because it // has slightly different breakdown between stages. // FIXME: -fembed-bitcode -save-temps will save optimized bitcode instead of // pristine IR generated by the frontend. Ideally, a new compile action should // be added so both IR can be captured.
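// (For example, "clang -save-temps -O2 foo.c" is expected to add // -disable-llvm-passes to the CompileJobAction's cc1 line so that the saved // bitcode is the unoptimized frontend output; with -fembed-bitcode the flag // is deliberately omitted. Illustrative.)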
if (C.getDriver().isSaveTempsEnabled() && !C.getDriver().embedBitcodeEnabled() && isa<CompileJobAction>(JA)) CmdArgs.push_back("-disable-llvm-passes"); if (Output.getType() == types::TY_Dependencies) { // Handled with other dependency code. } else if (Output.isFilename()) { CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); } else { assert(Output.isNothing() && "Invalid output."); } addDashXForInput(Args, Input, CmdArgs); if (Input.isFilename()) CmdArgs.push_back(Input.getFilename()); else Input.getInputArg().renderAsInput(Args, CmdArgs); Args.AddAllArgs(CmdArgs, options::OPT_undef); const char *Exec = getToolChain().getDriver().getClangProgramPath(); // Optionally embed the -cc1 level arguments into the debug info, for build // analysis. if (getToolChain().UseDwarfDebugFlags()) { ArgStringList OriginalArgs; for (const auto &Arg : Args) Arg->render(Args, OriginalArgs); SmallString<256> Flags; Flags += Exec; for (const char *OriginalArg : OriginalArgs) { SmallString<128> EscapedArg; EscapeSpacesAndBackslashes(OriginalArg, EscapedArg); Flags += " "; Flags += EscapedArg; } CmdArgs.push_back("-dwarf-debug-flags"); CmdArgs.push_back(Args.MakeArgString(Flags)); } // Add the split debug info name to the command lines here so we // can propagate it to the backend. bool SplitDwarf = SplitDwarfArg && getToolChain().getTriple().isOSLinux() && (isa<AssembleJobAction>(JA) || isa<CompileJobAction>(JA) || isa<BackendJobAction>(JA)); const char *SplitDwarfOut; if (SplitDwarf) { CmdArgs.push_back("-split-dwarf-file"); SplitDwarfOut = SplitDebugName(Args, Input); CmdArgs.push_back(SplitDwarfOut); } // Host-side cuda compilation receives device-side outputs as Inputs[1...]. // Include them with -fcuda-include-gpubinary. if (IsCuda && Inputs.size() > 1) for (auto I = std::next(Inputs.begin()), E = Inputs.end(); I != E; ++I) { CmdArgs.push_back("-fcuda-include-gpubinary"); CmdArgs.push_back(I->getFilename()); } bool WholeProgramVTables = Args.hasFlag(options::OPT_fwhole_program_vtables, options::OPT_fno_whole_program_vtables, false); if (WholeProgramVTables) { if (!D.isUsingLTO()) D.Diag(diag::err_drv_argument_only_allowed_with) << "-fwhole-program-vtables" << "-flto"; CmdArgs.push_back("-fwhole-program-vtables"); } // Finally add the compile command to the compilation. if (Args.hasArg(options::OPT__SLASH_fallback) && Output.getType() == types::TY_Object && (InputType == types::TY_C || InputType == types::TY_CXX)) { auto CLCommand = getCLFallback()->GetCommand(C, JA, Output, Inputs, Args, LinkingOutput); C.addCommand(llvm::make_unique<FallbackCommand>( JA, *this, Exec, CmdArgs, Inputs, std::move(CLCommand))); } else if (Args.hasArg(options::OPT__SLASH_fallback) && isa<PrecompileJobAction>(JA)) { // In /fallback builds, run the main compilation even if the pch generation // fails, so that the main compilation's fallback to cl.exe runs. C.addCommand(llvm::make_unique<ForceSuccessCommand>(JA, *this, Exec, CmdArgs, Inputs)); } else { C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs)); } // Handle the debug info splitting at object creation time if we're // creating an object. // TODO: Currently only works on linux with newer objcopy. if (SplitDwarf && Output.getType() == types::TY_Object) SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output, SplitDwarfOut); if (Arg *A = Args.getLastArg(options::OPT_pg)) if (Args.hasArg(options::OPT_fomit_frame_pointer)) D.Diag(diag::err_drv_argument_not_allowed_with) << "-fomit-frame-pointer" << A->getAsString(Args); // Claim some arguments which clang supports automatically.
// -fpch-preprocess is used with gcc to add a special marker in the output to // include the PCH file. Clang's PTH solution is completely transparent, so we // do not need to deal with it at all. Args.ClaimAllArgs(options::OPT_fpch_preprocess); // Claim some arguments which clang doesn't support, but we don't // care to warn the user about. Args.ClaimAllArgs(options::OPT_clang_ignored_f_Group); Args.ClaimAllArgs(options::OPT_clang_ignored_m_Group); // Disable warnings for clang -E -emit-llvm foo.c Args.ClaimAllArgs(options::OPT_emit_llvm); } /// Add options related to the Objective-C runtime/ABI. /// /// Returns the Objective-C runtime that was selected. ObjCRuntime Clang::AddObjCRuntimeArgs(const ArgList &args, ArgStringList &cmdArgs, RewriteKind rewriteKind) const { // Look for the controlling runtime option. Arg *runtimeArg = args.getLastArg(options::OPT_fnext_runtime, options::OPT_fgnu_runtime, options::OPT_fobjc_runtime_EQ); // Just forward -fobjc-runtime= to the frontend. This supersedes // options about fragility. if (runtimeArg && runtimeArg->getOption().matches(options::OPT_fobjc_runtime_EQ)) { ObjCRuntime runtime; StringRef value = runtimeArg->getValue(); if (runtime.tryParse(value)) { getToolChain().getDriver().Diag(diag::err_drv_unknown_objc_runtime) << value; } runtimeArg->render(args, cmdArgs); return runtime; } // Otherwise, we'll need the ABI "version". Version numbers are // slightly confusing for historical reasons: // 1 - Traditional "fragile" ABI // 2 - Non-fragile ABI, version 1 // 3 - Non-fragile ABI, version 2 unsigned objcABIVersion = 1; // If -fobjc-abi-version= is present, use that to set the version. if (Arg *abiArg = args.getLastArg(options::OPT_fobjc_abi_version_EQ)) { StringRef value = abiArg->getValue(); if (value == "1") objcABIVersion = 1; else if (value == "2") objcABIVersion = 2; else if (value == "3") objcABIVersion = 3; else getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported) << value; } else { // Otherwise, determine if we are using the non-fragile ABI. bool nonFragileABIIsDefault = (rewriteKind == RK_NonFragile || (rewriteKind == RK_None && getToolChain().IsObjCNonFragileABIDefault())); if (args.hasFlag(options::OPT_fobjc_nonfragile_abi, options::OPT_fno_objc_nonfragile_abi, nonFragileABIIsDefault)) { // Determine the non-fragile ABI version to use. #ifdef DISABLE_DEFAULT_NONFRAGILEABI_TWO unsigned nonFragileABIVersion = 1; #else unsigned nonFragileABIVersion = 2; #endif if (Arg *abiArg = args.getLastArg(options::OPT_fobjc_nonfragile_abi_version_EQ)) { StringRef value = abiArg->getValue(); if (value == "1") nonFragileABIVersion = 1; else if (value == "2") nonFragileABIVersion = 2; else getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported) << value; } objcABIVersion = 1 + nonFragileABIVersion; } else { objcABIVersion = 1; } } // We don't actually care about the ABI version other than whether // it's non-fragile. bool isNonFragile = objcABIVersion != 1; // If we have no runtime argument, ask the toolchain for its default runtime. // However, the rewriter only really supports the Mac runtime, so assume that.
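// (Illustrative: a spelling such as "-fobjc-runtime=gnustep-1.7" is parsed // by ObjCRuntime::tryParse above; when no runtime argument is present at all, // the switch below picks the toolchain default or, for the rewriter, a Mac // runtime.)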
ObjCRuntime runtime; if (!runtimeArg) { switch (rewriteKind) { case RK_None: runtime = getToolChain().getDefaultObjCRuntime(isNonFragile); break; case RK_Fragile: runtime = ObjCRuntime(ObjCRuntime::FragileMacOSX, VersionTuple()); break; case RK_NonFragile: runtime = ObjCRuntime(ObjCRuntime::MacOSX, VersionTuple()); break; } // -fnext-runtime } else if (runtimeArg->getOption().matches(options::OPT_fnext_runtime)) { // On Darwin, make this use the default behavior for the toolchain. if (getToolChain().getTriple().isOSDarwin()) { runtime = getToolChain().getDefaultObjCRuntime(isNonFragile); // Otherwise, build for a generic macosx port. } else { runtime = ObjCRuntime(ObjCRuntime::MacOSX, VersionTuple()); } // -fgnu-runtime } else { assert(runtimeArg->getOption().matches(options::OPT_fgnu_runtime)); // Legacy behaviour is to target the gnustep runtime if we are in // non-fragile mode or the GCC runtime in fragile mode. if (isNonFragile) runtime = ObjCRuntime(ObjCRuntime::GNUstep, VersionTuple(1, 6)); else runtime = ObjCRuntime(ObjCRuntime::GCC, VersionTuple()); } cmdArgs.push_back( args.MakeArgString("-fobjc-runtime=" + runtime.getAsString())); return runtime; } static bool maybeConsumeDash(const std::string &EH, size_t &I) { bool HaveDash = (I + 1 < EH.size() && EH[I + 1] == '-'); I += HaveDash; return !HaveDash; } namespace { struct EHFlags { bool Synch = false; bool Asynch = false; bool NoUnwindC = false; }; } // end anonymous namespace /// /EH controls whether to run destructor cleanups when exceptions are /// thrown. There are three modifiers: /// - s: Cleanup after "synchronous" exceptions, aka C++ exceptions. /// - a: Cleanup after "asynchronous" exceptions, aka structured exceptions. /// The 'a' modifier is unimplemented and fundamentally hard in LLVM IR. /// - c: Assume that extern "C" functions are implicitly nounwind. /// The default is /EHs-c-, meaning cleanups are disabled. static EHFlags parseClangCLEHFlags(const Driver &D, const ArgList &Args) { EHFlags EH; std::vector<std::string> EHArgs = Args.getAllArgValues(options::OPT__SLASH_EH); for (auto EHVal : EHArgs) { for (size_t I = 0, E = EHVal.size(); I != E; ++I) { switch (EHVal[I]) { case 'a': EH.Asynch = maybeConsumeDash(EHVal, I); if (EH.Asynch) EH.Synch = false; continue; case 'c': EH.NoUnwindC = maybeConsumeDash(EHVal, I); continue; case 's': EH.Synch = maybeConsumeDash(EHVal, I); if (EH.Synch) EH.Asynch = false; continue; default: break; } D.Diag(clang::diag::err_drv_invalid_value) << "/EH" << EHVal; break; } } // The /GX, /GX- flags are only processed if there are no /EH flags. // The default is that /GX is not specified. if (EHArgs.empty() && Args.hasFlag(options::OPT__SLASH_GX, options::OPT__SLASH_GX_, /*default=*/false)) { EH.Synch = true; EH.NoUnwindC = true; } return EH; } void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType, ArgStringList &CmdArgs, codegenoptions::DebugInfoKind *DebugInfoKind, bool *EmitCodeView) const { unsigned RTOptionID = options::OPT__SLASH_MT; if (Args.hasArg(options::OPT__SLASH_LDd)) // The /LDd option implies /MTd. The dependent lib part can be overridden, // but defining _DEBUG is sticky.
RTOptionID = options::OPT__SLASH_MTd; if (Arg *A = Args.getLastArg(options::OPT__SLASH_M_Group)) RTOptionID = A->getOption().getID(); StringRef FlagForCRT; switch (RTOptionID) { case options::OPT__SLASH_MD: if (Args.hasArg(options::OPT__SLASH_LDd)) CmdArgs.push_back("-D_DEBUG"); CmdArgs.push_back("-D_MT"); CmdArgs.push_back("-D_DLL"); FlagForCRT = "--dependent-lib=msvcrt"; break; case options::OPT__SLASH_MDd: CmdArgs.push_back("-D_DEBUG"); CmdArgs.push_back("-D_MT"); CmdArgs.push_back("-D_DLL"); FlagForCRT = "--dependent-lib=msvcrtd"; break; case options::OPT__SLASH_MT: if (Args.hasArg(options::OPT__SLASH_LDd)) CmdArgs.push_back("-D_DEBUG"); CmdArgs.push_back("-D_MT"); CmdArgs.push_back("-flto-visibility-public-std"); FlagForCRT = "--dependent-lib=libcmt"; break; case options::OPT__SLASH_MTd: CmdArgs.push_back("-D_DEBUG"); CmdArgs.push_back("-D_MT"); CmdArgs.push_back("-flto-visibility-public-std"); FlagForCRT = "--dependent-lib=libcmtd"; break; default: llvm_unreachable("Unexpected option ID."); } if (Args.hasArg(options::OPT__SLASH_Zl)) { CmdArgs.push_back("-D_VC_NODEFAULTLIB"); } else { CmdArgs.push_back(FlagForCRT.data()); // This provides POSIX compatibility (maps 'open' to '_open'), which most // users want. The /Za flag to cl.exe turns this off, but it's not // implemented in clang. CmdArgs.push_back("--dependent-lib=oldnames"); } // Both /showIncludes and /E (and /EP) write to stdout. Allowing both // would produce interleaved output, so ignore /showIncludes in such cases. if (!Args.hasArg(options::OPT_E) && !Args.hasArg(options::OPT__SLASH_EP)) if (Arg *A = Args.getLastArg(options::OPT_show_includes)) A->render(Args, CmdArgs); // This controls whether or not we emit RTTI data for polymorphic types. if (Args.hasFlag(options::OPT__SLASH_GR_, options::OPT__SLASH_GR, /*default=*/false)) CmdArgs.push_back("-fno-rtti-data"); // This controls whether or not we emit stack-protector instrumentation. // In MSVC, Buffer Security Check (/GS) is on by default. if (Args.hasFlag(options::OPT__SLASH_GS, options::OPT__SLASH_GS_, /*default=*/true)) { CmdArgs.push_back("-stack-protector"); CmdArgs.push_back(Args.MakeArgString(Twine(LangOptions::SSPStrong))); } // Emit CodeView if -Z7, -Zd, or -gline-tables-only are present. if (Arg *DebugInfoArg = Args.getLastArg(options::OPT__SLASH_Z7, options::OPT__SLASH_Zd, options::OPT_gline_tables_only)) { *EmitCodeView = true; if (DebugInfoArg->getOption().matches(options::OPT__SLASH_Z7)) *DebugInfoKind = codegenoptions::LimitedDebugInfo; else *DebugInfoKind = codegenoptions::DebugLineTablesOnly; CmdArgs.push_back("-gcodeview"); } else { *EmitCodeView = false; } const Driver &D = getToolChain().getDriver(); EHFlags EH = parseClangCLEHFlags(D, Args); if (EH.Synch || EH.Asynch) { if (types::isCXX(InputType)) CmdArgs.push_back("-fcxx-exceptions"); CmdArgs.push_back("-fexceptions"); } if (types::isCXX(InputType) && EH.Synch && EH.NoUnwindC) CmdArgs.push_back("-fexternc-nounwind"); // /EP should expand to -E -P. 
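// (Illustrative: "clang-cl /EP foo.c" preprocesses to stdout with #line // markers suppressed, matching a GCC-style "-E -P" invocation.)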
if (Args.hasArg(options::OPT__SLASH_EP)) { CmdArgs.push_back("-E"); CmdArgs.push_back("-P"); } unsigned VolatileOptionID; if (getToolChain().getArch() == llvm::Triple::x86_64 || getToolChain().getArch() == llvm::Triple::x86) VolatileOptionID = options::OPT__SLASH_volatile_ms; else VolatileOptionID = options::OPT__SLASH_volatile_iso; if (Arg *A = Args.getLastArg(options::OPT__SLASH_volatile_Group)) VolatileOptionID = A->getOption().getID(); if (VolatileOptionID == options::OPT__SLASH_volatile_ms) CmdArgs.push_back("-fms-volatile"); Arg *MostGeneralArg = Args.getLastArg(options::OPT__SLASH_vmg); Arg *BestCaseArg = Args.getLastArg(options::OPT__SLASH_vmb); if (MostGeneralArg && BestCaseArg) D.Diag(clang::diag::err_drv_argument_not_allowed_with) << MostGeneralArg->getAsString(Args) << BestCaseArg->getAsString(Args); if (MostGeneralArg) { Arg *SingleArg = Args.getLastArg(options::OPT__SLASH_vms); Arg *MultipleArg = Args.getLastArg(options::OPT__SLASH_vmm); Arg *VirtualArg = Args.getLastArg(options::OPT__SLASH_vmv); Arg *FirstConflict = SingleArg ? SingleArg : MultipleArg; Arg *SecondConflict = VirtualArg ? VirtualArg : MultipleArg; if (FirstConflict && SecondConflict && FirstConflict != SecondConflict) D.Diag(clang::diag::err_drv_argument_not_allowed_with) << FirstConflict->getAsString(Args) << SecondConflict->getAsString(Args); if (SingleArg) CmdArgs.push_back("-fms-memptr-rep=single"); else if (MultipleArg) CmdArgs.push_back("-fms-memptr-rep=multiple"); else CmdArgs.push_back("-fms-memptr-rep=virtual"); } if (Args.getLastArg(options::OPT__SLASH_Gd)) CmdArgs.push_back("-fdefault-calling-conv=cdecl"); else if (Args.getLastArg(options::OPT__SLASH_Gr)) CmdArgs.push_back("-fdefault-calling-conv=fastcall"); else if (Args.getLastArg(options::OPT__SLASH_Gz)) CmdArgs.push_back("-fdefault-calling-conv=stdcall"); else if (Args.getLastArg(options::OPT__SLASH_Gv)) CmdArgs.push_back("-fdefault-calling-conv=vectorcall"); if (Arg *A = Args.getLastArg(options::OPT_vtordisp_mode_EQ)) A->render(Args, CmdArgs); if (!Args.hasArg(options::OPT_fdiagnostics_format_EQ)) { CmdArgs.push_back("-fdiagnostics-format"); if (Args.hasArg(options::OPT__SLASH_fallback)) CmdArgs.push_back("msvc-fallback"); else CmdArgs.push_back("msvc"); } } visualstudio::Compiler *Clang::getCLFallback() const { if (!CLFallback) CLFallback.reset(new visualstudio::Compiler(getToolChain())); return CLFallback.get(); } void ClangAs::AddMIPSTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { StringRef CPUName; StringRef ABIName; const llvm::Triple &Triple = getToolChain().getTriple(); mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName); CmdArgs.push_back("-target-abi"); CmdArgs.push_back(ABIName.data()); } void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { ArgStringList CmdArgs; assert(Inputs.size() == 1 && "Unexpected number of inputs."); const InputInfo &Input = Inputs[0]; std::string TripleStr = getToolChain().ComputeEffectiveClangTriple(Args, Input.getType()); const llvm::Triple Triple(TripleStr); // Don't warn about "clang -w -c foo.s" Args.ClaimAllArgs(options::OPT_w); // and "clang -emit-llvm -c foo.s" Args.ClaimAllArgs(options::OPT_emit_llvm); claimNoWarnArgs(Args); // Invoke ourselves in -cc1as mode. // // FIXME: Implement custom jobs for internal actions. CmdArgs.push_back("-cc1as"); // Add the "effective" target triple. 
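// (Illustrative shape of the resulting job, with an assumed triple: // clang -cc1as -triple x86_64-unknown-freebsd11.0 -filetype obj ... foo.s // where the triple is computed from the toolchain and any -target override.)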
CmdArgs.push_back("-triple"); CmdArgs.push_back(Args.MakeArgString(TripleStr)); // Set the output mode, we currently only expect to be used as a real // assembler. CmdArgs.push_back("-filetype"); CmdArgs.push_back("obj"); // Set the main file name, so that debug info works even with // -save-temps or preprocessed assembly. CmdArgs.push_back("-main-file-name"); CmdArgs.push_back(Clang::getBaseInputName(Args, Input)); // Add the target cpu std::string CPU = getCPUName(Args, Triple, /*FromAs*/ true); if (!CPU.empty()) { CmdArgs.push_back("-target-cpu"); CmdArgs.push_back(Args.MakeArgString(CPU)); } // Add the target features getTargetFeatures(getToolChain(), Triple, Args, CmdArgs, true); // Ignore explicit -force_cpusubtype_ALL option. (void)Args.hasArg(options::OPT_force__cpusubtype__ALL); // Pass along any -I options so we get proper .include search paths. Args.AddAllArgs(CmdArgs, options::OPT_I_Group); // Determine the original source input. const Action *SourceAction = &JA; while (SourceAction->getKind() != Action::InputClass) { assert(!SourceAction->getInputs().empty() && "unexpected root action!"); SourceAction = SourceAction->getInputs()[0]; } // Forward -g and handle debug info related flags, assuming we are dealing // with an actual assembly file. bool WantDebug = false; unsigned DwarfVersion = 0; Args.ClaimAllArgs(options::OPT_g_Group); if (Arg *A = Args.getLastArg(options::OPT_g_Group)) { WantDebug = !A->getOption().matches(options::OPT_g0) && !A->getOption().matches(options::OPT_ggdb0); if (WantDebug) DwarfVersion = DwarfVersionNum(A->getSpelling()); } if (DwarfVersion == 0) DwarfVersion = getToolChain().GetDefaultDwarfVersion(); codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo; if (SourceAction->getType() == types::TY_Asm || SourceAction->getType() == types::TY_PP_Asm) { // You might think that it would be ok to set DebugInfoKind outside of // the guard for source type, however there is a test which asserts // that some assembler invocation receives no -debug-info-kind, // and it's not clear whether that test is just overly restrictive. DebugInfoKind = (WantDebug ? codegenoptions::LimitedDebugInfo : codegenoptions::NoDebugInfo); // Add the -fdebug-compilation-dir flag if needed. addDebugCompDirArg(Args, CmdArgs); // Set the AT_producer to the clang version when using the integrated // assembler on assembly source files. CmdArgs.push_back("-dwarf-debug-producer"); CmdArgs.push_back(Args.MakeArgString(getClangFullVersion())); // And pass along -I options Args.AddAllArgs(CmdArgs, options::OPT_I); } RenderDebugEnablingArgs(Args, CmdArgs, DebugInfoKind, DwarfVersion, llvm::DebuggerKind::Default); // Handle -fPIC et al -- the relocation-model affects the assembler // for some targets. llvm::Reloc::Model RelocationModel; unsigned PICLevel; bool IsPIE; std::tie(RelocationModel, PICLevel, IsPIE) = ParsePICArgs(getToolChain(), Triple, Args); const char *RMName = RelocationModelName(RelocationModel); if (RMName) { CmdArgs.push_back("-mrelocation-model"); CmdArgs.push_back(RMName); } // Optionally embed the -cc1as level arguments into the debug info, for build // analysis. 
if (getToolChain().UseDwarfDebugFlags()) { ArgStringList OriginalArgs; for (const auto &Arg : Args) Arg->render(Args, OriginalArgs); SmallString<256> Flags; const char *Exec = getToolChain().getDriver().getClangProgramPath(); Flags += Exec; for (const char *OriginalArg : OriginalArgs) { SmallString<128> EscapedArg; EscapeSpacesAndBackslashes(OriginalArg, EscapedArg); Flags += " "; Flags += EscapedArg; } CmdArgs.push_back("-dwarf-debug-flags"); CmdArgs.push_back(Args.MakeArgString(Flags)); } // FIXME: Add -static support, once we have it. // Add target specific flags. switch (getToolChain().getArch()) { default: break; case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: AddMIPSTargetArgs(Args, CmdArgs); break; } // Consume all the warning flags. Usually this would be handled more // gracefully by -cc1 (warning about unknown warning flags, etc) but -cc1as // doesn't handle that so rather than warning about unused flags that are // actually used, we'll lie by omission instead. // FIXME: Stop lying and consume only the appropriate driver flags. Args.ClaimAllArgs(options::OPT_W_Group); CollectArgsForIntegratedAssembler(C, Args, CmdArgs, getToolChain().getDriver()); Args.AddAllArgs(CmdArgs, options::OPT_mllvm); assert(Output.isFilename() && "Unexpected lipo output."); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); assert(Input.isFilename() && "Invalid input."); CmdArgs.push_back(Input.getFilename()); const char *Exec = getToolChain().getDriver().getClangProgramPath(); C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs)); // Handle the debug info splitting at object creation time if we're // creating an object. // TODO: Currently only works on linux with newer objcopy. if (Args.hasArg(options::OPT_gsplit_dwarf) && getToolChain().getTriple().isOSLinux()) SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output, SplitDebugName(Args, Input)); } void GnuTool::anchor() {} void gcc::Common::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const Driver &D = getToolChain().getDriver(); ArgStringList CmdArgs; for (const auto &A : Args) { if (forwardToGCC(A->getOption())) { // It is unfortunate that we have to claim here, as this means // we will basically never report anything interesting for // platforms using a generic gcc, even if we are just using gcc // to get to the assembler. A->claim(); // Don't forward any -g arguments to assembly steps. if (isa<AssembleJobAction>(JA) && A->getOption().matches(options::OPT_g_Group)) continue; // Don't forward any -W arguments to assembly and link steps. if ((isa<AssembleJobAction>(JA) || isa<LinkJobAction>(JA)) && A->getOption().matches(options::OPT_W_Group)) continue; A->render(Args, CmdArgs); } } RenderExtraToolArgs(JA, CmdArgs); // If using a driver driver, force the arch. if (getToolChain().getTriple().isOSDarwin()) { CmdArgs.push_back("-arch"); CmdArgs.push_back( Args.MakeArgString(getToolChain().getDefaultUniversalArchName())); } // Try to force gcc to match the tool chain we want, if we recognize // the arch. // // FIXME: The triple class should directly provide the information we want // here.
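// (Illustrative: x86 and ppc triples are steered with -m32, x86_64 and // ppc64[le] with -m64, and little-endian sparc with -EL, per the switch // below; arches not listed are left to gcc's own defaults.)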
switch (getToolChain().getArch()) { default: break; case llvm::Triple::x86: case llvm::Triple::ppc: CmdArgs.push_back("-m32"); break; case llvm::Triple::x86_64: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: CmdArgs.push_back("-m64"); break; case llvm::Triple::sparcel: CmdArgs.push_back("-EL"); break; } if (Output.isFilename()) { CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); } else { assert(Output.isNothing() && "Unexpected output"); CmdArgs.push_back("-fsyntax-only"); } Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); // Only pass -x if gcc will understand it; otherwise hope gcc // understands the suffix correctly. The main use case this would go // wrong in is for linker inputs if they happened to have an odd // suffix; really the only way to get this to happen is a command // like '-x foobar a.c' which will treat a.c like a linker input. // // FIXME: For the linker case specifically, can we safely convert // inputs into '-Wl,' options? for (const auto &II : Inputs) { // Don't try to pass LLVM or AST inputs to a generic gcc. if (types::isLLVMIR(II.getType())) D.Diag(diag::err_drv_no_linker_llvm_support) << getToolChain().getTripleString(); else if (II.getType() == types::TY_AST) D.Diag(diag::err_drv_no_ast_support) << getToolChain().getTripleString(); else if (II.getType() == types::TY_ModuleFile) D.Diag(diag::err_drv_no_module_support) << getToolChain().getTripleString(); if (types::canTypeBeUserSpecified(II.getType())) { CmdArgs.push_back("-x"); CmdArgs.push_back(types::getTypeName(II.getType())); } if (II.isFilename()) CmdArgs.push_back(II.getFilename()); else { const Arg &A = II.getInputArg(); // Reverse translate some rewritten options. if (A.getOption().matches(options::OPT_Z_reserved_lib_stdcxx)) { CmdArgs.push_back("-lstdc++"); continue; } // Don't render as input, we need gcc to do the translations. A.render(Args, CmdArgs); } } const std::string &customGCCName = D.getCCCGenericGCCName(); const char *GCCName; if (!customGCCName.empty()) GCCName = customGCCName.c_str(); else if (D.CCCIsCXX()) { GCCName = "g++"; } else GCCName = "gcc"; const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath(GCCName)); C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs)); } void gcc::Preprocessor::RenderExtraToolArgs(const JobAction &JA, ArgStringList &CmdArgs) const { CmdArgs.push_back("-E"); } void gcc::Compiler::RenderExtraToolArgs(const JobAction &JA, ArgStringList &CmdArgs) const { const Driver &D = getToolChain().getDriver(); switch (JA.getType()) { // If -flto, etc. are present then make sure not to force assembly output. case types::TY_LLVM_IR: case types::TY_LTO_IR: case types::TY_LLVM_BC: case types::TY_LTO_BC: CmdArgs.push_back("-c"); break; // We assume we've got an "integrated" assembler in that gcc will produce an // object file itself. case types::TY_Object: CmdArgs.push_back("-c"); break; case types::TY_PP_Asm: CmdArgs.push_back("-S"); break; case types::TY_Nothing: CmdArgs.push_back("-fsyntax-only"); break; default: D.Diag(diag::err_drv_invalid_gcc_output_type) << getTypeName(JA.getType()); } } void gcc::Linker::RenderExtraToolArgs(const JobAction &JA, ArgStringList &CmdArgs) const { // The types are (hopefully) good enough. } // Hexagon tools start.
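// (Overview, illustrative: the assembler tool below builds an invocation of // the form "hexagon-llvm-mc -march=hexagon -mcpu=hexagon<vN> -filetype=obj", // where <vN> is the CPU version reported by GetTargetCPUVersion(Args).)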
void hexagon::Assembler::RenderExtraToolArgs(const JobAction &JA, ArgStringList &CmdArgs) const { } void hexagon::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { claimNoWarnArgs(Args); auto &HTC = static_cast<const toolchains::HexagonToolChain &>(getToolChain()); const Driver &D = HTC.getDriver(); ArgStringList CmdArgs; std::string MArchString = "-march=hexagon"; CmdArgs.push_back(Args.MakeArgString(MArchString)); RenderExtraToolArgs(JA, CmdArgs); std::string AsName = "hexagon-llvm-mc"; std::string MCpuString = "-mcpu=hexagon" + toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str(); CmdArgs.push_back("-filetype=obj"); CmdArgs.push_back(Args.MakeArgString(MCpuString)); if (Output.isFilename()) { CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); } else { assert(Output.isNothing() && "Unexpected output"); CmdArgs.push_back("-fsyntax-only"); } if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) { std::string N = llvm::utostr(G.getValue()); CmdArgs.push_back(Args.MakeArgString(std::string("-gpsize=") + N)); } Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); // Only pass -x if gcc will understand it; otherwise hope gcc // understands the suffix correctly. The main use case this would go // wrong in is for linker inputs if they happened to have an odd // suffix; really the only way to get this to happen is a command // like '-x foobar a.c' which will treat a.c like a linker input. // // FIXME: For the linker case specifically, can we safely convert // inputs into '-Wl,' options? for (const auto &II : Inputs) { // Don't try to pass LLVM or AST inputs to a generic gcc. if (types::isLLVMIR(II.getType())) D.Diag(clang::diag::err_drv_no_linker_llvm_support) << HTC.getTripleString(); else if (II.getType() == types::TY_AST) D.Diag(clang::diag::err_drv_no_ast_support) << HTC.getTripleString(); else if (II.getType() == types::TY_ModuleFile) D.Diag(diag::err_drv_no_module_support) << HTC.getTripleString(); if (II.isFilename()) CmdArgs.push_back(II.getFilename()); else // Don't render as input, we need gcc to do the translations. // FIXME: What is this?
II.getInputArg().render(Args, CmdArgs); } auto *Exec = Args.MakeArgString(HTC.GetProgramPath(AsName.c_str())); C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs)); } void hexagon::Linker::RenderExtraToolArgs(const JobAction &JA, ArgStringList &CmdArgs) const { } static void constructHexagonLinkArgs(Compilation &C, const JobAction &JA, const toolchains::HexagonToolChain &HTC, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, ArgStringList &CmdArgs, const char *LinkingOutput) { const Driver &D = HTC.getDriver(); //---------------------------------------------------------------------------- // //---------------------------------------------------------------------------- bool IsStatic = Args.hasArg(options::OPT_static); bool IsShared = Args.hasArg(options::OPT_shared); bool IsPIE = Args.hasArg(options::OPT_pie); bool IncStdLib = !Args.hasArg(options::OPT_nostdlib); bool IncStartFiles = !Args.hasArg(options::OPT_nostartfiles); bool IncDefLibs = !Args.hasArg(options::OPT_nodefaultlibs); bool UseG0 = false; bool UseShared = IsShared && !IsStatic; //---------------------------------------------------------------------------- // Silence warnings for various options //---------------------------------------------------------------------------- Args.ClaimAllArgs(options::OPT_g_Group); Args.ClaimAllArgs(options::OPT_emit_llvm); Args.ClaimAllArgs(options::OPT_w); // Other warning options are already // handled somewhere else. Args.ClaimAllArgs(options::OPT_static_libgcc); //---------------------------------------------------------------------------- // //---------------------------------------------------------------------------- if (Args.hasArg(options::OPT_s)) CmdArgs.push_back("-s"); if (Args.hasArg(options::OPT_r)) CmdArgs.push_back("-r"); for (const auto &Opt : HTC.ExtraOpts) CmdArgs.push_back(Opt.c_str()); CmdArgs.push_back("-march=hexagon"); std::string CpuVer = toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str(); std::string MCpuString = "-mcpu=hexagon" + CpuVer; CmdArgs.push_back(Args.MakeArgString(MCpuString)); if (IsShared) { CmdArgs.push_back("-shared"); // The following should be the default, but doing as hexagon-gcc does.
CmdArgs.push_back("-call_shared"); } if (IsStatic) CmdArgs.push_back("-static"); if (IsPIE && !IsShared) CmdArgs.push_back("-pie"); if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) { std::string N = llvm::utostr(G.getValue()); CmdArgs.push_back(Args.MakeArgString(std::string("-G") + N)); UseG0 = G.getValue() == 0; } //---------------------------------------------------------------------------- // //---------------------------------------------------------------------------- CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); //---------------------------------------------------------------------------- // moslib //---------------------------------------------------------------------------- std::vector OsLibs; bool HasStandalone = false; for (const Arg *A : Args.filtered(options::OPT_moslib_EQ)) { A->claim(); OsLibs.emplace_back(A->getValue()); HasStandalone = HasStandalone || (OsLibs.back() == "standalone"); } if (OsLibs.empty()) { OsLibs.push_back("standalone"); HasStandalone = true; } //---------------------------------------------------------------------------- // Start Files //---------------------------------------------------------------------------- const std::string MCpuSuffix = "/" + CpuVer; const std::string MCpuG0Suffix = MCpuSuffix + "/G0"; const std::string RootDir = HTC.getHexagonTargetDir(D.InstalledDir, D.PrefixDirs) + "/"; const std::string StartSubDir = "hexagon/lib" + (UseG0 ? MCpuG0Suffix : MCpuSuffix); auto Find = [&HTC] (const std::string &RootDir, const std::string &SubDir, const char *Name) -> std::string { std::string RelName = SubDir + Name; std::string P = HTC.GetFilePath(RelName.c_str()); if (llvm::sys::fs::exists(P)) return P; return RootDir + RelName; }; if (IncStdLib && IncStartFiles) { if (!IsShared) { if (HasStandalone) { std::string Crt0SA = Find(RootDir, StartSubDir, "/crt0_standalone.o"); CmdArgs.push_back(Args.MakeArgString(Crt0SA)); } std::string Crt0 = Find(RootDir, StartSubDir, "/crt0.o"); CmdArgs.push_back(Args.MakeArgString(Crt0)); } std::string Init = UseShared ? 
  //----------------------------------------------------------------------------
  // Library Search Paths
  //----------------------------------------------------------------------------
  const ToolChain::path_list &LibPaths = HTC.getFilePaths();
  for (const auto &LibPath : LibPaths)
    CmdArgs.push_back(Args.MakeArgString(StringRef("-L") + LibPath));

  //----------------------------------------------------------------------------
  //
  //----------------------------------------------------------------------------
  Args.AddAllArgs(CmdArgs,
                  {options::OPT_T_Group, options::OPT_e, options::OPT_s,
                   options::OPT_t, options::OPT_u_Group});

  AddLinkerInputs(HTC, Inputs, Args, CmdArgs);

  //----------------------------------------------------------------------------
  // Libraries
  //----------------------------------------------------------------------------
  if (IncStdLib && IncDefLibs) {
    if (D.CCCIsCXX()) {
      HTC.AddCXXStdlibLibArgs(Args, CmdArgs);
      CmdArgs.push_back("-lm");
    }

    CmdArgs.push_back("--start-group");

    if (!IsShared) {
      for (const std::string &Lib : OsLibs)
        CmdArgs.push_back(Args.MakeArgString("-l" + Lib));
      CmdArgs.push_back("-lc");
    }
    CmdArgs.push_back("-lgcc");

    CmdArgs.push_back("--end-group");
  }

  //----------------------------------------------------------------------------
  // End files
  //----------------------------------------------------------------------------
  if (IncStdLib && IncStartFiles) {
    std::string Fini = UseShared
        ? Find(RootDir, StartSubDir + "/pic", "/finiS.o")
        : Find(RootDir, StartSubDir, "/fini.o");
    CmdArgs.push_back(Args.MakeArgString(Fini));
  }
}

void hexagon::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                   const InputInfo &Output,
                                   const InputInfoList &Inputs,
                                   const ArgList &Args,
                                   const char *LinkingOutput) const {
  auto &HTC = static_cast<const toolchains::HexagonToolChain&>(getToolChain());

  ArgStringList CmdArgs;
  constructHexagonLinkArgs(C, JA, HTC, Output, Inputs, Args, CmdArgs,
                           LinkingOutput);

  std::string Linker = HTC.GetProgramPath("hexagon-link");
  C.addCommand(llvm::make_unique<Command>(JA, *this,
                                          Args.MakeArgString(Linker), CmdArgs,
                                          Inputs));
}
// Hexagon tools end.
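// Putting the hexagon pieces together (sketch, assuming a C link with the
// default -moslib=standalone): "clang --target=hexagon foo.o" reduces to
// roughly
//
//   hexagon-link -march=hexagon -mcpu=hexagonvXX -o a.out \
//       .../crt0_standalone.o .../crt0.o .../init.o foo.o \
//       --start-group -lstandalone -lc -lgcc --end-group .../fini.o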
void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                  const InputInfo &Output,
                                  const InputInfoList &Inputs,
                                  const ArgList &Args,
                                  const char *LinkingOutput) const {
  std::string Linker = getToolChain().GetProgramPath(getShortName());
  ArgStringList CmdArgs;
  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);
  CmdArgs.push_back("-shared");
  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());
  C.addCommand(llvm::make_unique<Command>(JA, *this,
                                          Args.MakeArgString(Linker), CmdArgs,
                                          Inputs));
}
// AMDGPU tools end.

wasm::Linker::Linker(const ToolChain &TC)
    : GnuTool("wasm::Linker", "lld", TC) {}

bool wasm::Linker::isLinkJob() const { return true; }

bool wasm::Linker::hasIntegratedCPP() const { return false; }

void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                const InputInfo &Output,
                                const InputInfoList &Inputs,
                                const ArgList &Args,
                                const char *LinkingOutput) const {
  const ToolChain &ToolChain = getToolChain();
  const Driver &D = ToolChain.getDriver();
  const char *Linker = Args.MakeArgString(ToolChain.GetLinkerPath());
  ArgStringList CmdArgs;
  CmdArgs.push_back("-flavor");
  CmdArgs.push_back("ld");

  // Enable garbage collection of unused input sections by default, since code
  // size is of particular importance. This is significantly facilitated by
  // the enabling of -ffunction-sections and -fdata-sections in
  // Clang::ConstructJob.
  if (areOptimizationsEnabled(Args))
    CmdArgs.push_back("--gc-sections");

  if (Args.hasArg(options::OPT_rdynamic))
    CmdArgs.push_back("-export-dynamic");
  if (Args.hasArg(options::OPT_s))
    CmdArgs.push_back("--strip-all");
  if (Args.hasArg(options::OPT_shared))
    CmdArgs.push_back("-shared");
  if (Args.hasArg(options::OPT_static))
    CmdArgs.push_back("-Bstatic");

  Args.AddAllArgs(CmdArgs, options::OPT_L);
  ToolChain.AddFilePathLibArgs(Args, CmdArgs);

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    if (Args.hasArg(options::OPT_shared))
      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("rcrt1.o")));
    else if (Args.hasArg(options::OPT_pie))
      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("Scrt1.o")));
    else
      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt1.o")));

    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o")));
  }

  AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs);

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
    if (D.CCCIsCXX())
      ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);

    if (Args.hasArg(options::OPT_pthread))
      CmdArgs.push_back("-lpthread");

    CmdArgs.push_back("-lc");
    CmdArgs.push_back("-lcompiler_rt");
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles))
    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o")));

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  C.addCommand(llvm::make_unique<Command>(JA, *this, Linker, CmdArgs, Inputs));
}

const std::string arm::getARMArch(StringRef Arch, const llvm::Triple &Triple) {
  std::string MArch;
  if (!Arch.empty())
    MArch = Arch;
  else
    MArch = Triple.getArchName();
  MArch = StringRef(MArch).split("+").first.lower();

  // Handle -march=native.
  if (MArch == "native") {
    std::string CPU = llvm::sys::getHostCPUName();
    if (CPU != "generic") {
      // Translate the native cpu into the architecture suffix for that CPU.
      StringRef Suffix = arm::getLLVMArchSuffixForARM(CPU, MArch, Triple);
      // If there is no valid architecture suffix for this CPU we don't know
      // how to handle it, so return no architecture.
      if (Suffix.empty())
        MArch = "";
      else
        MArch = std::string("arm") + Suffix.str();
    }
  }

  return MArch;
}

/// Get the (LLVM) name of the minimum ARM CPU for the arch we are targeting.
StringRef arm::getARMCPUForMArch(StringRef Arch, const llvm::Triple &Triple) {
  std::string MArch = getARMArch(Arch, Triple);
  // getARMCPUForArch defaults to the triple if MArch is empty, but empty MArch
  // here means an -march=native that we can't handle, so instead return no
  // CPU.
  if (MArch.empty())
    return StringRef();

  // We need to return an empty string here on invalid MArch values as the
  // various places that call this function can't cope with a null result.
  return Triple.getARMCPUForArch(MArch);
}

/// getARMTargetCPU - Get the (LLVM) name of the ARM cpu we are targeting.
std::string arm::getARMTargetCPU(StringRef CPU, StringRef Arch,
                                 const llvm::Triple &Triple) {
  // FIXME: Warn on inconsistent use of -mcpu and -march.
  // If we have -mcpu=, use that.
  if (!CPU.empty()) {
    std::string MCPU = StringRef(CPU).split("+").first.lower();
    // Handle -mcpu=native.
    if (MCPU == "native")
      return llvm::sys::getHostCPUName();
    else
      return MCPU;
  }

  return getARMCPUForMArch(Arch, Triple);
}
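// Example of the resolution order above (values are illustrative): for
// "-mcpu=cortex-a15+neon" the "+"-suffix is split off and getARMTargetCPU()
// returns "cortex-a15"; with no -mcpu, "-march=armv7-a" falls through to
// getARMCPUForMArch(), which lets the triple pick its default CPU for that
// architecture. "-mcpu=native" and "-march=native" resolve against the host
// via llvm::sys::getHostCPUName().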
/// getLLVMArchSuffixForARM - Get the LLVM arch name to use for a particular
/// CPU (or Arch, if CPU is generic).
// FIXME: This is redundant with -mcpu, why does LLVM use this.
StringRef arm::getLLVMArchSuffixForARM(StringRef CPU, StringRef Arch,
                                       const llvm::Triple &Triple) {
  unsigned ArchKind;
  if (CPU == "generic") {
    std::string ARMArch = tools::arm::getARMArch(Arch, Triple);
    ArchKind = llvm::ARM::parseArch(ARMArch);
    if (ArchKind == llvm::ARM::AK_INVALID)
      // In case of generic Arch, i.e. "arm",
      // extract arch from default cpu of the Triple
      ArchKind = llvm::ARM::parseCPUArch(Triple.getARMCPUForArch(ARMArch));
  } else {
    // FIXME: horrible hack to get around the fact that Cortex-A7 is only an
    // armv7k triple if it's actually been specified via "-arch armv7k".
    ArchKind = (Arch == "armv7k" || Arch == "thumbv7k")
                   ? (unsigned)llvm::ARM::AK_ARMV7K
                   : llvm::ARM::parseCPUArch(CPU);
  }
  if (ArchKind == llvm::ARM::AK_INVALID)
    return "";
  return llvm::ARM::getSubArch(ArchKind);
}

void arm::appendEBLinkFlags(const ArgList &Args, ArgStringList &CmdArgs,
                            const llvm::Triple &Triple) {
  if (Args.hasArg(options::OPT_r))
    return;

  // ARMv7 (and later) and ARMv6-M do not support BE-32, so instruct the linker
  // to generate BE-8 executables.
  if (getARMSubArchVersionNumber(Triple) >= 7 || isARMMProfile(Triple))
    CmdArgs.push_back("--be8");
}

mips::NanEncoding mips::getSupportedNanEncoding(StringRef &CPU) {
  // Strictly speaking, mips32r2 and mips64r2 are NanLegacy-only since Nan2008
  // was first introduced in Release 3. However, other compilers have
  // traditionally allowed it for Release 2 so we should do the same.
  return (NanEncoding)llvm::StringSwitch<int>(CPU)
      .Case("mips1", NanLegacy)
      .Case("mips2", NanLegacy)
      .Case("mips3", NanLegacy)
      .Case("mips4", NanLegacy)
      .Case("mips5", NanLegacy)
      .Case("mips32", NanLegacy)
      .Case("mips32r2", NanLegacy | Nan2008)
      .Case("mips32r3", NanLegacy | Nan2008)
      .Case("mips32r5", NanLegacy | Nan2008)
      .Case("mips32r6", Nan2008)
      .Case("mips64", NanLegacy)
      .Case("mips64r2", NanLegacy | Nan2008)
      .Case("mips64r3", NanLegacy | Nan2008)
      .Case("mips64r5", NanLegacy | Nan2008)
      .Case("mips64r6", Nan2008)
      .Default(NanLegacy);
}

bool mips::hasCompactBranches(StringRef &CPU) {
  // mips32r6 and mips64r6 have compact branches.
  return llvm::StringSwitch<bool>(CPU)
      .Case("mips32r6", true)
      .Case("mips64r6", true)
      .Default(false);
}

bool mips::hasMipsAbiArg(const ArgList &Args, const char *Value) {
  Arg *A = Args.getLastArg(options::OPT_mabi_EQ);
  return A && (A->getValue() == StringRef(Value));
}

bool mips::isUCLibc(const ArgList &Args) {
  Arg *A = Args.getLastArg(options::OPT_m_libc_Group);
  return A && A->getOption().matches(options::OPT_muclibc);
}

bool mips::isNaN2008(const ArgList &Args, const llvm::Triple &Triple) {
  if (Arg *NaNArg = Args.getLastArg(options::OPT_mnan_EQ))
    return llvm::StringSwitch<bool>(NaNArg->getValue())
        .Case("2008", true)
        .Case("legacy", false)
        .Default(false);

  // NaN2008 is the default for MIPS32r6/MIPS64r6.
  return llvm::StringSwitch<bool>(getCPUName(Args, Triple))
      .Cases("mips32r6", "mips64r6", true)
      .Default(false);

  return false;
}

bool mips::isFP64ADefault(const llvm::Triple &Triple, StringRef CPUName) {
  if (!Triple.isAndroid())
    return false;

  // Android MIPS32R6 defaults to FP64A.
  return llvm::StringSwitch<bool>(CPUName)
      .Case("mips32r6", true)
      .Default(false);
}

bool mips::isFPXXDefault(const llvm::Triple &Triple, StringRef CPUName,
                         StringRef ABIName, mips::FloatABI FloatABI) {
  if (Triple.getVendor() != llvm::Triple::ImaginationTechnologies &&
      Triple.getVendor() != llvm::Triple::MipsTechnologies &&
      !Triple.isAndroid())
    return false;

  if (ABIName != "32")
    return false;

  // FPXX shouldn't be used if either -msoft-float or -mfloat-abi=soft is
  // present.
  if (FloatABI == mips::FloatABI::Soft)
    return false;

  return llvm::StringSwitch<bool>(CPUName)
      .Cases("mips2", "mips3", "mips4", "mips5", true)
      .Cases("mips32", "mips32r2", "mips32r3", "mips32r5", true)
      .Cases("mips64", "mips64r2", "mips64r3", "mips64r5", true)
      .Default(false);
}

bool mips::shouldUseFPXX(const ArgList &Args, const llvm::Triple &Triple,
                         StringRef CPUName, StringRef ABIName,
                         mips::FloatABI FloatABI) {
  bool UseFPXX = isFPXXDefault(Triple, CPUName, ABIName, FloatABI);

  // FPXX shouldn't be used if -msingle-float is present.
  if (Arg *A = Args.getLastArg(options::OPT_msingle_float,
                               options::OPT_mdouble_float))
    if (A->getOption().matches(options::OPT_msingle_float))
      UseFPXX = false;

  return UseFPXX;
}
llvm::Triple::ArchType darwin::getArchTypeForMachOArchName(StringRef Str) {
  // See arch(3) and llvm-gcc's driver-driver.c. We don't implement support for
  // archs which Darwin doesn't use.

  // The matching this routine does is fairly pointless, since it is neither
  // the complete architecture list, nor a reasonable subset. The problem is
  // that historically the driver driver accepts this and also ties its
  // -march= handling to the architecture name, so we need to be careful before
  // removing support for it.

  // This code must be kept in sync with Clang's Darwin specific argument
  // translation.

  return llvm::StringSwitch<llvm::Triple::ArchType>(Str)
      .Cases("ppc", "ppc601", "ppc603", "ppc604", "ppc604e", llvm::Triple::ppc)
      .Cases("ppc750", "ppc7400", "ppc7450", "ppc970", llvm::Triple::ppc)
      .Case("ppc64", llvm::Triple::ppc64)
      .Cases("i386", "i486", "i486SX", "i586", "i686", llvm::Triple::x86)
      .Cases("pentium", "pentpro", "pentIIm3", "pentIIm5", "pentium4",
             llvm::Triple::x86)
      .Cases("x86_64", "x86_64h", llvm::Triple::x86_64)
      // This is derived from the driver driver.
      .Cases("arm", "armv4t", "armv5", "armv6", "armv6m", llvm::Triple::arm)
      .Cases("armv7", "armv7em", "armv7k", "armv7m", llvm::Triple::arm)
      .Cases("armv7s", "xscale", llvm::Triple::arm)
      .Case("arm64", llvm::Triple::aarch64)
      .Case("r600", llvm::Triple::r600)
      .Case("amdgcn", llvm::Triple::amdgcn)
      .Case("nvptx", llvm::Triple::nvptx)
      .Case("nvptx64", llvm::Triple::nvptx64)
      .Case("amdil", llvm::Triple::amdil)
      .Case("spir", llvm::Triple::spir)
      .Default(llvm::Triple::UnknownArch);
}

void darwin::setTripleTypeForMachOArchName(llvm::Triple &T, StringRef Str) {
  const llvm::Triple::ArchType Arch = getArchTypeForMachOArchName(Str);
  T.setArch(Arch);

  if (Str == "x86_64h")
    T.setArchName(Str);
  else if (Str == "armv6m" || Str == "armv7m" || Str == "armv7em") {
    T.setOS(llvm::Triple::UnknownOS);
    T.setObjectFormat(llvm::Triple::MachO);
  }
}
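// Sample mappings performed above (illustrative): "ppc7400" -> ppc,
// "pentIIm3" -> x86, "armv7s" -> arm, "arm64" -> aarch64. For "x86_64h" the
// spelling is preserved as the arch name so the Haswell slice keeps its
// identity, and for the embedded "armv6m"/"armv7m"/"armv7em" names the OS is
// cleared while the Mach-O object format is kept.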
const char *Clang::getBaseInputName(const ArgList &Args,
                                    const InputInfo &Input) {
  return Args.MakeArgString(llvm::sys::path::filename(Input.getBaseInput()));
}

const char *Clang::getBaseInputStem(const ArgList &Args,
                                    const InputInfoList &Inputs) {
  const char *Str = getBaseInputName(Args, Inputs[0]);

  if (const char *End = strrchr(Str, '.'))
    return Args.MakeArgString(std::string(Str, End));

  return Str;
}

const char *Clang::getDependencyFileName(const ArgList &Args,
                                         const InputInfoList &Inputs) {
  // FIXME: Think about this more.
  std::string Res;

  if (Arg *OutputOpt = Args.getLastArg(options::OPT_o)) {
    std::string Str(OutputOpt->getValue());
    Res = Str.substr(0, Str.rfind('.'));
  } else {
    Res = getBaseInputStem(Args, Inputs);
  }
  return Args.MakeArgString(Res + ".d");
}

void cloudabi::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs,
                                    const ArgList &Args,
                                    const char *LinkingOutput) const {
  const ToolChain &ToolChain = getToolChain();
  const Driver &D = ToolChain.getDriver();
  ArgStringList CmdArgs;

  // Silence warning for "clang -g foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_g_Group);
  // and "clang -emit-llvm foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_emit_llvm);
  // and for "clang -w foo.o -o foo". Other warning options are already
  // handled somewhere else.
  Args.ClaimAllArgs(options::OPT_w);

  if (!D.SysRoot.empty())
    CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));

  // CloudABI only supports static linkage.
  CmdArgs.push_back("-Bstatic");
  CmdArgs.push_back("--no-dynamic-linker");

  // Provide PIE linker flags in case PIE is default for the architecture.
  if (ToolChain.isPIEDefault()) {
    CmdArgs.push_back("-pie");
    CmdArgs.push_back("-zrelro");
  }

  CmdArgs.push_back("--eh-frame-hdr");
  CmdArgs.push_back("--gc-sections");

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Invalid output.");
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt0.o")));
    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtbegin.o")));
  }

  Args.AddAllArgs(CmdArgs, options::OPT_L);
  ToolChain.AddFilePathLibArgs(Args, CmdArgs);
  Args.AddAllArgs(CmdArgs,
                  {options::OPT_T_Group, options::OPT_e, options::OPT_s,
                   options::OPT_t, options::OPT_Z_Flag, options::OPT_r});

  if (D.isUsingLTO())
    AddGoldPlugin(ToolChain, Args, CmdArgs, D.getLTOMode() == LTOK_Thin);

  AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs);

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
    if (D.CCCIsCXX())
      ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
    CmdArgs.push_back("-lc");
    CmdArgs.push_back("-lcompiler_rt");
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles))
    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtend.o")));

  const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
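// Behaviour sketch for getDependencyFileName() (hypothetical inputs):
// "clang -MD -c foo.c -o out/foo.o" derives the dependency file "out/foo.d"
// from the -o value, while "clang -MD -c foo.c" (no -o) falls back to the
// base input stem and produces "foo.d".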
void darwin::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                     const InputInfo &Output,
                                     const InputInfoList &Inputs,
                                     const ArgList &Args,
                                     const char *LinkingOutput) const {
  ArgStringList CmdArgs;

  assert(Inputs.size() == 1 && "Unexpected number of inputs.");
  const InputInfo &Input = Inputs[0];

  // Determine the original source input.
  const Action *SourceAction = &JA;
  while (SourceAction->getKind() != Action::InputClass) {
    assert(!SourceAction->getInputs().empty() && "unexpected root action!");
    SourceAction = SourceAction->getInputs()[0];
  }

  // If -fno-integrated-as is used add -Q to the darwin assembler driver to
  // make sure it runs its system assembler not clang's integrated assembler.
  // Applicable to darwin11+ and Xcode 4+. darwin<10 lacked integrated-as.
  // FIXME: at run-time detect assembler capabilities or rely on version
  // information forwarded by -target-assembler-version.
  if (Args.hasArg(options::OPT_fno_integrated_as)) {
    const llvm::Triple &T(getToolChain().getTriple());
    if (!(T.isMacOSX() && T.isMacOSXVersionLT(10, 7)))
      CmdArgs.push_back("-Q");
  }

  // Forward -g, assuming we are dealing with an actual assembly file.
  if (SourceAction->getType() == types::TY_Asm ||
      SourceAction->getType() == types::TY_PP_Asm) {
    if (Args.hasArg(options::OPT_gstabs))
      CmdArgs.push_back("--gstabs");
    else if (Args.hasArg(options::OPT_g_Group))
      CmdArgs.push_back("-g");
  }

  // Derived from asm spec.
  AddMachOArch(Args, CmdArgs);

  // Use -force_cpusubtype_ALL on x86 by default.
  if (getToolChain().getArch() == llvm::Triple::x86 ||
      getToolChain().getArch() == llvm::Triple::x86_64 ||
      Args.hasArg(options::OPT_force__cpusubtype__ALL))
    CmdArgs.push_back("-force_cpusubtype_ALL");

  if (getToolChain().getArch() != llvm::Triple::x86_64 &&
      (((Args.hasArg(options::OPT_mkernel) ||
         Args.hasArg(options::OPT_fapple_kext)) &&
        getMachOToolChain().isKernelStatic()) ||
       Args.hasArg(options::OPT_static)))
    CmdArgs.push_back("-static");

  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
                       options::OPT_Xassembler);

  assert(Output.isFilename() && "Unexpected lipo output.");
  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  assert(Input.isFilename() && "Invalid input.");
  CmdArgs.push_back(Input.getFilename());

  // asm_final spec is empty.

  const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void darwin::MachOTool::anchor() {}

void darwin::MachOTool::AddMachOArch(const ArgList &Args,
                                     ArgStringList &CmdArgs) const {
  StringRef ArchName = getMachOToolChain().getMachOArchName(Args);

  // Derived from darwin_arch spec.
  CmdArgs.push_back("-arch");
  CmdArgs.push_back(Args.MakeArgString(ArchName));

  // FIXME: Is this needed anymore?
  if (ArchName == "arm")
    CmdArgs.push_back("-force_cpusubtype_ALL");
}

bool darwin::Linker::NeedsTempPath(const InputInfoList &Inputs) const {
  // We only need to generate a temp path for LTO if we aren't compiling object
  // files. When compiling source files, we run 'dsymutil' after linking. We
  // don't run 'dsymutil' when compiling object files.
  for (const auto &Input : Inputs)
    if (Input.getType() != types::TY_Object)
      return true;

  return false;
}
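// Reading of NeedsTempPath() (illustrative): when the user links prebuilt .o
// files directly, every input is TY_Object and no temp path is needed; when
// sources are compiled and linked in one invocation the inputs are not plain
// objects, so AddLinkArgs() below reserves an -object_path_lto temporary
// whose lifetime extends past the later dsymutil step.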
void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args,
                                 ArgStringList &CmdArgs,
                                 const InputInfoList &Inputs) const {
  const Driver &D = getToolChain().getDriver();
  const toolchains::MachO &MachOTC = getMachOToolChain();

  unsigned Version[5] = {0, 0, 0, 0, 0};
  if (Arg *A = Args.getLastArg(options::OPT_mlinker_version_EQ)) {
    if (!Driver::GetReleaseVersion(A->getValue(), Version))
      D.Diag(diag::err_drv_invalid_version_number) << A->getAsString(Args);
  }

  // Newer linkers support -demangle. Pass it if supported and not disabled by
  // the user.
  if (Version[0] >= 100 && !Args.hasArg(options::OPT_Z_Xlinker__no_demangle))
    CmdArgs.push_back("-demangle");

  if (Args.hasArg(options::OPT_rdynamic) && Version[0] >= 137)
    CmdArgs.push_back("-export_dynamic");

  // If we are using App Extension restrictions, pass a flag to the linker
  // telling it that the compiled code has been audited.
  if (Args.hasFlag(options::OPT_fapplication_extension,
                   options::OPT_fno_application_extension, false))
    CmdArgs.push_back("-application_extension");

  if (D.isUsingLTO()) {
    // If we are using LTO, then automatically create a temporary file path for
    // the linker to use, so that its lifetime will extend past a possible
    // dsymutil step.
    if (Version[0] >= 116 && NeedsTempPath(Inputs)) {
      const char *TmpPath = C.getArgs().MakeArgString(
          D.GetTemporaryPath("cc", types::getTypeTempSuffix(types::TY_Object)));
      C.addTempFile(TmpPath);
      CmdArgs.push_back("-object_path_lto");
      CmdArgs.push_back(TmpPath);
    }

    // Use -lto_library option to specify the libLTO.dylib path. Try to find
    // it in clang installed libraries. If not found, the option is not used
    // and 'ld' will use its default mechanism to search for libLTO.dylib.
    if (Version[0] >= 133) {
      // Search for libLTO in <InstalledDir>/../lib/libLTO.dylib
      StringRef P = llvm::sys::path::parent_path(D.getInstalledDir());
      SmallString<128> LibLTOPath(P);
      llvm::sys::path::append(LibLTOPath, "lib");
      llvm::sys::path::append(LibLTOPath, "libLTO.dylib");
      if (llvm::sys::fs::exists(LibLTOPath)) {
        CmdArgs.push_back("-lto_library");
        CmdArgs.push_back(C.getArgs().MakeArgString(LibLTOPath));
      } else {
        D.Diag(diag::warn_drv_lto_libpath);
      }
    }
  }

  // Derived from the "link" spec.
  Args.AddAllArgs(CmdArgs, options::OPT_static);
  if (!Args.hasArg(options::OPT_static))
    CmdArgs.push_back("-dynamic");
  if (Args.hasArg(options::OPT_fgnu_runtime)) {
    // FIXME: gcc replaces -lobjc in forward args with -lobjc-gnu
    // here. How do we wish to handle such things?
  }

  if (!Args.hasArg(options::OPT_dynamiclib)) {
    AddMachOArch(Args, CmdArgs);
    // FIXME: Why do this only on this path?
    Args.AddLastArg(CmdArgs, options::OPT_force__cpusubtype__ALL);

    Args.AddLastArg(CmdArgs, options::OPT_bundle);
    Args.AddAllArgs(CmdArgs, options::OPT_bundle__loader);
    Args.AddAllArgs(CmdArgs, options::OPT_client__name);

    Arg *A;
    if ((A = Args.getLastArg(options::OPT_compatibility__version)) ||
        (A = Args.getLastArg(options::OPT_current__version)) ||
        (A = Args.getLastArg(options::OPT_install__name)))
      D.Diag(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args)
                                                       << "-dynamiclib";

    Args.AddLastArg(CmdArgs, options::OPT_force__flat__namespace);
    Args.AddLastArg(CmdArgs, options::OPT_keep__private__externs);
    Args.AddLastArg(CmdArgs, options::OPT_private__bundle);
  } else {
    CmdArgs.push_back("-dylib");

    Arg *A;
    if ((A = Args.getLastArg(options::OPT_bundle)) ||
        (A = Args.getLastArg(options::OPT_bundle__loader)) ||
        (A = Args.getLastArg(options::OPT_client__name)) ||
        (A = Args.getLastArg(options::OPT_force__flat__namespace)) ||
        (A = Args.getLastArg(options::OPT_keep__private__externs)) ||
        (A = Args.getLastArg(options::OPT_private__bundle)))
      D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args)
                                                      << "-dynamiclib";

    Args.AddAllArgsTranslated(CmdArgs, options::OPT_compatibility__version,
                              "-dylib_compatibility_version");
    Args.AddAllArgsTranslated(CmdArgs, options::OPT_current__version,
                              "-dylib_current_version");

    AddMachOArch(Args, CmdArgs);

    Args.AddAllArgsTranslated(CmdArgs, options::OPT_install__name,
                              "-dylib_install_name");
  }
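  // Translation example for the -dynamiclib path above (illustrative):
  //   clang -dynamiclib -compatibility_version 1 -current_version 1.2 \
  //         -install_name @rpath/libfoo.dylib ...
  // becomes, on the ld command line,
  //   -dylib -dylib_compatibility_version 1 -dylib_current_version 1.2 \
  //   -dylib_install_name @rpath/libfoo.dylib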
  Args.AddLastArg(CmdArgs, options::OPT_all__load);
  Args.AddAllArgs(CmdArgs, options::OPT_allowable__client);
  Args.AddLastArg(CmdArgs, options::OPT_bind__at__load);
  if (MachOTC.isTargetIOSBased())
    Args.AddLastArg(CmdArgs, options::OPT_arch__errors__fatal);
  Args.AddLastArg(CmdArgs, options::OPT_dead__strip);
  Args.AddLastArg(CmdArgs, options::OPT_no__dead__strip__inits__and__terms);
  Args.AddAllArgs(CmdArgs, options::OPT_dylib__file);
  Args.AddLastArg(CmdArgs, options::OPT_dynamic);
  Args.AddAllArgs(CmdArgs, options::OPT_exported__symbols__list);
  Args.AddLastArg(CmdArgs, options::OPT_flat__namespace);
  Args.AddAllArgs(CmdArgs, options::OPT_force__load);
  Args.AddAllArgs(CmdArgs, options::OPT_headerpad__max__install__names);
  Args.AddAllArgs(CmdArgs, options::OPT_image__base);
  Args.AddAllArgs(CmdArgs, options::OPT_init);

  // Add the deployment target.
  MachOTC.addMinVersionArgs(Args, CmdArgs);

  Args.AddLastArg(CmdArgs, options::OPT_nomultidefs);
  Args.AddLastArg(CmdArgs, options::OPT_multi__module);
  Args.AddLastArg(CmdArgs, options::OPT_single__module);
  Args.AddAllArgs(CmdArgs, options::OPT_multiply__defined);
  Args.AddAllArgs(CmdArgs, options::OPT_multiply__defined__unused);

  if (const Arg *A =
          Args.getLastArg(options::OPT_fpie, options::OPT_fPIE,
                          options::OPT_fno_pie, options::OPT_fno_PIE)) {
    if (A->getOption().matches(options::OPT_fpie) ||
        A->getOption().matches(options::OPT_fPIE))
      CmdArgs.push_back("-pie");
    else
      CmdArgs.push_back("-no_pie");
  }

  // for embed-bitcode, use -bitcode_bundle in linker command
  if (C.getDriver().embedBitcodeEnabled() ||
      C.getDriver().embedBitcodeMarkerOnly()) {
    // Check if the toolchain supports bitcode build flow.
    if (MachOTC.SupportsEmbeddedBitcode())
      CmdArgs.push_back("-bitcode_bundle");
    else
      D.Diag(diag::err_drv_bitcode_unsupported_on_toolchain);
  }

  Args.AddLastArg(CmdArgs, options::OPT_prebind);
  Args.AddLastArg(CmdArgs, options::OPT_noprebind);
  Args.AddLastArg(CmdArgs, options::OPT_nofixprebinding);
  Args.AddLastArg(CmdArgs, options::OPT_prebind__all__twolevel__modules);
  Args.AddLastArg(CmdArgs, options::OPT_read__only__relocs);
  Args.AddAllArgs(CmdArgs, options::OPT_sectcreate);
  Args.AddAllArgs(CmdArgs, options::OPT_sectorder);
  Args.AddAllArgs(CmdArgs, options::OPT_seg1addr);
  Args.AddAllArgs(CmdArgs, options::OPT_segprot);
  Args.AddAllArgs(CmdArgs, options::OPT_segaddr);
  Args.AddAllArgs(CmdArgs, options::OPT_segs__read__only__addr);
  Args.AddAllArgs(CmdArgs, options::OPT_segs__read__write__addr);
  Args.AddAllArgs(CmdArgs, options::OPT_seg__addr__table);
  Args.AddAllArgs(CmdArgs, options::OPT_seg__addr__table__filename);
  Args.AddAllArgs(CmdArgs, options::OPT_sub__library);
  Args.AddAllArgs(CmdArgs, options::OPT_sub__umbrella);
  // Give --sysroot= preference, over the Apple specific behavior to also use
  // --isysroot as the syslibroot.
  StringRef sysroot = C.getSysRoot();
  if (sysroot != "") {
    CmdArgs.push_back("-syslibroot");
    CmdArgs.push_back(C.getArgs().MakeArgString(sysroot));
  } else if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) {
    CmdArgs.push_back("-syslibroot");
    CmdArgs.push_back(A->getValue());
  }

  Args.AddLastArg(CmdArgs, options::OPT_twolevel__namespace);
  Args.AddLastArg(CmdArgs, options::OPT_twolevel__namespace__hints);
  Args.AddAllArgs(CmdArgs, options::OPT_umbrella);
  Args.AddAllArgs(CmdArgs, options::OPT_undefined);
  Args.AddAllArgs(CmdArgs, options::OPT_unexported__symbols__list);
  Args.AddAllArgs(CmdArgs, options::OPT_weak__reference__mismatches);
  Args.AddLastArg(CmdArgs, options::OPT_X_Flag);
  Args.AddAllArgs(CmdArgs, options::OPT_y);
  Args.AddLastArg(CmdArgs, options::OPT_w);
  Args.AddAllArgs(CmdArgs, options::OPT_pagezero__size);
  Args.AddAllArgs(CmdArgs, options::OPT_segs__read__);
  Args.AddLastArg(CmdArgs, options::OPT_seglinkedit);
  Args.AddLastArg(CmdArgs, options::OPT_noseglinkedit);
  Args.AddAllArgs(CmdArgs, options::OPT_sectalign);
  Args.AddAllArgs(CmdArgs, options::OPT_sectobjectsymbols);
  Args.AddAllArgs(CmdArgs, options::OPT_segcreate);
  Args.AddLastArg(CmdArgs, options::OPT_whyload);
  Args.AddLastArg(CmdArgs, options::OPT_whatsloaded);
  Args.AddAllArgs(CmdArgs, options::OPT_dylinker__install__name);
  Args.AddLastArg(CmdArgs, options::OPT_dylinker);
  Args.AddLastArg(CmdArgs, options::OPT_Mach);
}
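// Precedence example for the -syslibroot handling above (illustrative):
// given both "--sysroot=/sdk" and "-isysroot /other", AddLinkArgs() emits
// "-syslibroot /sdk"; only when no --sysroot is present does -isysroot
// double as the syslibroot.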
void darwin::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                  const InputInfo &Output,
                                  const InputInfoList &Inputs,
                                  const ArgList &Args,
                                  const char *LinkingOutput) const {
  assert(Output.getType() == types::TY_Image && "Invalid linker output type.");

  // If the number of arguments surpasses the system limits, we will encode the
  // input files in a separate file, shortening the command line. To this end,
  // build a list of input file names that can be passed via a file with the
  // -filelist linker option.
  llvm::opt::ArgStringList InputFileList;

  // The logic here is derived from gcc's behavior; most of which
  // comes from specs (starting with link_command). Consult gcc for
  // more information.
  ArgStringList CmdArgs;

  /// Hack(tm) to ignore linking errors when we are doing ARC migration.
  if (Args.hasArg(options::OPT_ccc_arcmt_check,
                  options::OPT_ccc_arcmt_migrate)) {
    for (const auto &Arg : Args)
      Arg->claim();
    const char *Exec =
        Args.MakeArgString(getToolChain().GetProgramPath("touch"));
    CmdArgs.push_back(Output.getFilename());
    C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, None));
    return;
  }

  // I'm not sure why this particular decomposition exists in gcc, but
  // we follow suit for ease of comparison.
  AddLinkArgs(C, Args, CmdArgs, Inputs);

  // It seems that the 'e' option is completely ignored for dynamic executables
  // (the default), and with static executables, the last one wins, as
  // expected.
  Args.AddAllArgs(CmdArgs, {options::OPT_d_Flag, options::OPT_s,
                            options::OPT_t, options::OPT_Z_Flag,
                            options::OPT_u_Group, options::OPT_e,
                            options::OPT_r});

  // Forward -ObjC when either -ObjC or -ObjC++ is used, to force loading
  // members of static archive libraries which implement Objective-C classes
  // or categories.
  if (Args.hasArg(options::OPT_ObjC) || Args.hasArg(options::OPT_ObjCXX))
    CmdArgs.push_back("-ObjC");

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles))
    getMachOToolChain().addStartObjectFileArgs(Args, CmdArgs);

  // SafeStack requires its own runtime libraries
  // These libraries should be linked first, to make sure the
  // __safestack_init constructor executes before everything else
  if (getToolChain().getSanitizerArgs().needsSafeStackRt()) {
    getMachOToolChain().AddLinkRuntimeLib(Args, CmdArgs,
                                          "libclang_rt.safestack_osx.a",
                                          /*AlwaysLink=*/true);
  }

  Args.AddAllArgs(CmdArgs, options::OPT_L);

  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);
  // Build the input file for -filelist (list of linker input files) in case
  // we need it later
  for (const auto &II : Inputs) {
    if (!II.isFilename()) {
      // This is a linker input argument.
      // We cannot mix input arguments and file names in a -filelist input,
      // thus we prematurely stop our list (remaining files shall be passed as
      // arguments).
      if (InputFileList.size() > 0)
        break;

      continue;
    }

    InputFileList.push_back(II.getFilename());
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs))
    addOpenMPRuntime(CmdArgs, getToolChain(), Args);

  if (isObjCRuntimeLinked(Args) &&
      !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
    // We use arclite library for both ARC and subscripting support.
    getMachOToolChain().AddLinkARCArgs(Args, CmdArgs);

    CmdArgs.push_back("-framework");
    CmdArgs.push_back("Foundation");
    // Link libobj.
    CmdArgs.push_back("-lobjc");
  }

  if (LinkingOutput) {
    CmdArgs.push_back("-arch_multiple");
    CmdArgs.push_back("-final_output");
    CmdArgs.push_back(LinkingOutput);
  }

  if (Args.hasArg(options::OPT_fnested_functions))
    CmdArgs.push_back("-allow_stack_execute");

  getMachOToolChain().addProfileRTLibs(Args, CmdArgs);

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
    if (getToolChain().getDriver().CCCIsCXX())
      getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);

    // link_ssp spec is empty.

    // Let the tool chain choose which runtime library to link.
    getMachOToolChain().AddLinkRuntimeLibArgs(Args, CmdArgs);
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    // endfile_spec is empty.
  }

  Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
  Args.AddAllArgs(CmdArgs, options::OPT_F);
  // -iframework should be forwarded as -F.
  for (const Arg *A : Args.filtered(options::OPT_iframework))
    CmdArgs.push_back(Args.MakeArgString(std::string("-F") + A->getValue()));

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
    if (Arg *A = Args.getLastArg(options::OPT_fveclib)) {
      if (A->getValue() == StringRef("Accelerate")) {
        CmdArgs.push_back("-framework");
        CmdArgs.push_back("Accelerate");
      }
    }
  }

  const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath());
  std::unique_ptr<Command> Cmd =
      llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs);
  Cmd->setInputFileList(std::move(InputFileList));
  C.addCommand(std::move(Cmd));
}

void darwin::Lipo::ConstructJob(Compilation &C, const JobAction &JA,
                                const InputInfo &Output,
                                const InputInfoList &Inputs,
                                const ArgList &Args,
                                const char *LinkingOutput) const {
  ArgStringList CmdArgs;

  CmdArgs.push_back("-create");
  assert(Output.isFilename() && "Unexpected lipo output.");

  CmdArgs.push_back("-output");
  CmdArgs.push_back(Output.getFilename());

  for (const auto &II : Inputs) {
    assert(II.isFilename() && "Unexpected lipo input.");
    CmdArgs.push_back(II.getFilename());
  }

  const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("lipo"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void darwin::Dsymutil::ConstructJob(Compilation &C, const JobAction &JA,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs,
                                    const ArgList &Args,
                                    const char *LinkingOutput) const {
  ArgStringList CmdArgs;

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  assert(Inputs.size() == 1 && "Unable to handle multiple inputs.");
  const InputInfo &Input = Inputs[0];
  assert(Input.isFilename() && "Unexpected dsymutil input.");
  CmdArgs.push_back(Input.getFilename());

  const char *Exec =
      Args.MakeArgString(getToolChain().GetProgramPath("dsymutil"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
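// Pipeline sketch (illustrative): for a debug build whose dSYM is to be
// verified, the driver chains roughly
//   ld ... -o a.out
//   dsymutil -o a.out.dSYM a.out
//   dwarfdump --verify --debug-info --eh-frame --quiet a.out.dSYM
// where VerifyDebug below consumes the output of the earlier dsymutil run.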
void darwin::VerifyDebug::ConstructJob(Compilation &C, const JobAction &JA,
                                       const InputInfo &Output,
                                       const InputInfoList &Inputs,
                                       const ArgList &Args,
                                       const char *LinkingOutput) const {
  ArgStringList CmdArgs;
  CmdArgs.push_back("--verify");
  CmdArgs.push_back("--debug-info");
  CmdArgs.push_back("--eh-frame");
  CmdArgs.push_back("--quiet");

  assert(Inputs.size() == 1 && "Unable to handle multiple inputs.");
  const InputInfo &Input = Inputs[0];
  assert(Input.isFilename() && "Unexpected verify input");

  // Grabbing the output of the earlier dsymutil run.
  CmdArgs.push_back(Input.getFilename());

  const char *Exec =
      Args.MakeArgString(getToolChain().GetProgramPath("dwarfdump"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void solaris::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                      const InputInfo &Output,
                                      const InputInfoList &Inputs,
                                      const ArgList &Args,
                                      const char *LinkingOutput) const {
  claimNoWarnArgs(Args);
  ArgStringList CmdArgs;

  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
                       options::OPT_Xassembler);

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  for (const auto &II : Inputs)
    CmdArgs.push_back(II.getFilename());

  const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                   const InputInfo &Output,
                                   const InputInfoList &Inputs,
                                   const ArgList &Args,
                                   const char *LinkingOutput) const {
  ArgStringList CmdArgs;

  // Demangle C++ names in errors
  CmdArgs.push_back("-C");

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_shared)) {
    CmdArgs.push_back("-e");
    CmdArgs.push_back("_start");
  }

  if (Args.hasArg(options::OPT_static)) {
    CmdArgs.push_back("-Bstatic");
    CmdArgs.push_back("-dn");
  } else {
    CmdArgs.push_back("-Bdynamic");
    if (Args.hasArg(options::OPT_shared)) {
      CmdArgs.push_back("-shared");
    } else {
      CmdArgs.push_back("--dynamic-linker");
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("ld.so.1")));
    }
  }

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Invalid output.");
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    if (!Args.hasArg(options::OPT_shared))
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crt1.o")));

    CmdArgs.push_back(
        Args.MakeArgString(getToolChain().GetFilePath("crti.o")));
    CmdArgs.push_back(
        Args.MakeArgString(getToolChain().GetFilePath("values-Xa.o")));
    CmdArgs.push_back(
        Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o")));
  }

  getToolChain().AddFilePathLibArgs(Args, CmdArgs);

  Args.AddAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group,
                            options::OPT_e, options::OPT_r});

  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
    if (getToolChain().getDriver().CCCIsCXX())
      getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);
    CmdArgs.push_back("-lgcc_s");
    CmdArgs.push_back("-lc");
    if (!Args.hasArg(options::OPT_shared)) {
      CmdArgs.push_back("-lgcc");
      CmdArgs.push_back("-lm");
    }
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    CmdArgs.push_back(
        Args.MakeArgString(getToolChain().GetFilePath("crtend.o")));
  }
  CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o")));

  getToolChain().addProfileRTLibs(Args, CmdArgs);

  const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath());
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
CmdArgs.push_back("--32"); break; case llvm::Triple::ppc: CmdArgs.push_back("-mppc"); CmdArgs.push_back("-many"); break; case llvm::Triple::sparc: case llvm::Triple::sparcel: { CmdArgs.push_back("-32"); std::string CPU = getCPUName(Args, getToolChain().getTriple()); CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple())); AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } case llvm::Triple::sparcv9: { CmdArgs.push_back("-64"); std::string CPU = getCPUName(Args, getToolChain().getTriple()); CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple())); AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } case llvm::Triple::mips64: case llvm::Triple::mips64el: { StringRef CPUName; StringRef ABIName; mips::getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName); CmdArgs.push_back("-mabi"); CmdArgs.push_back(getGnuCompatibleMipsABIName(ABIName).data()); if (getToolChain().getArch() == llvm::Triple::mips64) CmdArgs.push_back("-EB"); else CmdArgs.push_back("-EL"); AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } default: break; } Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); for (const auto &II : Inputs) CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const Driver &D = getToolChain().getDriver(); ArgStringList CmdArgs; // Silence warning for "clang -g foo.o -o foo" Args.ClaimAllArgs(options::OPT_g_Group); // and "clang -emit-llvm foo.o -o foo" Args.ClaimAllArgs(options::OPT_emit_llvm); // and for "clang -w foo.o -o foo". Other warning options are already // handled somewhere else. 
void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                   const InputInfo &Output,
                                   const InputInfoList &Inputs,
                                   const ArgList &Args,
                                   const char *LinkingOutput) const {
  const Driver &D = getToolChain().getDriver();
  ArgStringList CmdArgs;

  // Silence warning for "clang -g foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_g_Group);
  // and "clang -emit-llvm foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_emit_llvm);
  // and for "clang -w foo.o -o foo". Other warning options are already
  // handled somewhere else.
  Args.ClaimAllArgs(options::OPT_w);

  if (getToolChain().getArch() == llvm::Triple::mips64)
    CmdArgs.push_back("-EB");
  else if (getToolChain().getArch() == llvm::Triple::mips64el)
    CmdArgs.push_back("-EL");

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_shared)) {
    CmdArgs.push_back("-e");
    CmdArgs.push_back("__start");
  }

  if (Args.hasArg(options::OPT_static)) {
    CmdArgs.push_back("-Bstatic");
  } else {
    if (Args.hasArg(options::OPT_rdynamic))
      CmdArgs.push_back("-export-dynamic");
    CmdArgs.push_back("--eh-frame-hdr");
    CmdArgs.push_back("-Bdynamic");
    if (Args.hasArg(options::OPT_shared)) {
      CmdArgs.push_back("-shared");
    } else {
      CmdArgs.push_back("-dynamic-linker");
      CmdArgs.push_back("/usr/libexec/ld.so");
    }
  }

  if (Args.hasArg(options::OPT_nopie))
    CmdArgs.push_back("-nopie");

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Invalid output.");
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    if (!Args.hasArg(options::OPT_shared)) {
      if (Args.hasArg(options::OPT_pg))
        CmdArgs.push_back(
            Args.MakeArgString(getToolChain().GetFilePath("gcrt0.o")));
      else
        CmdArgs.push_back(
            Args.MakeArgString(getToolChain().GetFilePath("crt0.o")));
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o")));
    } else {
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crtbeginS.o")));
    }
  }

  std::string Triple = getToolChain().getTripleString();
  if (Triple.substr(0, 6) == "x86_64")
    Triple.replace(0, 6, "amd64");
  CmdArgs.push_back(
      Args.MakeArgString("-L/usr/lib/gcc-lib/" + Triple + "/4.2.1"));
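  // Path example for the rewrite above (illustrative): a triple of
  // "x86_64-unknown-openbsd..." becomes "amd64-unknown-openbsd...", so the
  // search path reads
  //   -L/usr/lib/gcc-lib/amd64-unknown-openbsd.../4.2.1
  // matching the base-system gcc layout.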
CmdArgs.push_back("-lgcc"); if (Args.hasArg(options::OPT_pthread)) { if (!Args.hasArg(options::OPT_shared) && Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-lpthread_p"); else CmdArgs.push_back("-lpthread"); } if (!Args.hasArg(options::OPT_shared)) { if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-lc_p"); else CmdArgs.push_back("-lc"); } CmdArgs.push_back("-lgcc"); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (!Args.hasArg(options::OPT_shared)) CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtend.o"))); else CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtendS.o"))); } const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void bitrig::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { claimNoWarnArgs(Args); ArgStringList CmdArgs; Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); for (const auto &II : Inputs) CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void bitrig::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const Driver &D = getToolChain().getDriver(); ArgStringList CmdArgs; if (!Args.hasArg(options::OPT_nostdlib, options::OPT_shared)) { CmdArgs.push_back("-e"); CmdArgs.push_back("__start"); } if (Args.hasArg(options::OPT_static)) { CmdArgs.push_back("-Bstatic"); } else { if (Args.hasArg(options::OPT_rdynamic)) CmdArgs.push_back("-export-dynamic"); CmdArgs.push_back("--eh-frame-hdr"); CmdArgs.push_back("-Bdynamic"); if (Args.hasArg(options::OPT_shared)) { CmdArgs.push_back("-shared"); } else { CmdArgs.push_back("-dynamic-linker"); CmdArgs.push_back("/usr/libexec/ld.so"); } } if (Output.isFilename()) { CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); } else { assert(Output.isNothing() && "Invalid output."); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (!Args.hasArg(options::OPT_shared)) { if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("gcrt0.o"))); else CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crt0.o"))); CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o"))); } else { CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtbeginS.o"))); } } Args.AddAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, options::OPT_e}); AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { if (D.CCCIsCXX()) { getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs); if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-lm_p"); else CmdArgs.push_back("-lm"); } if (Args.hasArg(options::OPT_pthread)) { if (!Args.hasArg(options::OPT_shared) && Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-lpthread_p"); else CmdArgs.push_back("-lpthread"); } if (!Args.hasArg(options::OPT_shared)) { if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-lc_p"); else CmdArgs.push_back("-lc"); } StringRef MyArch; switch (getToolChain().getArch()) { case llvm::Triple::arm: 
MyArch = "arm"; break; case llvm::Triple::x86: MyArch = "i386"; break; case llvm::Triple::x86_64: MyArch = "amd64"; break; default: llvm_unreachable("Unsupported architecture"); } CmdArgs.push_back(Args.MakeArgString("-lclang_rt." + MyArch)); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (!Args.hasArg(options::OPT_shared)) CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtend.o"))); else CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtendS.o"))); } const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void freebsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { claimNoWarnArgs(Args); ArgStringList CmdArgs; // When building 32-bit code on FreeBSD/amd64, we have to explicitly // instruct as in the base system to assemble 32-bit code. switch (getToolChain().getArch()) { default: break; case llvm::Triple::x86: CmdArgs.push_back("--32"); break; case llvm::Triple::ppc: CmdArgs.push_back("-a32"); break; case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: { StringRef CPUName; StringRef ABIName; mips::getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName); CmdArgs.push_back("-march"); CmdArgs.push_back(CPUName.data()); CmdArgs.push_back("-mabi"); CmdArgs.push_back(getGnuCompatibleMipsABIName(ABIName).data()); if (getToolChain().getArch() == llvm::Triple::mips || getToolChain().getArch() == llvm::Triple::mips64) CmdArgs.push_back("-EB"); else CmdArgs.push_back("-EL"); if (Arg *A = Args.getLastArg(options::OPT_G)) { StringRef v = A->getValue(); CmdArgs.push_back(Args.MakeArgString("-G" + v)); A->claim(); } AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: { arm::FloatABI ABI = arm::getARMFloatABI(getToolChain(), Args); if (ABI == arm::FloatABI::Hard) CmdArgs.push_back("-mfpu=vfp"); else CmdArgs.push_back("-mfpu=softvfp"); switch (getToolChain().getTriple().getEnvironment()) { case llvm::Triple::GNUEABIHF: case llvm::Triple::GNUEABI: case llvm::Triple::EABI: CmdArgs.push_back("-meabi=5"); break; default: CmdArgs.push_back("-matpcs"); } break; } case llvm::Triple::sparc: case llvm::Triple::sparcel: case llvm::Triple::sparcv9: { std::string CPU = getCPUName(Args, getToolChain().getTriple()); CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple())); AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } } Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); for (const auto &II : Inputs) CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const toolchains::FreeBSD &ToolChain = static_cast(getToolChain()); const Driver &D = ToolChain.getDriver(); const llvm::Triple::ArchType Arch = ToolChain.getArch(); const bool IsPIE = !Args.hasArg(options::OPT_shared) && (Args.hasArg(options::OPT_pie) || ToolChain.isPIEDefault()); ArgStringList 
void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                   const InputInfo &Output,
                                   const InputInfoList &Inputs,
                                   const ArgList &Args,
                                   const char *LinkingOutput) const {
  const toolchains::FreeBSD &ToolChain =
      static_cast<const toolchains::FreeBSD &>(getToolChain());
  const Driver &D = ToolChain.getDriver();
  const llvm::Triple::ArchType Arch = ToolChain.getArch();
  const bool IsPIE =
      !Args.hasArg(options::OPT_shared) &&
      (Args.hasArg(options::OPT_pie) || ToolChain.isPIEDefault());
  ArgStringList CmdArgs;

  // Silence warning for "clang -g foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_g_Group);
  // and "clang -emit-llvm foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_emit_llvm);
  // and for "clang -w foo.o -o foo". Other warning options are already
  // handled somewhere else.
  Args.ClaimAllArgs(options::OPT_w);

  if (!D.SysRoot.empty())
    CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));

  if (IsPIE)
    CmdArgs.push_back("-pie");

  CmdArgs.push_back("--eh-frame-hdr");
  if (Args.hasArg(options::OPT_static)) {
    CmdArgs.push_back("-Bstatic");
  } else {
    if (Args.hasArg(options::OPT_rdynamic))
      CmdArgs.push_back("-export-dynamic");
    if (Args.hasArg(options::OPT_shared)) {
      CmdArgs.push_back("-Bshareable");
    } else {
      CmdArgs.push_back("-dynamic-linker");
      CmdArgs.push_back("/libexec/ld-elf.so.1");
    }
    if (ToolChain.getTriple().getOSMajorVersion() >= 9) {
      if (Arch == llvm::Triple::arm || Arch == llvm::Triple::sparc ||
          Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64) {
        CmdArgs.push_back("--hash-style=both");
      }
    }
    CmdArgs.push_back("--enable-new-dtags");
  }

  // When building 32-bit code on FreeBSD/amd64, we have to explicitly
  // instruct ld in the base system to link 32-bit code.
  if (Arch == llvm::Triple::x86) {
    CmdArgs.push_back("-m");
    CmdArgs.push_back("elf_i386_fbsd");
  }

  if (Arch == llvm::Triple::ppc) {
    CmdArgs.push_back("-m");
    CmdArgs.push_back("elf32ppc_fbsd");
  }

  if (Arg *A = Args.getLastArg(options::OPT_G)) {
    if (ToolChain.getArch() == llvm::Triple::mips ||
        ToolChain.getArch() == llvm::Triple::mipsel ||
        ToolChain.getArch() == llvm::Triple::mips64 ||
        ToolChain.getArch() == llvm::Triple::mips64el) {
      StringRef v = A->getValue();
      CmdArgs.push_back(Args.MakeArgString("-G" + v));
      A->claim();
    }
  }

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Invalid output.");
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    const char *crt1 = nullptr;
    if (!Args.hasArg(options::OPT_shared)) {
      if (Args.hasArg(options::OPT_pg))
        crt1 = "gcrt1.o";
      else if (IsPIE)
        crt1 = "Scrt1.o";
      else
        crt1 = "crt1.o";
    }
    if (crt1)
      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crt1)));

    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o")));

    const char *crtbegin = nullptr;
    if (Args.hasArg(options::OPT_static))
      crtbegin = "crtbeginT.o";
    else if (Args.hasArg(options::OPT_shared) || IsPIE)
      crtbegin = "crtbeginS.o";
    else
      crtbegin = "crtbegin.o";

    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin)));
  }
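  // Startup-file matrix for the block above (illustrative):
  //   -pg              -> gcrt1.o    crtbegin.o
  //   PIE (or -pie)    -> Scrt1.o    crtbeginS.o
  //   -shared          -> (no crt1)  crtbeginS.o
  //   -static          -> crt1.o     crtbeginT.o
  //   default          -> crt1.o     crtbegin.o
  // with crti.o always inserted between crt1 and crtbegin.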
  Args.AddAllArgs(CmdArgs, options::OPT_L);
  ToolChain.AddFilePathLibArgs(Args, CmdArgs);
  Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
  Args.AddAllArgs(CmdArgs, options::OPT_e);
  Args.AddAllArgs(CmdArgs, options::OPT_s);
  Args.AddAllArgs(CmdArgs, options::OPT_t);
  Args.AddAllArgs(CmdArgs, options::OPT_Z_Flag);
  Args.AddAllArgs(CmdArgs, options::OPT_r);

  if (D.isUsingLTO())
    AddGoldPlugin(ToolChain, Args, CmdArgs, D.getLTOMode() == LTOK_Thin);

  bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs);
  AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs);

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
    addOpenMPRuntime(CmdArgs, ToolChain, Args);
    if (D.CCCIsCXX()) {
      ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
      if (Args.hasArg(options::OPT_pg))
        CmdArgs.push_back("-lm_p");
      else
        CmdArgs.push_back("-lm");
    }
    if (NeedsSanitizerDeps)
      linkSanitizerRuntimeDeps(ToolChain, CmdArgs);
    // FIXME: For some reason GCC passes -lgcc and -lgcc_s before adding
    // the default system libraries. Just mimic this for now.
    if (Args.hasArg(options::OPT_pg))
      CmdArgs.push_back("-lgcc_p");
    else
      CmdArgs.push_back("-lgcc");
    if (Args.hasArg(options::OPT_static)) {
      CmdArgs.push_back("-lgcc_eh");
    } else if (Args.hasArg(options::OPT_pg)) {
      CmdArgs.push_back("-lgcc_eh_p");
    } else {
      CmdArgs.push_back("--as-needed");
      CmdArgs.push_back("-lgcc_s");
      CmdArgs.push_back("--no-as-needed");
    }
    if (Args.hasArg(options::OPT_pthread)) {
      if (Args.hasArg(options::OPT_pg))
        CmdArgs.push_back("-lpthread_p");
      else
        CmdArgs.push_back("-lpthread");
    }
    if (Args.hasArg(options::OPT_pg)) {
      if (Args.hasArg(options::OPT_shared))
        CmdArgs.push_back("-lc");
      else
        CmdArgs.push_back("-lc_p");
      CmdArgs.push_back("-lgcc_p");
    } else {
      CmdArgs.push_back("-lc");
      CmdArgs.push_back("-lgcc");
    }
    if (Args.hasArg(options::OPT_static)) {
      CmdArgs.push_back("-lgcc_eh");
    } else if (Args.hasArg(options::OPT_pg)) {
      CmdArgs.push_back("-lgcc_eh_p");
    } else {
      CmdArgs.push_back("--as-needed");
      CmdArgs.push_back("-lgcc_s");
      CmdArgs.push_back("--no-as-needed");
    }
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    if (Args.hasArg(options::OPT_shared) || IsPIE)
      CmdArgs.push_back(
          Args.MakeArgString(ToolChain.GetFilePath("crtendS.o")));
    else
      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtend.o")));
    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o")));
  }

  ToolChain.addProfileRTLibs(Args, CmdArgs);

  const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath());
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
void netbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                     const InputInfo &Output,
                                     const InputInfoList &Inputs,
                                     const ArgList &Args,
                                     const char *LinkingOutput) const {
  claimNoWarnArgs(Args);
  ArgStringList CmdArgs;

  // GNU as needs different flags for creating the correct output format
  // on architectures with different ABIs or optional feature sets.
  switch (getToolChain().getArch()) {
  case llvm::Triple::x86:
    CmdArgs.push_back("--32");
    break;
  case llvm::Triple::arm:
  case llvm::Triple::armeb:
  case llvm::Triple::thumb:
  case llvm::Triple::thumbeb: {
    StringRef MArch, MCPU;
    getARMArchCPUFromArgs(Args, MArch, MCPU, /*FromAs*/ true);
    std::string Arch =
        arm::getARMTargetCPU(MCPU, MArch, getToolChain().getTriple());
    CmdArgs.push_back(Args.MakeArgString("-mcpu=" + Arch));
    break;
  }

  case llvm::Triple::mips:
  case llvm::Triple::mipsel:
  case llvm::Triple::mips64:
  case llvm::Triple::mips64el: {
    StringRef CPUName;
    StringRef ABIName;
    mips::getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName);

    CmdArgs.push_back("-march");
    CmdArgs.push_back(CPUName.data());

    CmdArgs.push_back("-mabi");
    CmdArgs.push_back(getGnuCompatibleMipsABIName(ABIName).data());

    if (getToolChain().getArch() == llvm::Triple::mips ||
        getToolChain().getArch() == llvm::Triple::mips64)
      CmdArgs.push_back("-EB");
    else
      CmdArgs.push_back("-EL");

    AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
    break;
  }

  case llvm::Triple::sparc:
  case llvm::Triple::sparcel: {
    CmdArgs.push_back("-32");
    std::string CPU = getCPUName(Args, getToolChain().getTriple());
    CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple()));
    AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
    break;
  }

  case llvm::Triple::sparcv9: {
    CmdArgs.push_back("-64");
    std::string CPU = getCPUName(Args, getToolChain().getTriple());
    CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple()));
    AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
    break;
  }

  default:
    break;
  }

  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
                       options::OPT_Xassembler);

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  for (const auto &II : Inputs)
    CmdArgs.push_back(II.getFilename());

  const char *Exec = Args.MakeArgString((getToolChain().GetProgramPath("as")));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                  const InputInfo &Output,
                                  const InputInfoList &Inputs,
                                  const ArgList &Args,
                                  const char *LinkingOutput) const {
  const Driver &D = getToolChain().getDriver();
  ArgStringList CmdArgs;

  if (!D.SysRoot.empty())
    CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));

  CmdArgs.push_back("--eh-frame-hdr");
  if (Args.hasArg(options::OPT_static)) {
    CmdArgs.push_back("-Bstatic");
  } else {
    if (Args.hasArg(options::OPT_rdynamic))
      CmdArgs.push_back("-export-dynamic");
    if (Args.hasArg(options::OPT_shared)) {
      CmdArgs.push_back("-Bshareable");
    } else {
      Args.AddAllArgs(CmdArgs, options::OPT_pie);
      CmdArgs.push_back("-dynamic-linker");
      CmdArgs.push_back("/libexec/ld.elf_so");
    }
  }
  switch (getToolChain().getArch()) {
  case llvm::Triple::x86:
    CmdArgs.push_back("-m");
    CmdArgs.push_back("elf_i386");
    break;
  case llvm::Triple::arm:
  case llvm::Triple::thumb:
    CmdArgs.push_back("-m");
    switch (getToolChain().getTriple().getEnvironment()) {
    case llvm::Triple::EABI:
    case llvm::Triple::GNUEABI:
      CmdArgs.push_back("armelf_nbsd_eabi");
      break;
    case llvm::Triple::EABIHF:
    case llvm::Triple::GNUEABIHF:
      CmdArgs.push_back("armelf_nbsd_eabihf");
      break;
    default:
      CmdArgs.push_back("armelf_nbsd");
      break;
    }
    break;
  case llvm::Triple::armeb:
  case llvm::Triple::thumbeb:
    arm::appendEBLinkFlags(
        Args, CmdArgs,
        llvm::Triple(getToolChain().ComputeEffectiveClangTriple(Args)));
    CmdArgs.push_back("-m");
    switch (getToolChain().getTriple().getEnvironment()) {
    case llvm::Triple::EABI:
    case llvm::Triple::GNUEABI:
      CmdArgs.push_back("armelfb_nbsd_eabi");
      break;
    case llvm::Triple::EABIHF:
    case llvm::Triple::GNUEABIHF:
      CmdArgs.push_back("armelfb_nbsd_eabihf");
      break;
    default:
      CmdArgs.push_back("armelfb_nbsd");
      break;
    }
    break;
  case llvm::Triple::mips64:
  case llvm::Triple::mips64el:
    if (mips::hasMipsAbiArg(Args, "32")) {
      CmdArgs.push_back("-m");
      if (getToolChain().getArch() == llvm::Triple::mips64)
        CmdArgs.push_back("elf32btsmip");
      else
        CmdArgs.push_back("elf32ltsmip");
    } else if (mips::hasMipsAbiArg(Args, "64")) {
      CmdArgs.push_back("-m");
      if (getToolChain().getArch() == llvm::Triple::mips64)
        CmdArgs.push_back("elf64btsmip");
      else
        CmdArgs.push_back("elf64ltsmip");
    }
    break;
  case llvm::Triple::ppc:
    CmdArgs.push_back("-m");
    CmdArgs.push_back("elf32ppc_nbsd");
    break;
  case llvm::Triple::ppc64:
  case llvm::Triple::ppc64le:
    CmdArgs.push_back("-m");
    CmdArgs.push_back("elf64ppc");
    break;
  case llvm::Triple::sparc:
    CmdArgs.push_back("-m");
    CmdArgs.push_back("elf32_sparc");
    break;
  case llvm::Triple::sparcv9:
    CmdArgs.push_back("-m");
    CmdArgs.push_back("elf64_sparc");
    break;
  default:
    break;
  }

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Invalid output.");
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    if (!Args.hasArg(options::OPT_shared)) {
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crt0.o")));
    }
    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crti.o")));
    if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie)) {
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crtbeginS.o")));
    } else {
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o")));
    }
  }

  Args.AddAllArgs(CmdArgs, options::OPT_L);
  Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
  Args.AddAllArgs(CmdArgs, options::OPT_e);
  Args.AddAllArgs(CmdArgs, options::OPT_s);
  Args.AddAllArgs(CmdArgs, options::OPT_t);
  Args.AddAllArgs(CmdArgs, options::OPT_Z_Flag);
  Args.AddAllArgs(CmdArgs, options::OPT_r);

  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);

  unsigned Major, Minor, Micro;
  getToolChain().getTriple().getOSVersion(Major, Minor, Micro);
  bool useLibgcc = true;
  if (Major >= 7 || Major == 0) {
    switch (getToolChain().getArch()) {
    case llvm::Triple::aarch64:
    case llvm::Triple::arm:
    case llvm::Triple::armeb:
    case llvm::Triple::thumb:
    case llvm::Triple::thumbeb:
    case llvm::Triple::ppc:
    case llvm::Triple::ppc64:
    case llvm::Triple::ppc64le:
    case llvm::Triple::sparc:
    case llvm::Triple::sparcv9:
    case llvm::Triple::x86:
    case llvm::Triple::x86_64:
      useLibgcc = false;
      break;
    default:
      break;
    }
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
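    // Descriptive note (added): the default-library block below emits
    // libraries in dependency order — the C++ runtime and -lm first (when
    // driving C++), then -lpthread and -lc, and finally libgcc or libgcc_s
    // depending on the useLibgcc decision above.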
    addOpenMPRuntime(CmdArgs, getToolChain(), Args);
    if (D.CCCIsCXX()) {
      getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);
      CmdArgs.push_back("-lm");
    }
    if (Args.hasArg(options::OPT_pthread))
      CmdArgs.push_back("-lpthread");
    CmdArgs.push_back("-lc");

    if (useLibgcc) {
      if (Args.hasArg(options::OPT_static)) {
        // libgcc_eh depends on libc, so resolve as much as possible,
        // pull in any new requirements from libc and then get the rest
        // of libgcc.
        CmdArgs.push_back("-lgcc_eh");
        CmdArgs.push_back("-lc");
        CmdArgs.push_back("-lgcc");
      } else {
        CmdArgs.push_back("-lgcc");
        CmdArgs.push_back("--as-needed");
        CmdArgs.push_back("-lgcc_s");
        CmdArgs.push_back("--no-as-needed");
      }
    }
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie))
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crtendS.o")));
    else
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crtend.o")));
    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o")));
  }

  getToolChain().addProfileRTLibs(Args, CmdArgs);

  const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath());
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void gnutools::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                       const InputInfo &Output,
                                       const InputInfoList &Inputs,
                                       const ArgList &Args,
                                       const char *LinkingOutput) const {
  claimNoWarnArgs(Args);

  std::string TripleStr = getToolChain().ComputeEffectiveClangTriple(Args);
  llvm::Triple Triple = llvm::Triple(TripleStr);

  ArgStringList CmdArgs;

  llvm::Reloc::Model RelocationModel;
  unsigned PICLevel;
  bool IsPIE;
  std::tie(RelocationModel, PICLevel, IsPIE) =
      ParsePICArgs(getToolChain(), Triple, Args);

  switch (getToolChain().getArch()) {
  default:
    break;
  // Add --32/--64 to make sure we get the format we want.
  // This is incomplete
  case llvm::Triple::x86:
    CmdArgs.push_back("--32");
    break;
  case llvm::Triple::x86_64:
    if (getToolChain().getTriple().getEnvironment() == llvm::Triple::GNUX32)
      CmdArgs.push_back("--x32");
    else
      CmdArgs.push_back("--64");
    break;
  case llvm::Triple::ppc:
    CmdArgs.push_back("-a32");
    CmdArgs.push_back("-mppc");
    CmdArgs.push_back("-many");
    break;
  case llvm::Triple::ppc64:
    CmdArgs.push_back("-a64");
    CmdArgs.push_back("-mppc64");
    CmdArgs.push_back("-many");
    break;
  case llvm::Triple::ppc64le:
    CmdArgs.push_back("-a64");
    CmdArgs.push_back("-mppc64");
    CmdArgs.push_back("-many");
    CmdArgs.push_back("-mlittle-endian");
    break;
  case llvm::Triple::sparc:
  case llvm::Triple::sparcel: {
    CmdArgs.push_back("-32");
    std::string CPU = getCPUName(Args, getToolChain().getTriple());
    CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple()));
    AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
    break;
  }
  case llvm::Triple::sparcv9: {
    CmdArgs.push_back("-64");
    std::string CPU = getCPUName(Args, getToolChain().getTriple());
    CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple()));
    AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
    break;
  }
  case llvm::Triple::arm:
  case llvm::Triple::armeb:
  case llvm::Triple::thumb:
  case llvm::Triple::thumbeb: {
    const llvm::Triple &Triple2 = getToolChain().getTriple();
    switch (Triple2.getSubArch()) {
    case llvm::Triple::ARMSubArch_v7:
      CmdArgs.push_back("-mfpu=neon");
      break;
    case llvm::Triple::ARMSubArch_v8:
      CmdArgs.push_back("-mfpu=crypto-neon-fp-armv8");
      break;
    default:
      break;
    }

    switch (arm::getARMFloatABI(getToolChain(), Args)) {
    case arm::FloatABI::Invalid:
      llvm_unreachable("must have an ABI!");
    case arm::FloatABI::Soft:
      CmdArgs.push_back(Args.MakeArgString("-mfloat-abi=soft"));
      break;
    case arm::FloatABI::SoftFP:
      CmdArgs.push_back(Args.MakeArgString("-mfloat-abi=softfp"));
      break;
    case arm::FloatABI::Hard:
      CmdArgs.push_back(Args.MakeArgString("-mfloat-abi=hard"));
      break;
    }

    Args.AddLastArg(CmdArgs, options::OPT_march_EQ);

    // FIXME: remove krait check when GNU tools support krait cpu
    // for now replace it with -mcpu=cortex-a15 to avoid a lower
    // march from being picked in the absence of a cpu flag.
    Arg *A;
    if ((A = Args.getLastArg(options::OPT_mcpu_EQ)) &&
        StringRef(A->getValue()).lower() == "krait")
      CmdArgs.push_back("-mcpu=cortex-a15");
    else
      Args.AddLastArg(CmdArgs, options::OPT_mcpu_EQ);
    Args.AddLastArg(CmdArgs, options::OPT_mfpu_EQ);
    break;
  }
  case llvm::Triple::mips:
  case llvm::Triple::mipsel:
  case llvm::Triple::mips64:
  case llvm::Triple::mips64el: {
    StringRef CPUName;
    StringRef ABIName;
    mips::getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName);
    ABIName = getGnuCompatibleMipsABIName(ABIName);

    CmdArgs.push_back("-march");
    CmdArgs.push_back(CPUName.data());

    CmdArgs.push_back("-mabi");
    CmdArgs.push_back(ABIName.data());

    // -mno-shared should be emitted unless -fpic, -fpie, -fPIC, -fPIE,
    // or -mshared (not implemented) is in effect.
    if (RelocationModel == llvm::Reloc::Static)
      CmdArgs.push_back("-mno-shared");

    // LLVM doesn't support -mplt yet and acts as if it is always given.
    // However, -mplt has no effect with the N64 ABI.
    CmdArgs.push_back(ABIName == "64" ? "-KPIC" : "-call_nonpic");
"-KPIC" : "-call_nonpic"); if (getToolChain().getArch() == llvm::Triple::mips || getToolChain().getArch() == llvm::Triple::mips64) CmdArgs.push_back("-EB"); else CmdArgs.push_back("-EL"); if (Arg *A = Args.getLastArg(options::OPT_mnan_EQ)) { if (StringRef(A->getValue()) == "2008") CmdArgs.push_back(Args.MakeArgString("-mnan=2008")); } // Add the last -mfp32/-mfpxx/-mfp64 or -mfpxx if it is enabled by default. if (Arg *A = Args.getLastArg(options::OPT_mfp32, options::OPT_mfpxx, options::OPT_mfp64)) { A->claim(); A->render(Args, CmdArgs); } else if (mips::shouldUseFPXX( Args, getToolChain().getTriple(), CPUName, ABIName, getMipsFloatABI(getToolChain().getDriver(), Args))) CmdArgs.push_back("-mfpxx"); // Pass on -mmips16 or -mno-mips16. However, the assembler equivalent of // -mno-mips16 is actually -no-mips16. if (Arg *A = Args.getLastArg(options::OPT_mips16, options::OPT_mno_mips16)) { if (A->getOption().matches(options::OPT_mips16)) { A->claim(); A->render(Args, CmdArgs); } else { A->claim(); CmdArgs.push_back("-no-mips16"); } } Args.AddLastArg(CmdArgs, options::OPT_mmicromips, options::OPT_mno_micromips); Args.AddLastArg(CmdArgs, options::OPT_mdsp, options::OPT_mno_dsp); Args.AddLastArg(CmdArgs, options::OPT_mdspr2, options::OPT_mno_dspr2); if (Arg *A = Args.getLastArg(options::OPT_mmsa, options::OPT_mno_msa)) { // Do not use AddLastArg because not all versions of MIPS assembler // support -mmsa / -mno-msa options. if (A->getOption().matches(options::OPT_mmsa)) CmdArgs.push_back(Args.MakeArgString("-mmsa")); } Args.AddLastArg(CmdArgs, options::OPT_mhard_float, options::OPT_msoft_float); Args.AddLastArg(CmdArgs, options::OPT_mdouble_float, options::OPT_msingle_float); Args.AddLastArg(CmdArgs, options::OPT_modd_spreg, options::OPT_mno_odd_spreg); AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } case llvm::Triple::systemz: { // Always pass an -march option, since our default of z10 is later // than the GNU assembler's default. StringRef CPUName = getSystemZTargetCPU(Args); CmdArgs.push_back(Args.MakeArgString("-march=" + CPUName)); break; } } Args.AddAllArgs(CmdArgs, options::OPT_I); Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); for (const auto &II : Inputs) CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); // Handle the debug info splitting at object creation time if we're // creating an object. // TODO: Currently only works on linux with newer objcopy. 
  if (Args.hasArg(options::OPT_gsplit_dwarf) &&
      getToolChain().getTriple().isOSLinux())
    SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output,
                   SplitDebugName(Args, Inputs[0]));
}

static void AddLibgcc(const llvm::Triple &Triple, const Driver &D,
                      ArgStringList &CmdArgs, const ArgList &Args) {
  bool isAndroid = Triple.isAndroid();
  bool isCygMing = Triple.isOSCygMing();
  bool IsIAMCU = Triple.isOSIAMCU();
  bool StaticLibgcc = Args.hasArg(options::OPT_static_libgcc) ||
                      Args.hasArg(options::OPT_static);
  if (!D.CCCIsCXX())
    CmdArgs.push_back("-lgcc");

  if (StaticLibgcc || isAndroid) {
    if (D.CCCIsCXX())
      CmdArgs.push_back("-lgcc");
  } else {
    if (!D.CCCIsCXX() && !isCygMing)
      CmdArgs.push_back("--as-needed");
    CmdArgs.push_back("-lgcc_s");
    if (!D.CCCIsCXX() && !isCygMing)
      CmdArgs.push_back("--no-as-needed");
  }

  if (StaticLibgcc && !isAndroid && !IsIAMCU)
    CmdArgs.push_back("-lgcc_eh");
  else if (!Args.hasArg(options::OPT_shared) && D.CCCIsCXX())
    CmdArgs.push_back("-lgcc");

  // According to Android ABI, we have to link with libdl if we are
  // linking with non-static libgcc.
  //
  // NOTE: This fixes a link error on Android MIPS as well. The non-static
  // libgcc for MIPS relies on _Unwind_Find_FDE and dl_iterate_phdr from libdl.
  if (isAndroid && !StaticLibgcc)
    CmdArgs.push_back("-ldl");
}

static void AddRunTimeLibs(const ToolChain &TC, const Driver &D,
                           ArgStringList &CmdArgs, const ArgList &Args) {
  // Make use of compiler-rt if --rtlib option is used
  ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(Args);

  switch (RLT) {
  case ToolChain::RLT_CompilerRT:
    switch (TC.getTriple().getOS()) {
    default:
      llvm_unreachable("unsupported OS");
    case llvm::Triple::Win32:
    case llvm::Triple::Linux:
      addClangRT(TC, Args, CmdArgs);
      break;
    }
    break;
  case ToolChain::RLT_Libgcc:
    // Make sure libgcc is not used under MSVC environment by default
    if (TC.getTriple().isKnownWindowsMSVCEnvironment()) {
      // Issue error diagnostic if libgcc is explicitly specified
      // through command line as --rtlib option argument.
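      // For example (illustrative command line, not from the original
      // source), "clang --rtlib=libgcc" targeting an MSVC environment is
      // rejected with err_drv_unsupported_rtlib_for_platform by the check
      // below.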
      if (Args.hasArg(options::OPT_rtlib_EQ)) {
        TC.getDriver().Diag(diag::err_drv_unsupported_rtlib_for_platform)
            << Args.getLastArg(options::OPT_rtlib_EQ)->getValue() << "MSVC";
      }
    } else
      AddLibgcc(TC.getTriple(), D, CmdArgs, Args);
    break;
  }
}

static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) {
  switch (T.getArch()) {
  case llvm::Triple::x86:
    if (T.isOSIAMCU())
      return "elf_iamcu";
    return "elf_i386";
  case llvm::Triple::aarch64:
    return "aarch64linux";
  case llvm::Triple::aarch64_be:
    return "aarch64_be_linux";
  case llvm::Triple::arm:
  case llvm::Triple::thumb:
    return "armelf_linux_eabi";
  case llvm::Triple::armeb:
  case llvm::Triple::thumbeb:
    return "armelfb_linux_eabi";
  case llvm::Triple::ppc:
    return "elf32ppclinux";
  case llvm::Triple::ppc64:
    return "elf64ppc";
  case llvm::Triple::ppc64le:
    return "elf64lppc";
  case llvm::Triple::sparc:
  case llvm::Triple::sparcel:
    return "elf32_sparc";
  case llvm::Triple::sparcv9:
    return "elf64_sparc";
  case llvm::Triple::mips:
    return "elf32btsmip";
  case llvm::Triple::mipsel:
    return "elf32ltsmip";
  case llvm::Triple::mips64:
    if (mips::hasMipsAbiArg(Args, "n32"))
      return "elf32btsmipn32";
    return "elf64btsmip";
  case llvm::Triple::mips64el:
    if (mips::hasMipsAbiArg(Args, "n32"))
      return "elf32ltsmipn32";
    return "elf64ltsmip";
  case llvm::Triple::systemz:
    return "elf64_s390";
  case llvm::Triple::x86_64:
    if (T.getEnvironment() == llvm::Triple::GNUX32)
      return "elf32_x86_64";
    return "elf_x86_64";
  default:
    llvm_unreachable("Unexpected arch");
  }
}

void gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs,
                                    const ArgList &Args,
                                    const char *LinkingOutput) const {
  const toolchains::Linux &ToolChain =
      static_cast<const toolchains::Linux &>(getToolChain());
  const Driver &D = ToolChain.getDriver();

  std::string TripleStr = getToolChain().ComputeEffectiveClangTriple(Args);
  llvm::Triple Triple = llvm::Triple(TripleStr);

  const llvm::Triple::ArchType Arch = ToolChain.getArch();
  const bool isAndroid = ToolChain.getTriple().isAndroid();
  const bool IsIAMCU = ToolChain.getTriple().isOSIAMCU();
  const bool IsPIE =
      !Args.hasArg(options::OPT_shared) && !Args.hasArg(options::OPT_static) &&
      (Args.hasArg(options::OPT_pie) || ToolChain.isPIEDefault());
  const bool HasCRTBeginEndFiles =
      ToolChain.getTriple().hasEnvironment() ||
      (ToolChain.getTriple().getVendor() != llvm::Triple::MipsTechnologies);

  ArgStringList CmdArgs;

  // Silence warning for "clang -g foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_g_Group);
  // and "clang -emit-llvm foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_emit_llvm);
  // and for "clang -w foo.o -o foo". Other warning options are already
  // handled somewhere else.
  Args.ClaimAllArgs(options::OPT_w);

  const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
  if (llvm::sys::path::filename(Exec) == "lld") {
    CmdArgs.push_back("-flavor");
    CmdArgs.push_back("old-gnu");
    CmdArgs.push_back("-target");
    CmdArgs.push_back(Args.MakeArgString(getToolChain().getTripleString()));
  }

  if (!D.SysRoot.empty())
    CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));

  if (IsPIE)
    CmdArgs.push_back("-pie");

  if (Args.hasArg(options::OPT_rdynamic))
    CmdArgs.push_back("-export-dynamic");

  if (Args.hasArg(options::OPT_s))
    CmdArgs.push_back("-s");

  if (Arch == llvm::Triple::armeb || Arch == llvm::Triple::thumbeb)
    arm::appendEBLinkFlags(Args, CmdArgs, Triple);

  for (const auto &Opt : ToolChain.ExtraOpts)
    CmdArgs.push_back(Opt.c_str());

  if (!Args.hasArg(options::OPT_static)) {
    CmdArgs.push_back("--eh-frame-hdr");
  }

  CmdArgs.push_back("-m");
  CmdArgs.push_back(getLDMOption(ToolChain.getTriple(), Args));

  if (Args.hasArg(options::OPT_static)) {
    if (Arch == llvm::Triple::arm || Arch == llvm::Triple::armeb ||
        Arch == llvm::Triple::thumb || Arch == llvm::Triple::thumbeb)
      CmdArgs.push_back("-Bstatic");
    else
      CmdArgs.push_back("-static");
  } else if (Args.hasArg(options::OPT_shared)) {
    CmdArgs.push_back("-shared");
  }

  if (!Args.hasArg(options::OPT_static)) {
    if (Args.hasArg(options::OPT_rdynamic))
      CmdArgs.push_back("-export-dynamic");

    if (!Args.hasArg(options::OPT_shared)) {
      const std::string Loader =
          D.DyldPrefix + ToolChain.getDynamicLinker(Args);
      CmdArgs.push_back("-dynamic-linker");
      CmdArgs.push_back(Args.MakeArgString(Loader));
    }
  }

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    if (!isAndroid && !IsIAMCU) {
      const char *crt1 = nullptr;
      if (!Args.hasArg(options::OPT_shared)) {
        if (Args.hasArg(options::OPT_pg))
          crt1 = "gcrt1.o";
        else if (IsPIE)
          crt1 = "Scrt1.o";
        else
          crt1 = "crt1.o";
      }
      if (crt1)
        CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crt1)));

      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o")));
    }

    if (IsIAMCU)
      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt0.o")));
    else {
      const char *crtbegin;
      if (Args.hasArg(options::OPT_static))
        crtbegin = isAndroid ? "crtbegin_static.o" : "crtbeginT.o";
      else if (Args.hasArg(options::OPT_shared))
        crtbegin = isAndroid ? "crtbegin_so.o" : "crtbeginS.o";
      else if (IsPIE)
        crtbegin = isAndroid ? "crtbegin_dynamic.o" : "crtbeginS.o";
      else
        crtbegin = isAndroid ? "crtbegin_dynamic.o" : "crtbegin.o";

      if (HasCRTBeginEndFiles)
        CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin)));
    }

    // Add crtfastmath.o if available and fast math is enabled.
    ToolChain.AddFastMathRuntimeIfAvailable(Args, CmdArgs);
  }

  Args.AddAllArgs(CmdArgs, options::OPT_L);
  Args.AddAllArgs(CmdArgs, options::OPT_u);

  ToolChain.AddFilePathLibArgs(Args, CmdArgs);

  if (D.isUsingLTO())
    AddGoldPlugin(ToolChain, Args, CmdArgs, D.getLTOMode() == LTOK_Thin);

  if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle))
    CmdArgs.push_back("--no-demangle");

  bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs);
  bool NeedsXRayDeps = addXRayRuntime(ToolChain, Args, CmdArgs);
  AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs);
  // The profile runtime also needs access to system libraries.
  getToolChain().addProfileRTLibs(Args, CmdArgs);

  if (D.CCCIsCXX() &&
      !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
    bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) &&
                               !Args.hasArg(options::OPT_static);
    if (OnlyLibstdcxxStatic)
      CmdArgs.push_back("-Bstatic");
    ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
    if (OnlyLibstdcxxStatic)
      CmdArgs.push_back("-Bdynamic");
    CmdArgs.push_back("-lm");
  }
  // Silence warnings when linking C code with a C++ '-stdlib' argument.
  Args.ClaimAllArgs(options::OPT_stdlib_EQ);

  if (!Args.hasArg(options::OPT_nostdlib)) {
    if (!Args.hasArg(options::OPT_nodefaultlibs)) {
      if (Args.hasArg(options::OPT_static))
        CmdArgs.push_back("--start-group");

      if (NeedsSanitizerDeps)
        linkSanitizerRuntimeDeps(ToolChain, CmdArgs);

      if (NeedsXRayDeps)
        linkXRayRuntimeDeps(ToolChain, Args, CmdArgs);

      bool WantPthread = Args.hasArg(options::OPT_pthread) ||
                         Args.hasArg(options::OPT_pthreads);

      if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
                       options::OPT_fno_openmp, false)) {
        // OpenMP runtimes imply pthreads when using the GNU toolchain.
        // FIXME: Does this really make sense for all GNU toolchains?
        WantPthread = true;

        // Also link the particular OpenMP runtimes.
        switch (getOpenMPRuntime(ToolChain, Args)) {
        case OMPRT_OMP:
          CmdArgs.push_back("-lomp");
          break;
        case OMPRT_GOMP:
          CmdArgs.push_back("-lgomp");

          // FIXME: Exclude this for platforms with libgomp that don't require
          // librt. Most modern Linux platforms require it, but some may not.
          CmdArgs.push_back("-lrt");
          break;
        case OMPRT_IOMP5:
          CmdArgs.push_back("-liomp5");
          break;
        case OMPRT_Unknown:
          // Already diagnosed.
          break;
        }
      }

      AddRunTimeLibs(ToolChain, D, CmdArgs, Args);

      if (WantPthread && !isAndroid)
        CmdArgs.push_back("-lpthread");

      if (Args.hasArg(options::OPT_fsplit_stack))
        CmdArgs.push_back("--wrap=pthread_create");

      CmdArgs.push_back("-lc");

      // Add IAMCU specific libs, if needed.
      if (IsIAMCU)
        CmdArgs.push_back("-lgloss");

      if (Args.hasArg(options::OPT_static))
        CmdArgs.push_back("--end-group");
      else
        AddRunTimeLibs(ToolChain, D, CmdArgs, Args);

      // Add IAMCU specific libs (outside the group), if needed.
      if (IsIAMCU) {
        CmdArgs.push_back("--as-needed");
        CmdArgs.push_back("-lsoftfp");
        CmdArgs.push_back("--no-as-needed");
      }
    }

    if (!Args.hasArg(options::OPT_nostartfiles) && !IsIAMCU) {
      const char *crtend;
      if (Args.hasArg(options::OPT_shared))
        crtend = isAndroid ? "crtend_so.o" : "crtendS.o";
      else if (IsPIE)
        crtend = isAndroid ? "crtend_android.o" : "crtendS.o";
      else
        crtend = isAndroid ? "crtend_android.o" : "crtend.o";

      if (HasCRTBeginEndFiles)
        CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtend)));
      if (!isAndroid)
        CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o")));
    }
  }

  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

// NaCl ARM assembly (inline or standalone) can be written with a set of macros
// for the various SFI requirements like register masking. The assembly tool
// inserts the file containing the macros as an input into all the assembly
// jobs.
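// Illustrative note (derived from the code below, not from the original
// comment): the macro file is simply prepended to the input list, so a job
// for "foo.s" effectively assembles "nacl-arm-macros.s" followed by "foo.s"
// (see NewInputs in the function that follows).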
void nacltools::AssemblerARM::ConstructJob(Compilation &C, const JobAction &JA,
                                           const InputInfo &Output,
                                           const InputInfoList &Inputs,
                                           const ArgList &Args,
                                           const char *LinkingOutput) const {
  const toolchains::NaClToolChain &ToolChain =
      static_cast<const toolchains::NaClToolChain &>(getToolChain());
  InputInfo NaClMacros(types::TY_PP_Asm, ToolChain.GetNaClArmMacrosPath(),
                       "nacl-arm-macros.s");
  InputInfoList NewInputs;
  NewInputs.push_back(NaClMacros);
  NewInputs.append(Inputs.begin(), Inputs.end());
  gnutools::Assembler::ConstructJob(C, JA, Output, NewInputs, Args,
                                    LinkingOutput);
}

// This is quite similar to gnutools::Linker::ConstructJob, with the changes
// that we use static by default, do not yet support sanitizers or LTO, and
// differ in a few other ways. Eventually we can support more of that and
// hopefully migrate back to gnutools::Linker.
void nacltools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                     const InputInfo &Output,
                                     const InputInfoList &Inputs,
                                     const ArgList &Args,
                                     const char *LinkingOutput) const {
  const toolchains::NaClToolChain &ToolChain =
      static_cast<const toolchains::NaClToolChain &>(getToolChain());
  const Driver &D = ToolChain.getDriver();
  const llvm::Triple::ArchType Arch = ToolChain.getArch();
  const bool IsStatic =
      !Args.hasArg(options::OPT_dynamic) && !Args.hasArg(options::OPT_shared);

  ArgStringList CmdArgs;

  // Silence warning for "clang -g foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_g_Group);
  // and "clang -emit-llvm foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_emit_llvm);
  // and for "clang -w foo.o -o foo". Other warning options are already
  // handled somewhere else.
  Args.ClaimAllArgs(options::OPT_w);

  if (!D.SysRoot.empty())
    CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));

  if (Args.hasArg(options::OPT_rdynamic))
    CmdArgs.push_back("-export-dynamic");

  if (Args.hasArg(options::OPT_s))
    CmdArgs.push_back("-s");

  // NaClToolChain doesn't have ExtraOpts like Linux; the only relevant flag
  // from there is --build-id, which we do want.
CmdArgs.push_back("--build-id"); if (!IsStatic) CmdArgs.push_back("--eh-frame-hdr"); CmdArgs.push_back("-m"); if (Arch == llvm::Triple::x86) CmdArgs.push_back("elf_i386_nacl"); else if (Arch == llvm::Triple::arm) CmdArgs.push_back("armelf_nacl"); else if (Arch == llvm::Triple::x86_64) CmdArgs.push_back("elf_x86_64_nacl"); else if (Arch == llvm::Triple::mipsel) CmdArgs.push_back("mipselelf_nacl"); else D.Diag(diag::err_target_unsupported_arch) << ToolChain.getArchName() << "Native Client"; if (IsStatic) CmdArgs.push_back("-static"); else if (Args.hasArg(options::OPT_shared)) CmdArgs.push_back("-shared"); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (!Args.hasArg(options::OPT_shared)) CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt1.o"))); CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o"))); const char *crtbegin; if (IsStatic) crtbegin = "crtbeginT.o"; else if (Args.hasArg(options::OPT_shared)) crtbegin = "crtbeginS.o"; else crtbegin = "crtbegin.o"; CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin))); } Args.AddAllArgs(CmdArgs, options::OPT_L); Args.AddAllArgs(CmdArgs, options::OPT_u); ToolChain.AddFilePathLibArgs(Args, CmdArgs); if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle)) CmdArgs.push_back("--no-demangle"); AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs); if (D.CCCIsCXX() && !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) && !IsStatic; if (OnlyLibstdcxxStatic) CmdArgs.push_back("-Bstatic"); ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); if (OnlyLibstdcxxStatic) CmdArgs.push_back("-Bdynamic"); CmdArgs.push_back("-lm"); } if (!Args.hasArg(options::OPT_nostdlib)) { if (!Args.hasArg(options::OPT_nodefaultlibs)) { // Always use groups, since it has no effect on dynamic libraries. CmdArgs.push_back("--start-group"); CmdArgs.push_back("-lc"); // NaCl's libc++ currently requires libpthread, so just always include it // in the group for C++. if (Args.hasArg(options::OPT_pthread) || Args.hasArg(options::OPT_pthreads) || D.CCCIsCXX()) { // Gold, used by Mips, handles nested groups differently than ld, and // without '-lnacl' it prefers symbols from libpthread.a over libnacl.a, // which is not a desired behaviour here. // See https://sourceware.org/ml/binutils/2015-03/msg00034.html if (getToolChain().getArch() == llvm::Triple::mipsel) CmdArgs.push_back("-lnacl"); CmdArgs.push_back("-lpthread"); } CmdArgs.push_back("-lgcc"); CmdArgs.push_back("--as-needed"); if (IsStatic) CmdArgs.push_back("-lgcc_eh"); else CmdArgs.push_back("-lgcc_s"); CmdArgs.push_back("--no-as-needed"); // Mips needs to create and use pnacl_legacy library that contains // definitions from bitcode/pnaclmm.c and definitions for // __nacl_tp_tls_offset() and __nacl_tp_tdb_offset(). 
      if (getToolChain().getArch() == llvm::Triple::mipsel)
        CmdArgs.push_back("-lpnacl_legacy");

      CmdArgs.push_back("--end-group");
    }

    if (!Args.hasArg(options::OPT_nostartfiles)) {
      const char *crtend;
      if (Args.hasArg(options::OPT_shared))
        crtend = "crtendS.o";
      else
        crtend = "crtend.o";

      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtend)));
      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o")));
    }
  }

  const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void minix::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs,
                                    const ArgList &Args,
                                    const char *LinkingOutput) const {
  claimNoWarnArgs(Args);
  ArgStringList CmdArgs;

  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  for (const auto &II : Inputs)
    CmdArgs.push_back(II.getFilename());

  const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void minix::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                 const InputInfo &Output,
                                 const InputInfoList &Inputs,
                                 const ArgList &Args,
                                 const char *LinkingOutput) const {
  const Driver &D = getToolChain().getDriver();
  ArgStringList CmdArgs;

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Invalid output.");
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crt1.o")));
    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crti.o")));
    CmdArgs.push_back(
        Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o")));
    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o")));
  }

  Args.AddAllArgs(CmdArgs,
                  {options::OPT_L, options::OPT_T_Group, options::OPT_e});

  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);

  getToolChain().addProfileRTLibs(Args, CmdArgs);

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
    if (D.CCCIsCXX()) {
      getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);
      CmdArgs.push_back("-lm");
    }
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    if (Args.hasArg(options::OPT_pthread))
      CmdArgs.push_back("-lpthread");
    CmdArgs.push_back("-lc");
    CmdArgs.push_back("-lCompilerRT-Generic");
    CmdArgs.push_back("-L/usr/pkg/compiler-rt/lib");
    CmdArgs.push_back(
        Args.MakeArgString(getToolChain().GetFilePath("crtend.o")));
  }

  const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath());
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

/// DragonFly Tools

// For now, the DragonFly Assembler does just about the same as for
// FreeBSD, but this may change soon.
void dragonfly::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                        const InputInfo &Output,
                                        const InputInfoList &Inputs,
                                        const ArgList &Args,
                                        const char *LinkingOutput) const {
  claimNoWarnArgs(Args);
  ArgStringList CmdArgs;

  // When building 32-bit code on DragonFly/pc64, we have to explicitly
  // instruct as in the base system to assemble 32-bit code.
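  // Illustrative example (not from the original source): "clang -m32" on an
  // x86_64 DragonFly host therefore produces an assembler line roughly like
  //   as --32 -o foo.o foo.s
  // (sketch based on the flag pushed below).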
  if (getToolChain().getArch() == llvm::Triple::x86)
    CmdArgs.push_back("--32");

  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  for (const auto &II : Inputs)
    CmdArgs.push_back(II.getFilename());

  const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void dragonfly::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                     const InputInfo &Output,
                                     const InputInfoList &Inputs,
                                     const ArgList &Args,
                                     const char *LinkingOutput) const {
  const Driver &D = getToolChain().getDriver();
  ArgStringList CmdArgs;

  if (!D.SysRoot.empty())
    CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));

  CmdArgs.push_back("--eh-frame-hdr");
  if (Args.hasArg(options::OPT_static)) {
    CmdArgs.push_back("-Bstatic");
  } else {
    if (Args.hasArg(options::OPT_rdynamic))
      CmdArgs.push_back("-export-dynamic");
    if (Args.hasArg(options::OPT_shared))
      CmdArgs.push_back("-Bshareable");
    else {
      CmdArgs.push_back("-dynamic-linker");
      CmdArgs.push_back("/usr/libexec/ld-elf.so.2");
    }
    CmdArgs.push_back("--hash-style=gnu");
    CmdArgs.push_back("--enable-new-dtags");
  }

  // When building 32-bit code on DragonFly/pc64, we have to explicitly
  // instruct ld in the base system to link 32-bit code.
  if (getToolChain().getArch() == llvm::Triple::x86) {
    CmdArgs.push_back("-m");
    CmdArgs.push_back("elf_i386");
  }

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Invalid output.");
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    if (!Args.hasArg(options::OPT_shared)) {
      if (Args.hasArg(options::OPT_pg))
        CmdArgs.push_back(
            Args.MakeArgString(getToolChain().GetFilePath("gcrt1.o")));
      else {
        if (Args.hasArg(options::OPT_pie))
          CmdArgs.push_back(
              Args.MakeArgString(getToolChain().GetFilePath("Scrt1.o")));
        else
          CmdArgs.push_back(
              Args.MakeArgString(getToolChain().GetFilePath("crt1.o")));
      }
    }
    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crti.o")));
    if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie))
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crtbeginS.o")));
    else
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o")));
  }

  Args.AddAllArgs(CmdArgs,
                  {options::OPT_L, options::OPT_T_Group, options::OPT_e});

  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
    CmdArgs.push_back("-L/usr/lib/gcc50");

    if (!Args.hasArg(options::OPT_static)) {
      CmdArgs.push_back("-rpath");
      CmdArgs.push_back("/usr/lib/gcc50");
    }

    if (D.CCCIsCXX()) {
      getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);
      CmdArgs.push_back("-lm");
    }

    if (Args.hasArg(options::OPT_pthread))
      CmdArgs.push_back("-lpthread");

    if (!Args.hasArg(options::OPT_nolibc)) {
      CmdArgs.push_back("-lc");
    }

    if (Args.hasArg(options::OPT_static) ||
        Args.hasArg(options::OPT_static_libgcc)) {
      CmdArgs.push_back("-lgcc");
      CmdArgs.push_back("-lgcc_eh");
    } else {
      if (Args.hasArg(options::OPT_shared_libgcc)) {
        CmdArgs.push_back("-lgcc_pic");
        if (!Args.hasArg(options::OPT_shared))
          CmdArgs.push_back("-lgcc");
      } else {
        CmdArgs.push_back("-lgcc");
        CmdArgs.push_back("--as-needed");
        CmdArgs.push_back("-lgcc_pic");
        CmdArgs.push_back("--no-as-needed");
      }
    }
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie))
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crtendS.o")));
    else
      CmdArgs.push_back(
          Args.MakeArgString(getToolChain().GetFilePath("crtend.o")));
    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o")));
  }

  getToolChain().addProfileRTLibs(Args, CmdArgs);

  const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath());
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

// Try to find Exe from a Visual Studio distribution. This first tries to find
// an installed copy of Visual Studio and, failing that, looks in the PATH,
// making sure that whatever executable is found is not a same-named exe
// from clang itself, to prevent clang from falling back to itself.
static std::string FindVisualStudioExecutable(const ToolChain &TC,
                                              const char *Exe,
                                              const char *ClangProgramPath) {
  const auto &MSVC = static_cast<const toolchains::MSVCToolChain &>(TC);
  std::string visualStudioBinDir;
  if (MSVC.getVisualStudioBinariesFolder(ClangProgramPath,
                                         visualStudioBinDir)) {
    SmallString<128> FilePath(visualStudioBinDir);
    llvm::sys::path::append(FilePath, Exe);
    if (llvm::sys::fs::can_execute(FilePath.c_str()))
      return FilePath.str();
  }

  return Exe;
}

void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                        const InputInfo &Output,
                                        const InputInfoList &Inputs,
                                        const ArgList &Args,
                                        const char *LinkingOutput) const {
  ArgStringList CmdArgs;
  const ToolChain &TC = getToolChain();

  assert((Output.isFilename() || Output.isNothing()) && "invalid output");
  if (Output.isFilename())
    CmdArgs.push_back(
        Args.MakeArgString(std::string("-out:") + Output.getFilename()));

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles) &&
      !C.getDriver().IsCLMode())
    CmdArgs.push_back("-defaultlib:libcmt");

  if (!llvm::sys::Process::GetEnv("LIB")) {
    // If the VC environment hasn't been configured (perhaps because the user
    // did not run vcvarsall), try to build a consistent link environment. If
    // the environment variable is set however, assume the user knows what
    // they're doing.
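    // Illustrative example (assumed install layout, not from the original
    // source): for x86_64 the code below adds something like
    //   -libpath:C:\Program Files (x86)\...\VC\lib\amd64
    // plus, when applicable, the Universal CRT and Windows SDK library
    // directories.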
    std::string VisualStudioDir;
    const auto &MSVC = static_cast<const toolchains::MSVCToolChain &>(TC);
    if (MSVC.getVisualStudioInstallDir(VisualStudioDir)) {
      SmallString<128> LibDir(VisualStudioDir);
      llvm::sys::path::append(LibDir, "VC", "lib");
      switch (MSVC.getArch()) {
      case llvm::Triple::x86:
        // x86 just puts the libraries directly in lib
        break;
      case llvm::Triple::x86_64:
        llvm::sys::path::append(LibDir, "amd64");
        break;
      case llvm::Triple::arm:
        llvm::sys::path::append(LibDir, "arm");
        break;
      default:
        break;
      }
      CmdArgs.push_back(
          Args.MakeArgString(std::string("-libpath:") + LibDir.c_str()));

      if (MSVC.useUniversalCRT(VisualStudioDir)) {
        std::string UniversalCRTLibPath;
        if (MSVC.getUniversalCRTLibraryPath(UniversalCRTLibPath))
          CmdArgs.push_back(Args.MakeArgString(std::string("-libpath:") +
                                               UniversalCRTLibPath.c_str()));
      }
    }

    std::string WindowsSdkLibPath;
    if (MSVC.getWindowsSDKLibraryPath(WindowsSdkLibPath))
      CmdArgs.push_back(Args.MakeArgString(std::string("-libpath:") +
                                           WindowsSdkLibPath.c_str()));
  }

  if (!C.getDriver().IsCLMode() && Args.hasArg(options::OPT_L))
    for (const auto &LibPath : Args.getAllArgValues(options::OPT_L))
      CmdArgs.push_back(Args.MakeArgString("-libpath:" + LibPath));

  CmdArgs.push_back("-nologo");

  if (Args.hasArg(options::OPT_g_Group, options::OPT__SLASH_Z7,
                  options::OPT__SLASH_Zd))
    CmdArgs.push_back("-debug");

  bool DLL = Args.hasArg(options::OPT__SLASH_LD, options::OPT__SLASH_LDd,
                         options::OPT_shared);
  if (DLL) {
    CmdArgs.push_back(Args.MakeArgString("-dll"));

    SmallString<128> ImplibName(Output.getFilename());
    llvm::sys::path::replace_extension(ImplibName, "lib");
    CmdArgs.push_back(Args.MakeArgString(std::string("-implib:") + ImplibName));
  }

  if (TC.getSanitizerArgs().needsAsanRt()) {
    CmdArgs.push_back(Args.MakeArgString("-debug"));
    CmdArgs.push_back(Args.MakeArgString("-incremental:no"));
    if (Args.hasArg(options::OPT__SLASH_MD, options::OPT__SLASH_MDd)) {
      for (const auto &Lib : {"asan_dynamic", "asan_dynamic_runtime_thunk"})
        CmdArgs.push_back(TC.getCompilerRTArgString(Args, Lib));
      // Make sure the dynamic runtime thunk is not optimized out at link time
      // to ensure proper SEH handling.
      CmdArgs.push_back(Args.MakeArgString("-include:___asan_seh_interceptor"));
    } else if (DLL) {
      CmdArgs.push_back(TC.getCompilerRTArgString(Args, "asan_dll_thunk"));
    } else {
      for (const auto &Lib : {"asan", "asan_cxx"})
        CmdArgs.push_back(TC.getCompilerRTArgString(Args, Lib));
    }
  }

  Args.AddAllArgValues(CmdArgs, options::OPT__SLASH_link);

  if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
                   options::OPT_fno_openmp, false)) {
    CmdArgs.push_back("-nodefaultlib:vcomp.lib");
    CmdArgs.push_back("-nodefaultlib:vcompd.lib");
    CmdArgs.push_back(Args.MakeArgString(std::string("-libpath:") +
                                         TC.getDriver().Dir + "/../lib"));
    switch (getOpenMPRuntime(getToolChain(), Args)) {
    case OMPRT_OMP:
      CmdArgs.push_back("-defaultlib:libomp.lib");
      break;
    case OMPRT_IOMP5:
      CmdArgs.push_back("-defaultlib:libiomp5md.lib");
      break;
    case OMPRT_GOMP:
      break;
    case OMPRT_Unknown:
      // Already diagnosed.
      break;
    }
  }

  // Add the compiler-rt library in case it was explicitly
  // specified as an argument to the --rtlib option.
  if (!Args.hasArg(options::OPT_nostdlib)) {
    AddRunTimeLibs(TC, TC.getDriver(), CmdArgs, Args);
  }

  // Add filenames, libraries, and other linker inputs.
  for (const auto &Input : Inputs) {
    if (Input.isFilename()) {
      CmdArgs.push_back(Input.getFilename());
      continue;
    }

    const Arg &A = Input.getInputArg();

    // Render -l options differently for the MSVC linker.
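    // For example, "-lkernel32" becomes "kernel32.lib", while a value that
    // already ends in ".lib" is passed through unchanged (illustrative; see
    // the check below).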
    if (A.getOption().matches(options::OPT_l)) {
      StringRef Lib = A.getValue();
      const char *LinkLibArg;
      if (Lib.endswith(".lib"))
        LinkLibArg = Args.MakeArgString(Lib);
      else
        LinkLibArg = Args.MakeArgString(Lib + ".lib");
      CmdArgs.push_back(LinkLibArg);
      continue;
    }

    // Otherwise, this is some other kind of linker input option like -Wl, -z,
    // or -L. Render it, even if MSVC doesn't understand it.
    A.renderAsInput(Args, CmdArgs);
  }

  TC.addProfileRTLibs(Args, CmdArgs);

  // We need to special case some linker paths. In the case of lld, we need to
  // translate 'lld' into 'lld-link', and in the case of the regular msvc
  // linker, we need to use a special search algorithm.
  llvm::SmallString<128> linkPath;
  StringRef Linker = Args.getLastArgValue(options::OPT_fuse_ld_EQ, "link");
  if (Linker.equals_lower("lld"))
    Linker = "lld-link";

  if (Linker.equals_lower("link")) {
    // If we're using the MSVC linker, it's not sufficient to just use link
    // from the program PATH, because other environments like GnuWin32 install
    // their own link.exe which may come first.
    linkPath = FindVisualStudioExecutable(TC, "link.exe",
                                          C.getDriver().getClangProgramPath());
  } else {
    linkPath = Linker;
    llvm::sys::path::replace_extension(linkPath, "exe");
    linkPath = TC.GetProgramPath(linkPath.c_str());
  }

  const char *Exec = Args.MakeArgString(linkPath);
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void visualstudio::Compiler::ConstructJob(Compilation &C, const JobAction &JA,
                                          const InputInfo &Output,
                                          const InputInfoList &Inputs,
                                          const ArgList &Args,
                                          const char *LinkingOutput) const {
  C.addCommand(GetCommand(C, JA, Output, Inputs, Args, LinkingOutput));
}

std::unique_ptr<Command> visualstudio::Compiler::GetCommand(
    Compilation &C, const JobAction &JA, const InputInfo &Output,
    const InputInfoList &Inputs, const ArgList &Args,
    const char *LinkingOutput) const {
  ArgStringList CmdArgs;
  CmdArgs.push_back("/nologo");
  CmdArgs.push_back("/c");  // Compile only.
  CmdArgs.push_back("/W0"); // No warnings.

  // The goal is to be able to invoke this tool correctly based on
  // any flag accepted by clang-cl.

  // These are spelled the same way in clang and cl.exe.
  Args.AddAllArgs(CmdArgs, {options::OPT_D, options::OPT_U, options::OPT_I});

  // Optimization level.
  if (Arg *A = Args.getLastArg(options::OPT_fbuiltin, options::OPT_fno_builtin))
    CmdArgs.push_back(A->getOption().getID() == options::OPT_fbuiltin ? "/Oi"
                                                                      : "/Oi-");
  if (Arg *A = Args.getLastArg(options::OPT_O, options::OPT_O0)) {
    if (A->getOption().getID() == options::OPT_O0) {
      CmdArgs.push_back("/Od");
    } else {
      CmdArgs.push_back("/Og");

      StringRef OptLevel = A->getValue();
      if (OptLevel == "s" || OptLevel == "z")
        CmdArgs.push_back("/Os");
      else
        CmdArgs.push_back("/Ot");

      CmdArgs.push_back("/Ob2");
    }
  }
  if (Arg *A = Args.getLastArg(options::OPT_fomit_frame_pointer,
                               options::OPT_fno_omit_frame_pointer))
    CmdArgs.push_back(A->getOption().getID() == options::OPT_fomit_frame_pointer
                          ? "/Oy"
                          : "/Oy-");
  if (!Args.hasArg(options::OPT_fwritable_strings))
    CmdArgs.push_back("/GF");

  // Flags for which clang-cl has an alias.
  // FIXME: How can we ensure this stays in sync with relevant clang-cl options?

  if (Args.hasFlag(options::OPT__SLASH_GR_, options::OPT__SLASH_GR,
                   /*default=*/false))
    CmdArgs.push_back("/GR-");
  if (Args.hasFlag(options::OPT__SLASH_GS_, options::OPT__SLASH_GS,
                   /*default=*/false))
    CmdArgs.push_back("/GS-");
  if (Arg *A = Args.getLastArg(options::OPT_ffunction_sections,
                               options::OPT_fno_function_sections))
    CmdArgs.push_back(A->getOption().getID() == options::OPT_ffunction_sections
                          ? "/Gy"
                          : "/Gy-");
"/Gy" : "/Gy-"); if (Arg *A = Args.getLastArg(options::OPT_fdata_sections, options::OPT_fno_data_sections)) CmdArgs.push_back( A->getOption().getID() == options::OPT_fdata_sections ? "/Gw" : "/Gw-"); if (Args.hasArg(options::OPT_fsyntax_only)) CmdArgs.push_back("/Zs"); if (Args.hasArg(options::OPT_g_Flag, options::OPT_gline_tables_only, options::OPT__SLASH_Z7)) CmdArgs.push_back("/Z7"); std::vector Includes = Args.getAllArgValues(options::OPT_include); for (const auto &Include : Includes) CmdArgs.push_back(Args.MakeArgString(std::string("/FI") + Include)); // Flags that can simply be passed through. Args.AddAllArgs(CmdArgs, options::OPT__SLASH_LD); Args.AddAllArgs(CmdArgs, options::OPT__SLASH_LDd); Args.AddAllArgs(CmdArgs, options::OPT__SLASH_GX); Args.AddAllArgs(CmdArgs, options::OPT__SLASH_GX_); Args.AddAllArgs(CmdArgs, options::OPT__SLASH_EH); Args.AddAllArgs(CmdArgs, options::OPT__SLASH_Zl); // The order of these flags is relevant, so pick the last one. if (Arg *A = Args.getLastArg(options::OPT__SLASH_MD, options::OPT__SLASH_MDd, options::OPT__SLASH_MT, options::OPT__SLASH_MTd)) A->render(Args, CmdArgs); // Pass through all unknown arguments so that the fallback command can see // them too. Args.AddAllArgs(CmdArgs, options::OPT_UNKNOWN); // Input filename. assert(Inputs.size() == 1); const InputInfo &II = Inputs[0]; assert(II.getType() == types::TY_C || II.getType() == types::TY_CXX); CmdArgs.push_back(II.getType() == types::TY_C ? "/Tc" : "/Tp"); if (II.isFilename()) CmdArgs.push_back(II.getFilename()); else II.getInputArg().renderAsInput(Args, CmdArgs); // Output filename. assert(Output.getType() == types::TY_Object); const char *Fo = Args.MakeArgString(std::string("/Fo") + Output.getFilename()); CmdArgs.push_back(Fo); const Driver &D = getToolChain().getDriver(); std::string Exec = FindVisualStudioExecutable(getToolChain(), "cl.exe", D.getClangProgramPath()); return llvm::make_unique(JA, *this, Args.MakeArgString(Exec), CmdArgs, Inputs); } /// MinGW Tools void MinGW::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { claimNoWarnArgs(Args); ArgStringList CmdArgs; if (getToolChain().getArch() == llvm::Triple::x86) { CmdArgs.push_back("--32"); } else if (getToolChain().getArch() == llvm::Triple::x86_64) { CmdArgs.push_back("--64"); } Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); for (const auto &II : Inputs) CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); if (Args.hasArg(options::OPT_gsplit_dwarf)) SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output, SplitDebugName(Args, Inputs[0])); } void MinGW::Linker::AddLibGCC(const ArgList &Args, ArgStringList &CmdArgs) const { if (Args.hasArg(options::OPT_mthreads)) CmdArgs.push_back("-lmingwthrd"); CmdArgs.push_back("-lmingw32"); // Make use of compiler-rt if --rtlib option is used ToolChain::RuntimeLibType RLT = getToolChain().GetRuntimeLibType(Args); if (RLT == ToolChain::RLT_Libgcc) { bool Static = Args.hasArg(options::OPT_static_libgcc) || Args.hasArg(options::OPT_static); bool Shared = Args.hasArg(options::OPT_shared); bool CXX = getToolChain().getDriver().CCCIsCXX(); if (Static || (!CXX && !Shared)) { CmdArgs.push_back("-lgcc"); CmdArgs.push_back("-lgcc_eh"); } else { 
CmdArgs.push_back("-lgcc_s"); CmdArgs.push_back("-lgcc"); } } else { AddRunTimeLibs(getToolChain(), getToolChain().getDriver(), CmdArgs, Args); } CmdArgs.push_back("-lmoldname"); CmdArgs.push_back("-lmingwex"); CmdArgs.push_back("-lmsvcrt"); } void MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const ToolChain &TC = getToolChain(); const Driver &D = TC.getDriver(); // const SanitizerArgs &Sanitize = TC.getSanitizerArgs(); ArgStringList CmdArgs; // Silence warning for "clang -g foo.o -o foo" Args.ClaimAllArgs(options::OPT_g_Group); // and "clang -emit-llvm foo.o -o foo" Args.ClaimAllArgs(options::OPT_emit_llvm); // and for "clang -w foo.o -o foo". Other warning options are already // handled somewhere else. Args.ClaimAllArgs(options::OPT_w); StringRef LinkerName = Args.getLastArgValue(options::OPT_fuse_ld_EQ, "ld"); if (LinkerName.equals_lower("lld")) { CmdArgs.push_back("-flavor"); CmdArgs.push_back("gnu"); } else if (!LinkerName.equals_lower("ld")) { D.Diag(diag::err_drv_unsupported_linker) << LinkerName; } if (!D.SysRoot.empty()) CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); if (Args.hasArg(options::OPT_s)) CmdArgs.push_back("-s"); CmdArgs.push_back("-m"); if (TC.getArch() == llvm::Triple::x86) CmdArgs.push_back("i386pe"); if (TC.getArch() == llvm::Triple::x86_64) CmdArgs.push_back("i386pep"); if (TC.getArch() == llvm::Triple::arm) CmdArgs.push_back("thumb2pe"); if (Args.hasArg(options::OPT_mwindows)) { CmdArgs.push_back("--subsystem"); CmdArgs.push_back("windows"); } else if (Args.hasArg(options::OPT_mconsole)) { CmdArgs.push_back("--subsystem"); CmdArgs.push_back("console"); } if (Args.hasArg(options::OPT_static)) CmdArgs.push_back("-Bstatic"); else { if (Args.hasArg(options::OPT_mdll)) CmdArgs.push_back("--dll"); else if (Args.hasArg(options::OPT_shared)) CmdArgs.push_back("--shared"); CmdArgs.push_back("-Bdynamic"); if (Args.hasArg(options::OPT_mdll) || Args.hasArg(options::OPT_shared)) { CmdArgs.push_back("-e"); if (TC.getArch() == llvm::Triple::x86) CmdArgs.push_back("_DllMainCRTStartup@12"); else CmdArgs.push_back("DllMainCRTStartup"); CmdArgs.push_back("--enable-auto-image-base"); } } CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); Args.AddAllArgs(CmdArgs, options::OPT_e); // FIXME: add -N, -n flags Args.AddLastArg(CmdArgs, options::OPT_r); Args.AddLastArg(CmdArgs, options::OPT_s); Args.AddLastArg(CmdArgs, options::OPT_t); Args.AddAllArgs(CmdArgs, options::OPT_u_Group); Args.AddLastArg(CmdArgs, options::OPT_Z_Flag); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_mdll)) { CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("dllcrt2.o"))); } else { if (Args.hasArg(options::OPT_municode)) CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt2u.o"))); else CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt2.o"))); } if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("gcrt2.o"))); CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtbegin.o"))); } Args.AddAllArgs(CmdArgs, options::OPT_L); TC.AddFilePathLibArgs(Args, CmdArgs); AddLinkerInputs(TC, Inputs, Args, CmdArgs); // TODO: Add ASan stuff here // TODO: Add profile stuff here if (D.CCCIsCXX() && !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) && 
    if (OnlyLibstdcxxStatic)
      CmdArgs.push_back("-Bstatic");
    TC.AddCXXStdlibLibArgs(Args, CmdArgs);
    if (OnlyLibstdcxxStatic)
      CmdArgs.push_back("-Bdynamic");
  }

  if (!Args.hasArg(options::OPT_nostdlib)) {
    if (!Args.hasArg(options::OPT_nodefaultlibs)) {
      if (Args.hasArg(options::OPT_static))
        CmdArgs.push_back("--start-group");

      if (Args.hasArg(options::OPT_fstack_protector) ||
          Args.hasArg(options::OPT_fstack_protector_strong) ||
          Args.hasArg(options::OPT_fstack_protector_all)) {
        CmdArgs.push_back("-lssp_nonshared");
        CmdArgs.push_back("-lssp");
      }
      if (Args.hasArg(options::OPT_fopenmp))
        CmdArgs.push_back("-lgomp");

      AddLibGCC(Args, CmdArgs);

      if (Args.hasArg(options::OPT_pg))
        CmdArgs.push_back("-lgmon");

      if (Args.hasArg(options::OPT_pthread))
        CmdArgs.push_back("-lpthread");

      // add system libraries
      if (Args.hasArg(options::OPT_mwindows)) {
        CmdArgs.push_back("-lgdi32");
        CmdArgs.push_back("-lcomdlg32");
      }
      CmdArgs.push_back("-ladvapi32");
      CmdArgs.push_back("-lshell32");
      CmdArgs.push_back("-luser32");
      CmdArgs.push_back("-lkernel32");

      if (Args.hasArg(options::OPT_static))
        CmdArgs.push_back("--end-group");
      else if (!LinkerName.equals_lower("lld"))
        AddLibGCC(Args, CmdArgs);
    }

    if (!Args.hasArg(options::OPT_nostartfiles)) {
      // Add crtfastmath.o if available and fast math is enabled.
      TC.AddFastMathRuntimeIfAvailable(Args, CmdArgs);

      CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtend.o")));
    }
  }
  const char *Exec = Args.MakeArgString(TC.GetProgramPath(LinkerName.data()));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

/// XCore Tools
// We pass assemble and link construction to the xcc tool.

void XCore::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs,
                                    const ArgList &Args,
                                    const char *LinkingOutput) const {
  claimNoWarnArgs(Args);
  ArgStringList CmdArgs;

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  CmdArgs.push_back("-c");

  if (Args.hasArg(options::OPT_v))
    CmdArgs.push_back("-v");

  if (Arg *A = Args.getLastArg(options::OPT_g_Group))
    if (!A->getOption().matches(options::OPT_g0))
      CmdArgs.push_back("-g");

  if (Args.hasFlag(options::OPT_fverbose_asm, options::OPT_fno_verbose_asm,
                   false))
    CmdArgs.push_back("-fverbose-asm");

  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);

  for (const auto &II : Inputs)
    CmdArgs.push_back(II.getFilename());

  const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("xcc"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void XCore::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                 const InputInfo &Output,
                                 const InputInfoList &Inputs,
                                 const ArgList &Args,
                                 const char *LinkingOutput) const {
  ArgStringList CmdArgs;

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Invalid output.");
  }

  if (Args.hasArg(options::OPT_v))
    CmdArgs.push_back("-v");

  // Pass -fexceptions through to the linker if it was present.
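  // Illustrative sketch (assumed invocation, not from the original source):
  // with -fexceptions the resulting job is roughly
  //   xcc -o a.out -fexceptions foo.o
  // based on the flags assembled in this function.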
  if (Args.hasFlag(options::OPT_fexceptions, options::OPT_fno_exceptions,
                   false))
    CmdArgs.push_back("-fexceptions");

  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);

  const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("xcc"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void CrossWindows::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                           const InputInfo &Output,
                                           const InputInfoList &Inputs,
                                           const ArgList &Args,
                                           const char *LinkingOutput) const {
  claimNoWarnArgs(Args);
  const auto &TC =
      static_cast<const toolchains::CrossWindowsToolChain &>(getToolChain());
  ArgStringList CmdArgs;
  const char *Exec;

  switch (TC.getArch()) {
  default:
    llvm_unreachable("unsupported architecture");
  case llvm::Triple::arm:
  case llvm::Triple::thumb:
    break;
  case llvm::Triple::x86:
    CmdArgs.push_back("--32");
    break;
  case llvm::Triple::x86_64:
    CmdArgs.push_back("--64");
    break;
  }

  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  for (const auto &Input : Inputs)
    CmdArgs.push_back(Input.getFilename());

  const std::string Assembler = TC.GetProgramPath("as");
  Exec = Args.MakeArgString(Assembler);

  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void CrossWindows::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                        const InputInfo &Output,
                                        const InputInfoList &Inputs,
                                        const ArgList &Args,
                                        const char *LinkingOutput) const {
  const auto &TC =
      static_cast<const toolchains::CrossWindowsToolChain &>(getToolChain());
  const llvm::Triple &T = TC.getTriple();
  const Driver &D = TC.getDriver();
  SmallString<128> EntryPoint;
  ArgStringList CmdArgs;
  const char *Exec;

  // Silence warning for "clang -g foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_g_Group);
  // and "clang -emit-llvm foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_emit_llvm);
  // and for "clang -w foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_w);
  // Other warning options are already handled somewhere else.

  if (!D.SysRoot.empty())
    CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));

  if (Args.hasArg(options::OPT_pie))
    CmdArgs.push_back("-pie");
  if (Args.hasArg(options::OPT_rdynamic))
    CmdArgs.push_back("-export-dynamic");
  if (Args.hasArg(options::OPT_s))
    CmdArgs.push_back("--strip-all");

  CmdArgs.push_back("-m");
  switch (TC.getArch()) {
  default:
    llvm_unreachable("unsupported architecture");
  case llvm::Triple::arm:
  case llvm::Triple::thumb:
    // FIXME: this is incorrect for WinCE
    CmdArgs.push_back("thumb2pe");
    break;
  case llvm::Triple::x86:
    CmdArgs.push_back("i386pe");
    EntryPoint.append("_");
    break;
  case llvm::Triple::x86_64:
    CmdArgs.push_back("i386pep");
    break;
  }

  if (Args.hasArg(options::OPT_shared)) {
    switch (T.getArch()) {
    default:
      llvm_unreachable("unsupported architecture");
    case llvm::Triple::arm:
    case llvm::Triple::thumb:
    case llvm::Triple::x86_64:
      EntryPoint.append("_DllMainCRTStartup");
      break;
    case llvm::Triple::x86:
      EntryPoint.append("_DllMainCRTStartup@12");
      break;
    }

    CmdArgs.push_back("-shared");
    CmdArgs.push_back("-Bdynamic");

    CmdArgs.push_back("--enable-auto-image-base");

    CmdArgs.push_back("--entry");
    CmdArgs.push_back(Args.MakeArgString(EntryPoint));
  } else {
    EntryPoint.append("mainCRTStartup");

    CmdArgs.push_back(Args.hasArg(options::OPT_static) ? "-Bstatic"
                                                       : "-Bdynamic");

    if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
      CmdArgs.push_back("--entry");
      CmdArgs.push_back(Args.MakeArgString(EntryPoint));
    }

    // FIXME: handle subsystem
  }

  // NOTE: deal with multiple definitions on Windows (e.g. COMDAT)
  CmdArgs.push_back("--allow-multiple-definition");

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_rdynamic)) {
    SmallString<261> ImpLib(Output.getFilename());
    llvm::sys::path::replace_extension(ImpLib, ".lib");

    CmdArgs.push_back("--out-implib");
    CmdArgs.push_back(Args.MakeArgString(ImpLib));
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    const std::string CRTPath(D.SysRoot + "/usr/lib/");
    const char *CRTBegin;

    CRTBegin =
        Args.hasArg(options::OPT_shared) ? "crtbeginS.obj" : "crtbegin.obj";
    CmdArgs.push_back(Args.MakeArgString(CRTPath + CRTBegin));
  }

  Args.AddAllArgs(CmdArgs, options::OPT_L);
  TC.AddFilePathLibArgs(Args, CmdArgs);
  AddLinkerInputs(TC, Inputs, Args, CmdArgs);

  if (D.CCCIsCXX() && !Args.hasArg(options::OPT_nostdlib) &&
      !Args.hasArg(options::OPT_nodefaultlibs)) {
    bool StaticCXX = Args.hasArg(options::OPT_static_libstdcxx) &&
                     !Args.hasArg(options::OPT_static);
    if (StaticCXX)
      CmdArgs.push_back("-Bstatic");
    TC.AddCXXStdlibLibArgs(Args, CmdArgs);
    if (StaticCXX)
      CmdArgs.push_back("-Bdynamic");
  }

  if (!Args.hasArg(options::OPT_nostdlib)) {
    if (!Args.hasArg(options::OPT_nodefaultlibs)) {
      // TODO handle /MT[d] /MD[d]
      CmdArgs.push_back("-lmsvcrt");
      AddRunTimeLibs(TC, D, CmdArgs, Args);
    }
  }

  if (TC.getSanitizerArgs().needsAsanRt()) {
    // TODO handle /MT[d] /MD[d]
    if (Args.hasArg(options::OPT_shared)) {
      CmdArgs.push_back(TC.getCompilerRTArgString(Args, "asan_dll_thunk"));
    } else {
      for (const auto &Lib : {"asan_dynamic", "asan_dynamic_runtime_thunk"})
        CmdArgs.push_back(TC.getCompilerRTArgString(Args, Lib));
      // Make sure the dynamic runtime thunk is not optimized out at link time
      // to ensure proper SEH handling.
      CmdArgs.push_back(Args.MakeArgString("--undefined"));
      CmdArgs.push_back(Args.MakeArgString(TC.getArch() == llvm::Triple::x86
                                               ? "___asan_seh_interceptor"
                                               : "__asan_seh_interceptor"));
    }
  }

  Exec = Args.MakeArgString(TC.GetLinkerPath());

  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void tools::SHAVE::Compiler::ConstructJob(Compilation &C, const JobAction &JA,
                                          const InputInfo &Output,
                                          const InputInfoList &Inputs,
                                          const ArgList &Args,
                                          const char *LinkingOutput) const {
  ArgStringList CmdArgs;
  assert(Inputs.size() == 1);
  const InputInfo &II = Inputs[0];
  assert(II.getType() == types::TY_C || II.getType() == types::TY_CXX ||
         II.getType() == types::TY_PP_CXX);

  if (JA.getKind() == Action::PreprocessJobClass) {
    Args.ClaimAllArgs();
    CmdArgs.push_back("-E");
  } else {
    assert(Output.getType() == types::TY_PP_Asm); // Require preprocessed asm.
    CmdArgs.push_back("-S");
    CmdArgs.push_back("-fno-exceptions"); // Always do this even if unspecified.
  }
  CmdArgs.push_back("-DMYRIAD2");

  // Append all -I, -iquote, -isystem paths, defines/undefines,
  // 'f' flags, optimize flags, and warning options.
  // These are spelled the same way in clang and moviCompile.
  Args.AddAllArgs(CmdArgs,
                  {options::OPT_I_Group, options::OPT_clang_i_Group,
                   options::OPT_std_EQ, options::OPT_D, options::OPT_U,
                   options::OPT_f_Group, options::OPT_f_clang_Group,
                   options::OPT_g_Group, options::OPT_M_Group,
                   options::OPT_O_Group, options::OPT_W_Group,
                   options::OPT_mcpu_EQ});

  // If we're producing a dependency file, and assembly is the final action,
  // then the name of the target in the dependency file should be the '.o'
  // file, not the '.s' file produced by this step.
void tools::SHAVE::Compiler::ConstructJob(Compilation &C, const JobAction &JA,
                                          const InputInfo &Output,
                                          const InputInfoList &Inputs,
                                          const ArgList &Args,
                                          const char *LinkingOutput) const {
  ArgStringList CmdArgs;
  assert(Inputs.size() == 1);
  const InputInfo &II = Inputs[0];
  assert(II.getType() == types::TY_C || II.getType() == types::TY_CXX ||
         II.getType() == types::TY_PP_CXX);

  if (JA.getKind() == Action::PreprocessJobClass) {
    Args.ClaimAllArgs();
    CmdArgs.push_back("-E");
  } else {
    assert(Output.getType() == types::TY_PP_Asm); // Require preprocessed asm.
    CmdArgs.push_back("-S");
    CmdArgs.push_back("-fno-exceptions"); // Always do this even if unspecified.
  }
  CmdArgs.push_back("-DMYRIAD2");

  // Append all -I, -iquote, -isystem paths, defines/undefines,
  // 'f' flags, optimize flags, and warning options.
  // These are spelled the same way in clang and moviCompile.
  Args.AddAllArgs(CmdArgs, {options::OPT_I_Group, options::OPT_clang_i_Group,
                            options::OPT_std_EQ, options::OPT_D, options::OPT_U,
                            options::OPT_f_Group, options::OPT_f_clang_Group,
                            options::OPT_g_Group, options::OPT_M_Group,
                            options::OPT_O_Group, options::OPT_W_Group,
                            options::OPT_mcpu_EQ});

  // If we're producing a dependency file, and assembly is the final action,
  // then the name of the target in the dependency file should be the '.o'
  // file, not the '.s' file produced by this step. For example, instead of
  //  /tmp/mumble.s: mumble.c .../someheader.h
  // the filename on the lefthand side should be "mumble.o"
  if (Args.getLastArg(options::OPT_MF) && !Args.getLastArg(options::OPT_MT) &&
      C.getActions().size() == 1 &&
      C.getActions()[0]->getKind() == Action::AssembleJobClass) {
    Arg *A = Args.getLastArg(options::OPT_o);
    if (A) {
      CmdArgs.push_back("-MT");
      CmdArgs.push_back(Args.MakeArgString(A->getValue()));
    }
  }

  CmdArgs.push_back(II.getFilename());
  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  std::string Exec =
      Args.MakeArgString(getToolChain().GetProgramPath("moviCompile"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Args.MakeArgString(Exec),
                                          CmdArgs, Inputs));
}

void tools::SHAVE::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                           const InputInfo &Output,
                                           const InputInfoList &Inputs,
                                           const ArgList &Args,
                                           const char *LinkingOutput) const {
  ArgStringList CmdArgs;

  assert(Inputs.size() == 1);
  const InputInfo &II = Inputs[0];
  assert(II.getType() == types::TY_PP_Asm); // Require preprocessed asm input.
  assert(Output.getType() == types::TY_Object);

  CmdArgs.push_back("-no6thSlotCompression");
  const Arg *CPUArg = Args.getLastArg(options::OPT_mcpu_EQ);
  if (CPUArg)
    CmdArgs.push_back(
        Args.MakeArgString("-cv:" + StringRef(CPUArg->getValue())));
  CmdArgs.push_back("-noSPrefixing");
  CmdArgs.push_back("-a"); // Mystery option.
  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
  for (const Arg *A : Args.filtered(options::OPT_I, options::OPT_isystem)) {
    A->claim();
    CmdArgs.push_back(
        Args.MakeArgString(std::string("-i:") + A->getValue(0)));
  }
  CmdArgs.push_back("-elf"); // Output format.
  CmdArgs.push_back(II.getFilename());
  CmdArgs.push_back(
      Args.MakeArgString(std::string("-o:") + Output.getFilename()));

  std::string Exec =
      Args.MakeArgString(getToolChain().GetProgramPath("moviAsm"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Args.MakeArgString(Exec),
                                          CmdArgs, Inputs));
}
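// Illustrative sketch, not part of the imported source: assembling a
// preprocessed foo.s with -mcpu=<cpu> would invoke roughly
//   moviAsm -no6thSlotCompression -cv:<cpu> -noSPrefixing -a -elf \
//     foo.s -o:foo.o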
void tools::Myriad::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                         const InputInfo &Output,
                                         const InputInfoList &Inputs,
                                         const ArgList &Args,
                                         const char *LinkingOutput) const {
  const auto &TC =
      static_cast<const toolchains::MyriadToolChain &>(getToolChain());
  const llvm::Triple &T = TC.getTriple();
  ArgStringList CmdArgs;
  bool UseStartfiles =
      !Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles);
  bool UseDefaultLibs =
      !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs);

  if (T.getArch() == llvm::Triple::sparc)
    CmdArgs.push_back("-EB");
  else // SHAVE assumes little-endian, and sparcel is expressly so.
    CmdArgs.push_back("-EL");

  // The remaining logic is mostly like gnutools::Linker::ConstructJob,
  // but we never pass through a --sysroot option and various other bits.
  // For example, there are no sanitizers (yet) nor gold linker.

  // Eat some arguments that may be present but have no effect.
  Args.ClaimAllArgs(options::OPT_g_Group);
  Args.ClaimAllArgs(options::OPT_w);
  Args.ClaimAllArgs(options::OPT_static_libgcc);

  if (Args.hasArg(options::OPT_s)) // Pass the 'strip' option.
    CmdArgs.push_back("-s");

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  if (UseStartfiles) {
    // If you want startfiles, it means you want the builtin crti and crtbegin,
    // but not crt0. Myriad link commands provide their own crt0.o as needed.
    CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crti.o")));
    CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtbegin.o")));
  }

  Args.AddAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group,
                            options::OPT_e, options::OPT_s, options::OPT_t,
                            options::OPT_Z_Flag, options::OPT_r});

  TC.AddFilePathLibArgs(Args, CmdArgs);
  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);

  if (UseDefaultLibs) {
    if (C.getDriver().CCCIsCXX())
      CmdArgs.push_back("-lstdc++");
    if (T.getOS() == llvm::Triple::RTEMS) {
      CmdArgs.push_back("--start-group");
      CmdArgs.push_back("-lc");
      // You must provide your own "-L" option to enable finding these.
      CmdArgs.push_back("-lrtemscpu");
      CmdArgs.push_back("-lrtemsbsp");
      CmdArgs.push_back("--end-group");
    } else {
      CmdArgs.push_back("-lc");
    }
    CmdArgs.push_back("-lgcc");
  }
  if (UseStartfiles) {
    CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtend.o")));
    CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtn.o")));
  }

  std::string Exec =
      Args.MakeArgString(TC.GetProgramPath("sparc-myriad-elf-ld"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Args.MakeArgString(Exec),
                                          CmdArgs, Inputs));
}

void PS4cpu::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs,
                                    const ArgList &Args,
                                    const char *LinkingOutput) const {
  claimNoWarnArgs(Args);
  ArgStringList CmdArgs;

  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);

  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  assert(Inputs.size() == 1 && "Unexpected number of inputs.");
  const InputInfo &Input = Inputs[0];
  assert(Input.isFilename() && "Invalid input.");
  CmdArgs.push_back(Input.getFilename());

  const char *Exec =
      Args.MakeArgString(getToolChain().GetProgramPath("orbis-as"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

static void AddPS4SanitizerArgs(const ToolChain &TC, ArgStringList &CmdArgs) {
  const SanitizerArgs &SanArgs = TC.getSanitizerArgs();
  if (SanArgs.needsUbsanRt()) {
    CmdArgs.push_back("-lSceDbgUBSanitizer_stub_weak");
  }
  if (SanArgs.needsAsanRt()) {
    CmdArgs.push_back("-lSceDbgAddressSanitizer_stub_weak");
  }
}
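// Note (summary of the helper above, no new behavior): building with
// -fsanitize=undefined or -fsanitize=address on PS4 appends
// -lSceDbgUBSanitizer_stub_weak or -lSceDbgAddressSanitizer_stub_weak,
// respectively, to whichever of the two link jobs below is constructed.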
static void ConstructPS4LinkJob(const Tool &T, Compilation &C,
                                const JobAction &JA, const InputInfo &Output,
                                const InputInfoList &Inputs,
                                const ArgList &Args,
                                const char *LinkingOutput) {
  const toolchains::FreeBSD &ToolChain =
      static_cast<const toolchains::FreeBSD &>(T.getToolChain());
  const Driver &D = ToolChain.getDriver();
  ArgStringList CmdArgs;

  // Silence warning for "clang -g foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_g_Group);
  // and "clang -emit-llvm foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_emit_llvm);
  // and for "clang -w foo.o -o foo". Other warning options are already
  // handled somewhere else.
  Args.ClaimAllArgs(options::OPT_w);

  if (!D.SysRoot.empty())
    CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));

  if (Args.hasArg(options::OPT_pie))
    CmdArgs.push_back("-pie");

  if (Args.hasArg(options::OPT_rdynamic))
    CmdArgs.push_back("-export-dynamic");
  if (Args.hasArg(options::OPT_shared))
    CmdArgs.push_back("--oformat=so");

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Invalid output.");
  }

  AddPS4SanitizerArgs(ToolChain, CmdArgs);

  Args.AddAllArgs(CmdArgs, options::OPT_L);
  Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
  Args.AddAllArgs(CmdArgs, options::OPT_e);
  Args.AddAllArgs(CmdArgs, options::OPT_s);
  Args.AddAllArgs(CmdArgs, options::OPT_t);
  Args.AddAllArgs(CmdArgs, options::OPT_r);

  if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle))
    CmdArgs.push_back("--no-demangle");

  AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs);

  if (Args.hasArg(options::OPT_pthread)) {
    CmdArgs.push_back("-lpthread");
  }

  const char *Exec = Args.MakeArgString(ToolChain.GetProgramPath("orbis-ld"));

  C.addCommand(llvm::make_unique<Command>(JA, T, Exec, CmdArgs, Inputs));
}
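// Illustrative sketch, not part of the imported source: for
//   clang --target=x86_64-scei-ps4 main.o -o a.out -pthread
// the proprietary-linker path above would run roughly
//   orbis-ld -o a.out main.o -lpthread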
static void ConstructGoldLinkJob(const Tool &T, Compilation &C,
                                 const JobAction &JA, const InputInfo &Output,
                                 const InputInfoList &Inputs,
                                 const ArgList &Args,
                                 const char *LinkingOutput) {
  const toolchains::FreeBSD &ToolChain =
      static_cast<const toolchains::FreeBSD &>(T.getToolChain());
  const Driver &D = ToolChain.getDriver();
  ArgStringList CmdArgs;

  // Silence warning for "clang -g foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_g_Group);
  // and "clang -emit-llvm foo.o -o foo"
  Args.ClaimAllArgs(options::OPT_emit_llvm);
  // and for "clang -w foo.o -o foo". Other warning options are already
  // handled somewhere else.
  Args.ClaimAllArgs(options::OPT_w);

  if (!D.SysRoot.empty())
    CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));

  if (Args.hasArg(options::OPT_pie))
    CmdArgs.push_back("-pie");

  if (Args.hasArg(options::OPT_static)) {
    CmdArgs.push_back("-Bstatic");
  } else {
    if (Args.hasArg(options::OPT_rdynamic))
      CmdArgs.push_back("-export-dynamic");
    CmdArgs.push_back("--eh-frame-hdr");
    if (Args.hasArg(options::OPT_shared)) {
      CmdArgs.push_back("-Bshareable");
    } else {
      CmdArgs.push_back("-dynamic-linker");
      CmdArgs.push_back("/libexec/ld-elf.so.1");
    }
    CmdArgs.push_back("--enable-new-dtags");
  }

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Invalid output.");
  }

  AddPS4SanitizerArgs(ToolChain, CmdArgs);

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    const char *crt1 = nullptr;
    if (!Args.hasArg(options::OPT_shared)) {
      if (Args.hasArg(options::OPT_pg))
        crt1 = "gcrt1.o";
      else if (Args.hasArg(options::OPT_pie))
        crt1 = "Scrt1.o";
      else
        crt1 = "crt1.o";
    }
    if (crt1)
      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crt1)));

    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o")));

    const char *crtbegin = nullptr;
    if (Args.hasArg(options::OPT_static))
      crtbegin = "crtbeginT.o";
    else if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie))
      crtbegin = "crtbeginS.o";
    else
      crtbegin = "crtbegin.o";

    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin)));
  }

  Args.AddAllArgs(CmdArgs, options::OPT_L);
  ToolChain.AddFilePathLibArgs(Args, CmdArgs);
  Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
  Args.AddAllArgs(CmdArgs, options::OPT_e);
  Args.AddAllArgs(CmdArgs, options::OPT_s);
  Args.AddAllArgs(CmdArgs, options::OPT_t);
  Args.AddAllArgs(CmdArgs, options::OPT_r);

  if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle))
    CmdArgs.push_back("--no-demangle");

  AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs);

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
    // For PS4, we always want to pass libm, libstdc++ and libkernel
    // libraries for both C and C++ compilations.
    CmdArgs.push_back("-lkernel");
    if (D.CCCIsCXX()) {
      ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
      if (Args.hasArg(options::OPT_pg))
        CmdArgs.push_back("-lm_p");
      else
        CmdArgs.push_back("-lm");
    }
    // FIXME: For some reason GCC passes -lgcc and -lgcc_s before adding
    // the default system libraries. Just mimic this for now.
    if (Args.hasArg(options::OPT_pg))
      CmdArgs.push_back("-lgcc_p");
    else
      CmdArgs.push_back("-lcompiler_rt");

    if (Args.hasArg(options::OPT_static)) {
      CmdArgs.push_back("-lstdc++");
    } else if (Args.hasArg(options::OPT_pg)) {
      CmdArgs.push_back("-lgcc_eh_p");
    } else {
      CmdArgs.push_back("--as-needed");
      CmdArgs.push_back("-lstdc++");
      CmdArgs.push_back("--no-as-needed");
    }

    if (Args.hasArg(options::OPT_pthread)) {
      if (Args.hasArg(options::OPT_pg))
        CmdArgs.push_back("-lpthread_p");
      else
        CmdArgs.push_back("-lpthread");
    }

    if (Args.hasArg(options::OPT_pg)) {
      if (Args.hasArg(options::OPT_shared))
        CmdArgs.push_back("-lc");
      else {
        if (Args.hasArg(options::OPT_static)) {
          CmdArgs.push_back("--start-group");
          CmdArgs.push_back("-lc_p");
          CmdArgs.push_back("-lpthread_p");
          CmdArgs.push_back("--end-group");
        } else {
          CmdArgs.push_back("-lc_p");
        }
      }
      CmdArgs.push_back("-lgcc_p");
    } else {
      if (Args.hasArg(options::OPT_static)) {
        CmdArgs.push_back("--start-group");
        CmdArgs.push_back("-lc");
        CmdArgs.push_back("-lpthread");
        CmdArgs.push_back("--end-group");
      } else {
        CmdArgs.push_back("-lc");
      }
      CmdArgs.push_back("-lcompiler_rt");
    }

    if (Args.hasArg(options::OPT_static)) {
      CmdArgs.push_back("-lstdc++");
    } else if (Args.hasArg(options::OPT_pg)) {
      CmdArgs.push_back("-lgcc_eh_p");
    } else {
      CmdArgs.push_back("--as-needed");
      CmdArgs.push_back("-lstdc++");
      CmdArgs.push_back("--no-as-needed");
    }
  }

  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
    if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie))
      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtendS.o")));
    else
      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtend.o")));
    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o")));
  }

  const char *Exec =
#ifdef LLVM_ON_WIN32
      Args.MakeArgString(ToolChain.GetProgramPath("orbis-ld.gold"));
#else
      Args.MakeArgString(ToolChain.GetProgramPath("orbis-ld"));
#endif

  C.addCommand(llvm::make_unique<Command>(JA, T, Exec, CmdArgs, Inputs));
}

void PS4cpu::Link::ConstructJob(Compilation &C, const JobAction &JA,
                                const InputInfo &Output,
                                const InputInfoList &Inputs,
                                const ArgList &Args,
                                const char *LinkingOutput) const {
  const toolchains::FreeBSD &ToolChain =
      static_cast<const toolchains::FreeBSD &>(getToolChain());
  const Driver &D = ToolChain.getDriver();
  bool PS4Linker;
  StringRef LinkerOptName;
  if (const Arg *A = Args.getLastArg(options::OPT_fuse_ld_EQ)) {
    LinkerOptName = A->getValue();
    if (LinkerOptName != "ps4" && LinkerOptName != "gold")
      D.Diag(diag::err_drv_unsupported_linker) << LinkerOptName;
  }

  if (LinkerOptName == "gold")
    PS4Linker = false;
  else if (LinkerOptName == "ps4")
    PS4Linker = true;
  else
    PS4Linker = !Args.hasArg(options::OPT_shared);

  if (PS4Linker)
    ConstructPS4LinkJob(*this, C, JA, Output, Inputs, Args, LinkingOutput);
  else
    ConstructGoldLinkJob(*this, C, JA, Output, Inputs, Args, LinkingOutput);
}
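// Summary of the dispatch above (restating the code, no new behavior):
//   -fuse-ld=ps4   -> ConstructPS4LinkJob
//   -fuse-ld=gold  -> ConstructGoldLinkJob
//   no -fuse-ld=   -> the PS4 linker, unless -shared was given, in which
//                     case the gold-style job is used.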
void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs,
                                    const ArgList &Args,
                                    const char *LinkingOutput) const {
  const auto &TC =
      static_cast<const toolchains::CudaToolChain &>(getToolChain());
  assert(TC.getTriple().isNVPTX() && "Wrong platform");

  // Obtain architecture from the action.
  CudaArch gpu_arch = StringToCudaArch(JA.getOffloadingArch());
  assert(gpu_arch != CudaArch::UNKNOWN &&
         "Device action expected to have an architecture.");

  // Check that our installation's ptxas supports gpu_arch.
  if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
    TC.cudaInstallation().CheckCudaVersionSupportsArch(gpu_arch);
  }

  ArgStringList CmdArgs;
  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
  if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
                   options::OPT_no_cuda_noopt_device_debug, false)) {
    // ptxas does not accept -g option if optimization is enabled, so
    // we ignore the compiler's -O* options if we want debug info.
    CmdArgs.push_back("-g");
    CmdArgs.push_back("--dont-merge-basicblocks");
    CmdArgs.push_back("--return-at-end");
  } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
    // Map the -O we received to -O{0,1,2,3}.
    //
    // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
    // default, so it may correspond more closely to the spirit of clang -O2.

    // -O3 seems like the least-bad option when -Osomething is specified to
    // clang but it isn't handled below.
    StringRef OOpt = "3";
    if (A->getOption().matches(options::OPT_O4) ||
        A->getOption().matches(options::OPT_Ofast))
      OOpt = "3";
    else if (A->getOption().matches(options::OPT_O0))
      OOpt = "0";
    else if (A->getOption().matches(options::OPT_O)) {
      // -Os, -Oz, and -O(anything else) map to -O2, for lack of better
      // options.
      OOpt = llvm::StringSwitch<const char *>(A->getValue())
                 .Case("1", "1")
                 .Case("2", "2")
                 .Case("3", "3")
                 .Case("s", "2")
                 .Case("z", "2")
                 .Default("2");
    }
    CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
  } else {
    // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
    // to no optimizations, but ptxas's default is -O3.
    CmdArgs.push_back("-O0");
  }

  CmdArgs.push_back("--gpu-name");
  CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
  CmdArgs.push_back("--output-file");
  CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
  for (const auto& II : Inputs)
    CmdArgs.push_back(Args.MakeArgString(II.getFilename()));

  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
    CmdArgs.push_back(Args.MakeArgString(A));

  const char *Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

// All inputs to this linker must be from CudaDeviceActions, as we need to look
// at the Inputs' Actions in order to figure out which GPU architecture they
// correspond to.
void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                 const InputInfo &Output,
                                 const InputInfoList &Inputs,
                                 const ArgList &Args,
                                 const char *LinkingOutput) const {
  const auto &TC =
      static_cast<const toolchains::CudaToolChain &>(getToolChain());
  assert(TC.getTriple().isNVPTX() && "Wrong platform");

  ArgStringList CmdArgs;
  CmdArgs.push_back("--cuda");
  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
  CmdArgs.push_back(Args.MakeArgString("--create"));
  CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));

  for (const auto& II : Inputs) {
    auto *A = II.getAction();
    assert(A->getInputs().size() == 1 &&
           "Device offload action is expected to have a single input");
    const char *gpu_arch_str = A->getOffloadingArch();
    assert(gpu_arch_str &&
           "Device action expected to have associated a GPU architecture!");
    CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);

    // We need to pass an Arch of the form "sm_XX" for cubin files and
    // "compute_XX" for ptx.
    const char *Arch =
        (II.getType() == types::TY_PP_Asm)
            ? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
            : gpu_arch_str;
    CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
                                         Arch + ",file=" + II.getFilename()));
  }

  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
    CmdArgs.push_back(Args.MakeArgString(A));

  const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}