Index: head/contrib/llvm/tools/lld/ELF/Arch/AArch64.cpp
===================================================================
--- head/contrib/llvm/tools/lld/ELF/Arch/AArch64.cpp	(revision 337281)
+++ head/contrib/llvm/tools/lld/ELF/Arch/AArch64.cpp	(revision 337282)
@@ -1,426 +1,430 @@
 //===- AArch64.cpp --------------------------------------------------------===//
 //
 //                             The LLVM Linker
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
 #include "Thunks.h"
 #include "lld/Common/ErrorHandler.h"
 #include "llvm/Object/ELF.h"
 #include "llvm/Support/Endian.h"
 
 using namespace llvm;
 using namespace llvm::support::endian;
 using namespace llvm::ELF;
 using namespace lld;
 using namespace lld::elf;
 
 // Computes Page(Expr) as used by the AArch64 page-relative relocations: the
 // low 12 bits of Expr are cleared, i.e. the result is Expr & ~0xFFF. The
 // 4 KiB granule is fixed here and does not follow the page size the platform
 // actually runs with.
 uint64_t elf::getAArch64Page(uint64_t Expr) {
   const uint64_t PageOffsetBits = 0xFFF;
   return Expr & ~PageOffsetBits;
 }
 
 namespace {
 // TargetInfo implementation for AArch64. Apart from the constructor, every
 // member declared here overrides a TargetInfo virtual function; the
 // definitions follow below in this file.
 class AArch64 final : public TargetInfo {
 public:
   AArch64();
   // Relocation classification.
   RelExpr getRelExpr(RelType Type, const Symbol &S,
                      const uint8_t *Loc) const override;
   bool isPicRel(RelType Type) const override;
   // .got.plt and PLT writers.
   void writeGotPlt(uint8_t *Buf, const Symbol &S) const override;
   void writePltHeader(uint8_t *Buf) const override;
   void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
                 int32_t Index, unsigned RelOff) const override;
   // Range-extension thunk queries.
   bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
                   uint64_t BranchAddr, const Symbol &S) const override;
   bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;
   bool usesOnlyLowPageBits(RelType Type) const override;
   // Applies a single relocation with an already computed value.
   void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
   // TLS relaxation hooks.
   RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
                           RelExpr Expr) const override;
   void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
   void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
   void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
 };
 } // namespace
 
 // Fills in the AArch64-specific TargetInfo fields: the dynamic relocation
 // types, GOT/PLT entry and header sizes, the default maximum page size and
 // image base, the TCB size, and the thunk configuration.
 AArch64::AArch64() {
   CopyRel = R_AARCH64_COPY;
   RelativeRel = R_AARCH64_RELATIVE;
   IRelativeRel = R_AARCH64_IRELATIVE;
   GotRel = R_AARCH64_GLOB_DAT;
   PltRel = R_AARCH64_JUMP_SLOT;
   TlsDescRel = R_AARCH64_TLSDESC;
   TlsGotRel = R_AARCH64_TLS_TPREL64;
   GotEntrySize = 8;
   GotPltEntrySize = 8;
   PltEntrySize = 16;
   PltHeaderSize = 32;
   DefaultMaxPageSize = 65536;
 
+  // Align to the 2 MiB page size (known as a superpage or huge page).
+  // FreeBSD automatically promotes 2 MiB-aligned allocations.
+  DefaultImageBase = 0x200000;
+
   // It doesn't seem to be documented anywhere, but TLS on AArch64 uses variant
   // 1 of the TLS structures and the TCB size is 16.
   TcbSize = 16;
   NeedsThunks = true;
 
   // See comment in Arch/ARM.cpp for a more detailed explanation of
   // ThunkSectionSpacing. For AArch64 the only branches we are permitted to
   // Thunk have a range of +/- 128 MiB.
   ThunkSectionSpacing = (128 * 1024 * 1024) - 0x30000;
 }
 
 // Maps an AArch64 relocation type onto the generic RelExpr that describes
 // how its value is computed. Any type not listed explicitly is treated as
 // R_ABS. S and Loc are not consulted.
 RelExpr AArch64::getRelExpr(RelType Type, const Symbol &S,
                             const uint8_t *Loc) const {
   switch (Type) {
   case R_AARCH64_NONE:
     return R_NONE;
 
   // Branch instructions.
   case R_AARCH64_CALL26:
   case R_AARCH64_JUMP26:
   case R_AARCH64_CONDBR19:
   case R_AARCH64_TSTBR14:
     return R_PLT_PC;
 
   // PC-relative data and address forms.
   case R_AARCH64_PREL16:
   case R_AARCH64_PREL32:
   case R_AARCH64_PREL64:
   case R_AARCH64_ADR_PREL_LO21:
   case R_AARCH64_LD_PREL_LO19:
     return R_PC;
   case R_AARCH64_ADR_PREL_PG_HI21:
     return R_PAGE_PC;
 
   // GOT accesses (including the TLS initial-exec GOT slot).
   case R_AARCH64_ADR_GOT_PAGE:
   case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
     return R_GOT_PAGE_PC;
   case R_AARCH64_LD64_GOT_LO12_NC:
   case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
     return R_GOT;
 
   // TLS descriptor sequence.
   case R_AARCH64_TLSDESC_ADR_PAGE21:
     return R_TLSDESC_PAGE;
   case R_AARCH64_TLSDESC_LD64_LO12:
   case R_AARCH64_TLSDESC_ADD_LO12:
     return R_TLSDESC;
   case R_AARCH64_TLSDESC_CALL:
     return R_TLSDESC_CALL;
 
   // TLS local-exec.
   case R_AARCH64_TLSLE_ADD_TPREL_HI12:
   case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
     return R_TLS;
 
   default:
     return R_ABS;
   }
 }
 
 // Refines R_RELAX_TLS_GD_TO_IE into its page-relative or absolute variant
 // depending on the relocation type; every other expression is returned
 // unchanged. Data is not consulted.
 RelExpr AArch64::adjustRelaxExpr(RelType Type, const uint8_t *Data,
                                  RelExpr Expr) const {
   if (Expr != R_RELAX_TLS_GD_TO_IE)
     return Expr;
   return Type == R_AARCH64_TLSDESC_ADR_PAGE21 ? R_RELAX_TLS_GD_TO_IE_PAGE_PC
                                               : R_RELAX_TLS_GD_TO_IE_ABS;
 }
 
 // Returns true for the *_LO12 style relocation types listed below and false
 // for every other type.
 bool AArch64::usesOnlyLowPageBits(RelType Type) const {
   switch (Type) {
   case R_AARCH64_ADD_ABS_LO12_NC:
   case R_AARCH64_LDST8_ABS_LO12_NC:
   case R_AARCH64_LDST16_ABS_LO12_NC:
   case R_AARCH64_LDST32_ABS_LO12_NC:
   case R_AARCH64_LDST64_ABS_LO12_NC:
   case R_AARCH64_LDST128_ABS_LO12_NC:
   case R_AARCH64_LD64_GOT_LO12_NC:
   case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
   case R_AARCH64_TLSDESC_LD64_LO12:
   case R_AARCH64_TLSDESC_ADD_LO12:
     return true;
   default:
     return false;
   }
 }
 
 // Only the 32-bit and 64-bit absolute relocations are reported as PIC
 // relocations.
 bool AArch64::isPicRel(RelType Type) const {
   switch (Type) {
   case R_AARCH64_ABS32:
   case R_AARCH64_ABS64:
     return true;
   default:
     return false;
   }
 }
 
 // Stores the address returned by InX::Plt->getVA() into the .got.plt slot at
 // Buf as a little-endian 64-bit value. The symbol itself is not used.
 void AArch64::writeGotPlt(uint8_t *Buf, const Symbol &) const {
   const uint64_t PltAddr = InX::Plt->getVA();
   write64le(Buf, PltAddr);
 }
 
 // Writes the 32-byte PLT header into Buf. The instruction template is copied
 // first; the adrp, ldr and add at byte offsets 4, 8 and 12 are then patched
 // so that they address InX::GotPlt->getVA() + 16.
 void AArch64::writePltHeader(uint8_t *Buf) const {
   const uint8_t PltData[] = {
       0xf0, 0x7b, 0xbf, 0xa9, // stp    x16, x30, [sp,#-16]!
       0x10, 0x00, 0x00, 0x90, // adrp   x16, Page(&(.plt.got[2]))
       0x11, 0x02, 0x40, 0xf9, // ldr    x17, [x16, Offset(&(.plt.got[2]))]
       0x10, 0x02, 0x00, 0x91, // add    x16, x16, Offset(&(.plt.got[2]))
       0x20, 0x02, 0x1f, 0xd6, // br     x17
       0x1f, 0x20, 0x03, 0xd5, // nop
       0x1f, 0x20, 0x03, 0xd5, // nop
       0x1f, 0x20, 0x03, 0xd5  // nop
   };
   memcpy(Buf, PltData, sizeof(PltData));
 
   uint64_t Got = InX::GotPlt->getVA();
   uint64_t Plt = InX::Plt->getVA();
   // The adrp is the second instruction, i.e. it lives at Plt + 4, so the page
   // delta is measured from that address.
   relocateOne(Buf + 4, R_AARCH64_ADR_PREL_PG_HI21,
               getAArch64Page(Got + 16) - getAArch64Page(Plt + 4));
   // The ldr and the add both take the low 12 bits of the same slot address.
   relocateOne(Buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, Got + 16);
   relocateOne(Buf + 12, R_AARCH64_ADD_ABS_LO12_NC, Got + 16);
 }
 
 // Writes one 16-byte PLT entry into Buf. GotPltEntryAddr is the address of
 // the slot the entry loads from and PltEntryAddr is the address of the entry
 // itself (and therefore of its leading adrp). Index and RelOff are unused
 // here.
 void AArch64::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
                        uint64_t PltEntryAddr, int32_t Index,
                        unsigned RelOff) const {
   const uint8_t Inst[] = {
       0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.plt.got[n]))
       0x11, 0x02, 0x40, 0xf9, // ldr  x17, [x16, Offset(&(.plt.got[n]))]
       0x10, 0x02, 0x00, 0x91, // add  x16, x16, Offset(&(.plt.got[n]))
       0x20, 0x02, 0x1f, 0xd6  // br   x17
   };
   memcpy(Buf, Inst, sizeof(Inst));
 
   // Patch the adrp with the page delta, then the ldr and the add with the low
   // 12 bits of the slot address.
   relocateOne(Buf, R_AARCH64_ADR_PREL_PG_HI21,
               getAArch64Page(GotPltEntryAddr) - getAArch64Page(PltEntryAddr));
   relocateOne(Buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, GotPltEntryAddr);
   relocateOne(Buf + 8, R_AARCH64_ADD_ABS_LO12_NC, GotPltEntryAddr);
 }
 
 // Reports whether the branch at BranchAddr needs a range-extension thunk to
 // reach S. File is not consulted.
 bool AArch64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
                          uint64_t BranchAddr, const Symbol &S) const {
   // ELF for the ARM 64-bit architecture, section Call and Jump relocations
   // only permits range extension thunks for R_AARCH64_CALL26 and
   // R_AARCH64_JUMP26 relocation types.
   const bool IsCallOrJump =
       Type == R_AARCH64_CALL26 || Type == R_AARCH64_JUMP26;
   if (!IsCallOrJump)
     return false;
 
   // Branches resolved through the PLT target the PLT entry, not the symbol.
   uint64_t Target;
   if (Expr == R_PLT_PC)
     Target = S.getPltVA();
   else
     Target = S.getVA();
   return !inBranchRange(Type, BranchAddr, Target);
 }
 
 // Returns whether a branch of the given type located at Src can reach Dst.
 // Only R_AARCH64_CALL26 and R_AARCH64_JUMP26 are range-checked; every other
 // type is reported as in range.
 bool AArch64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
   const bool IsCallOrJump =
       Type == R_AARCH64_CALL26 || Type == R_AARCH64_JUMP26;
   if (!IsCallOrJump)
     return true;
 
   // The AArch64 call and unconditional branch instructions have a range of
   // +/- 128 MiB.
   const uint64_t Reach = 128 * 1024 * 1024;
   if (Dst <= Src)
     return Src - Dst <= Reach;
   // The immediate is signed, so the forward reach is one instruction shorter.
   return Dst - Src <= Reach - 4;
 }
 
 // Rewrites the split immediate of the 32-bit instruction word at L: the two
 // low bits of Imm go to bits [30:29] and bits [20:2] of Imm go to bits
 // [23:5]. Both fields are cleared before the new value is merged in; all
 // other bits of the word are preserved.
 static void write32AArch64Addr(uint8_t *L, uint64_t Imm) {
   const uint32_t FieldMask = (0x3u << 29) | (0x1FFFFCu << 3);
   uint32_t Insn = read32le(L) & ~FieldMask;
   Insn |= static_cast<uint32_t>((Imm & 0x3) << 29);
   Insn |= static_cast<uint32_t>((Imm & 0x1FFFFC) << 3);
   write32le(L, Insn);
 }
 
 // Returns bits [Start, End] of Val (both bounds inclusive, bit 0 being the
 // least significant), shifted right so that bit Start ends up in bit 0.
 // For instance, getBits(0xF0, 4, 7) returns 0xF.
 // Expects 0 <= Start <= End <= 63.
 static uint64_t getBits(uint64_t Val, int Start, int End) {
   int Width = End + 1 - Start;
   // Shifting a 64-bit value by 64 is undefined, so a request for the whole
   // word is answered without building a mask.
   if (Width >= 64)
     return Val >> Start;
   uint64_t Mask = ((uint64_t)1 << Width) - 1;
   return (Val >> Start) & Mask;
 }
 
 // ORs V into the little-endian 32-bit word stored at P.
 static void or32le(uint8_t *P, int32_t V) {
   const uint32_t Current = read32le(P);
   write32le(P, Current | V);
 }
 
 // ORs the low 12 bits of Imm into bits [21:10] of the instruction word at L.
 // Used for the immediate field of AArch64 ldr, str and add instructions.
 static void or32AArch64Imm(uint8_t *L, uint64_t Imm) {
   const uint64_t Imm12 = Imm & 0xFFF;
   or32le(L, Imm12 << 10);
 }
 
 // Applies relocation Type at Loc using the already computed value Val.
 //
 // Data relocations overwrite the 16/32/64-bit field. Most instruction
 // relocations OR the encoded immediate into the existing word, so any bits
 // already present in the field are kept. The exceptions are the ADR/ADRP
 // forms, which go through write32AArch64Addr and clear the field first, and
 // R_AARCH64_JUMP26, which replaces the whole instruction. Range and alignment
 // checks are delegated to checkInt/checkIntUInt/checkAlignment. An unknown
 // type is reported through error().
 void AArch64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
   switch (Type) {
   case R_AARCH64_ABS16:
   case R_AARCH64_PREL16:
     checkIntUInt<16>(Loc, Val, Type);
     write16le(Loc, Val);
     break;
   case R_AARCH64_ABS32:
   case R_AARCH64_PREL32:
     checkIntUInt<32>(Loc, Val, Type);
     write32le(Loc, Val);
     break;
   case R_AARCH64_ABS64:
   case R_AARCH64_GLOB_DAT:
   case R_AARCH64_PREL64:
     write64le(Loc, Val);
     break;
   case R_AARCH64_ADD_ABS_LO12_NC:
     or32AArch64Imm(Loc, Val);
     break;
   case R_AARCH64_ADR_GOT_PAGE:
   case R_AARCH64_ADR_PREL_PG_HI21:
   case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
   case R_AARCH64_TLSDESC_ADR_PAGE21:
     // Page-granular forms: the immediate holds Val with its low 12 bits
     // dropped.
     checkInt<33>(Loc, Val, Type);
     write32AArch64Addr(Loc, Val >> 12);
     break;
   case R_AARCH64_ADR_PREL_LO21:
     checkInt<21>(Loc, Val, Type);
     write32AArch64Addr(Loc, Val);
     break;
   case R_AARCH64_JUMP26:
     // Normally we would just write the bits of the immediate field, however
     // when patching instructions for the cpu errata fix -fix-cortex-a53-843419
     // we want to replace a non-branch instruction with a branch immediate
     // instruction. By writing all the bits of the instruction including the
     // opcode and the immediate (0 001 | 01 imm26) we can do this
     // transformation by placing a R_AARCH64_JUMP26 relocation at the offset of
     // the instruction we want to patch.
     write32le(Loc, 0x14000000);
     LLVM_FALLTHROUGH;
   case R_AARCH64_CALL26:
     // imm26 holds the byte offset divided by 4.
     checkInt<28>(Loc, Val, Type);
     or32le(Loc, (Val & 0x0FFFFFFC) >> 2);
     break;
   case R_AARCH64_CONDBR19:
   case R_AARCH64_LD_PREL_LO19:
     // imm19 lives at bits [23:5]; (Val >> 2) << 5 is folded into "<< 3".
     checkAlignment<4>(Loc, Val, Type);
     checkInt<21>(Loc, Val, Type);
     or32le(Loc, (Val & 0x1FFFFC) << 3);
     break;
   case R_AARCH64_LD64_GOT_LO12_NC:
   case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
   case R_AARCH64_TLSDESC_LD64_LO12:
     // Low 12 bits scaled by 8 into bits [21:10]: (Val >> 3) << 10 == "<< 7".
     checkAlignment<8>(Loc, Val, Type);
     or32le(Loc, (Val & 0xFF8) << 7);
     break;
   // For the LDST<N> forms below the low 12 bits of Val are scaled by the
   // access size (N / 8 bytes) before being placed in the immediate field.
   case R_AARCH64_LDST8_ABS_LO12_NC:
     or32AArch64Imm(Loc, getBits(Val, 0, 11));
     break;
   case R_AARCH64_LDST16_ABS_LO12_NC:
     checkAlignment<2>(Loc, Val, Type);
     or32AArch64Imm(Loc, getBits(Val, 1, 11));
     break;
   case R_AARCH64_LDST32_ABS_LO12_NC:
     checkAlignment<4>(Loc, Val, Type);
     or32AArch64Imm(Loc, getBits(Val, 2, 11));
     break;
   case R_AARCH64_LDST64_ABS_LO12_NC:
     checkAlignment<8>(Loc, Val, Type);
     or32AArch64Imm(Loc, getBits(Val, 3, 11));
     break;
   case R_AARCH64_LDST128_ABS_LO12_NC:
     checkAlignment<16>(Loc, Val, Type);
     or32AArch64Imm(Loc, getBits(Val, 4, 11));
     break;
   // MOVW groups: the G<k> form places bits [16k+15:16k] of Val into the
   // 16-bit immediate at bits [20:5].
   case R_AARCH64_MOVW_UABS_G0_NC:
     or32le(Loc, (Val & 0xFFFF) << 5);
     break;
   case R_AARCH64_MOVW_UABS_G1_NC:
     or32le(Loc, (Val & 0xFFFF0000) >> 11);
     break;
   case R_AARCH64_MOVW_UABS_G2_NC:
     or32le(Loc, (Val & 0xFFFF00000000) >> 27);
     break;
   case R_AARCH64_MOVW_UABS_G3:
     or32le(Loc, (Val & 0xFFFF000000000000) >> 43);
     break;
   case R_AARCH64_TSTBR14:
     // imm14 lives at bits [18:5]; (Val >> 2) << 5 is folded into "<< 3".
     checkInt<16>(Loc, Val, Type);
     or32le(Loc, (Val & 0xFFFC) << 3);
     break;
   case R_AARCH64_TLSLE_ADD_TPREL_HI12:
     checkInt<24>(Loc, Val, Type);
     or32AArch64Imm(Loc, Val >> 12);
     break;
   case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
   case R_AARCH64_TLSDESC_ADD_LO12:
     or32AArch64Imm(Loc, Val);
     break;
   default:
     error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type));
   }
 }
 
 // Rewrites one instruction of a TLS descriptor (global-dynamic) sequence into
 // its local-exec replacement. The function is invoked once per relocation of
 // the sequence; Type selects which of the four instructions is at Loc.
 void AArch64::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
   // TLSDESC Global-Dynamic relocations are in the form:
   //   adrp    x0, :tlsdesc:v             [R_AARCH64_TLSDESC_ADR_PAGE21]
   //   ldr     x1, [x0, #:tlsdesc_lo12:v] [R_AARCH64_TLSDESC_LD64_LO12]
   //   add     x0, x0, :tlsdesc_lo12:v    [R_AARCH64_TLSDESC_ADD_LO12]
   //   .tlsdesccall                       [R_AARCH64_TLSDESC_CALL]
   //   blr     x1
   // And it can be optimized to:
   //   movz    x0, #0x0, lsl #16
   //   movk    x0, #0x10
   //   nop
   //   nop
   checkUInt<32>(Loc, Val, Type);
 
   switch (Type) {
   case R_AARCH64_TLSDESC_ADD_LO12:
   case R_AARCH64_TLSDESC_CALL:
     write32le(Loc, 0xd503201f); // nop
     return;
   case R_AARCH64_TLSDESC_ADR_PAGE21:
     // Upper 16 bits of the 32-bit offset go into the movz immediate.
     write32le(Loc, 0xd2a00000 | (((Val >> 16) & 0xffff) << 5)); // movz
     return;
   case R_AARCH64_TLSDESC_LD64_LO12:
     // Lower 16 bits of the offset go into the movk immediate.
     write32le(Loc, 0xf2800000 | ((Val & 0xffff) << 5)); // movk
     return;
   default:
     llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
   }
 }
 
 // Rewrites one instruction of a TLS descriptor (global-dynamic) sequence into
 // its initial-exec replacement. The function is invoked once per relocation
 // of the sequence; Type selects which of the four instructions is at Loc.
 // For the adrp and ldr, a fresh instruction is written and then relocated as
 // the corresponding R_AARCH64_TLSIE_* type with Val.
 void AArch64::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const {
   // TLSDESC Global-Dynamic relocations are in the form:
   //   adrp    x0, :tlsdesc:v             [R_AARCH64_TLSDESC_ADR_PAGE21]
   //   ldr     x1, [x0, #:tlsdesc_lo12:v] [R_AARCH64_TLSDESC_LD64_LO12]
   //   add     x0, x0, :tlsdesc_lo12:v    [R_AARCH64_TLSDESC_ADD_LO12]
   //   .tlsdesccall                       [R_AARCH64_TLSDESC_CALL]
   //   blr     x1
   // And it can be optimized to:
   //   adrp    x0, :gottprel:v
   //   ldr     x0, [x0, :gottprel_lo12:v]
   //   nop
   //   nop
 
   switch (Type) {
   case R_AARCH64_TLSDESC_ADD_LO12:
   case R_AARCH64_TLSDESC_CALL:
     write32le(Loc, 0xd503201f); // nop
     break;
   case R_AARCH64_TLSDESC_ADR_PAGE21:
     write32le(Loc, 0x90000000); // adrp
     relocateOne(Loc, R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, Val);
     break;
   case R_AARCH64_TLSDESC_LD64_LO12:
     write32le(Loc, 0xf9400000); // ldr
     relocateOne(Loc, R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, Val);
     break;
   default:
     // This is the GD -> IE path; the message previously named GD -> LE.
     llvm_unreachable("unsupported relocation for TLS GD to IE relaxation");
   }
 }
 
 // Rewrites one instruction of a TLS initial-exec sequence into a move of the
 // 32-bit offset Val: the page relocation becomes a MOVZ carrying the upper
 // 16 bits and the LO12 relocation becomes a MOVK carrying the lower 16 bits.
 // The destination register is taken from the instruction being replaced.
 void AArch64::relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
   checkUInt<32>(Loc, Val, Type);
 
   switch (Type) {
   case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: {
     // Generate MOVZ.
     const uint32_t Rd = read32le(Loc) & 0x1f;
     const uint64_t Hi16 = (Val >> 16) & 0xffff;
     write32le(Loc, (0xd2a00000 | Rd) | (Hi16 << 5));
     return;
   }
   case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: {
     // Generate MOVK.
     const uint32_t Rd = read32le(Loc) & 0x1f;
     const uint64_t Lo16 = Val & 0xffff;
     write32le(Loc, (0xf2800000 | Rd) | (Lo16 << 5));
     return;
   }
   default:
     llvm_unreachable("invalid relocation for TLS IE to LE relaxation");
   }
 }
 
 // Returns the AArch64 target description. The object is a function-local
 // static, so every call yields the same instance.
 TargetInfo *elf::getAArch64TargetInfo() {
   static AArch64 Instance;
   TargetInfo *Result = &Instance;
   return Result;
 }
Index: head/contrib/llvm/tools/lld/ELF/Arch/X86.cpp
===================================================================
--- head/contrib/llvm/tools/lld/ELF/Arch/X86.cpp	(revision 337281)
+++ head/contrib/llvm/tools/lld/ELF/Arch/X86.cpp	(revision 337282)
@@ -1,543 +1,547 @@
 //===- X86.cpp ------------------------------------------------------------===//
 //
 //                             The LLVM Linker
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 
 #include "InputFiles.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
 #include "lld/Common/ErrorHandler.h"
 #include "llvm/Support/Endian.h"
 
 using namespace llvm;
 using namespace llvm::support::endian;
 using namespace llvm::ELF;
 using namespace lld;
 using namespace lld::elf;
 
 namespace {
 // TargetInfo implementation for 32-bit x86. Apart from the constructor, every
 // member declared here overrides a TargetInfo virtual function. The class is
 // not final: the retpoline PLT variants later in this file derive from it.
 class X86 : public TargetInfo {
 public:
   X86();
   // Relocation classification and addend extraction.
   RelExpr getRelExpr(RelType Type, const Symbol &S,
                      const uint8_t *Loc) const override;
   int64_t getImplicitAddend(const uint8_t *Buf, RelType Type) const override;
   // .got.plt and PLT writers.
   void writeGotPltHeader(uint8_t *Buf) const override;
   RelType getDynRel(RelType Type) const override;
   void writeGotPlt(uint8_t *Buf, const Symbol &S) const override;
   void writeIgotPlt(uint8_t *Buf, const Symbol &S) const override;
   void writePltHeader(uint8_t *Buf) const override;
   void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
                 int32_t Index, unsigned RelOff) const override;
   // Applies a single relocation with an already computed value.
   void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
 
   // TLS relaxation hooks.
   RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
                           RelExpr Expr) const override;
   void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
   void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
   void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
   void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
 };
 } // namespace
 
 // Fills in the i386-specific TargetInfo fields: the dynamic relocation types,
 // GOT/PLT entry and header sizes, the TLS GD relaxation skip count, the trap
 // instruction pattern and the default image base.
 X86::X86() {
   GotBaseSymOff = -1;
   CopyRel = R_386_COPY;
   GotRel = R_386_GLOB_DAT;
   PltRel = R_386_JUMP_SLOT;
   IRelativeRel = R_386_IRELATIVE;
   RelativeRel = R_386_RELATIVE;
   TlsGotRel = R_386_TLS_TPOFF;
   TlsModuleIndexRel = R_386_TLS_DTPMOD32;
   TlsOffsetRel = R_386_TLS_DTPOFF32;
   GotEntrySize = 4;
   GotPltEntrySize = 4;
   PltEntrySize = 16;
   PltHeaderSize = 16;
   TlsGdRelaxSkip = 2;
   TrapInstr = 0xcccccccc; // 0xcc = INT3
+
+  // Align to the non-PAE large page size (known as a superpage or huge page).
+  // FreeBSD automatically promotes large, superpage-aligned allocations.
+  DefaultImageBase = 0x400000;
 }
 
 // Decides from a ModRM byte whether the operand is register-relative. The
 // mask 0xc7 keeps the mod field (bits 7-6) and the r/m field (bits 2-0);
 // only the combination mod == 00, r/m == 101 is reported as having no base
 // register.
 static bool hasBaseReg(uint8_t ModRM) {
   const uint8_t ModAndRm = ModRM & 0xc7;
   return ModAndRm != 0x5;
 }
 
 // Maps an i386 relocation type onto the generic RelExpr that describes how
 // its value is computed. Types not listed yield R_INVALID. S is not
 // consulted; Loc is only read (at Loc[-1]) for R_386_GOT32 and R_386_GOT32X.
 RelExpr X86::getRelExpr(RelType Type, const Symbol &S,
                         const uint8_t *Loc) const {
   switch (Type) {
   case R_386_8:
   case R_386_16:
   case R_386_32:
   case R_386_TLS_LDO_32:
     return R_ABS;
   case R_386_TLS_GD:
     return R_TLSGD;
   case R_386_TLS_LDM:
     return R_TLSLD;
   case R_386_PLT32:
     return R_PLT_PC;
   case R_386_PC8:
   case R_386_PC16:
   case R_386_PC32:
     return R_PC;
   case R_386_GOTPC:
     return R_GOTONLY_PC_FROM_END;
   case R_386_TLS_IE:
     return R_GOT;
   case R_386_GOT32:
   case R_386_GOT32X:
     // These relocations are arguably mis-designed because their calculations
     // depend on the instructions they are applied to. This is bad because we
     // usually don't care about whether the target section contains valid
     // machine instructions or not. But this is part of the documented ABI, so
     // we had to implement as the standard requires.
     //
     // x86 does not support PC-relative data access. Therefore, in order to
     // access GOT contents, a GOT address needs to be known at link-time
     // (which means non-PIC) or compilers have to emit code to get a GOT
     // address at runtime (which means code is position-independent but
     // compilers need to emit extra code for each GOT access.) This decision
     // is made at compile-time. In the latter case, compilers emit code to
     // load a GOT address to a register, which is usually %ebx.
     //
     // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
     // foo@GOT(%reg).
     //
     // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
     // find such relocation, we should report an error. foo@GOT is resolved to
     // an *absolute* address of foo's GOT entry, because both GOT address and
     // foo's offset are known. In other words, it's G + A.
     //
     // foo@GOT(%reg) needs to be resolved to a *relative* offset from a GOT to
     // foo's GOT entry in the table, because GOT address is not known but foo's
     // offset in the table is known. It's G + A - GOT.
     //
     // It's unfortunate that compilers emit the same relocation for these
     // different use cases. In order to distinguish them, we have to read a
     // machine instruction.
     //
     // The following code implements it. We assume that Loc[0] is the first
     // byte of a displacement or an immediate field of a valid machine
     // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
     // the byte, we can determine whether the instruction is register-relative
     // (i.e. it was generated for foo@GOT(%reg)) or absolute (i.e. foo@GOT).
     return hasBaseReg(Loc[-1]) ? R_GOT_FROM_END : R_GOT;
   case R_386_TLS_GOTIE:
     return R_GOT_FROM_END;
   case R_386_GOTOFF:
     return R_GOTREL_FROM_END;
   case R_386_TLS_LE:
     return R_TLS;
   case R_386_TLS_LE_32:
     return R_NEG_TLS;
   case R_386_NONE:
     return R_NONE;
   default:
     return R_INVALID;
   }
 }
 
 // Replaces the two generic TLS GD relaxation expressions with their
 // i386-specific variants; any other expression is returned unchanged. Type
 // and Data are not consulted.
 RelExpr X86::adjustRelaxExpr(RelType Type, const uint8_t *Data,
                              RelExpr Expr) const {
   if (Expr == R_RELAX_TLS_GD_TO_IE)
     return R_RELAX_TLS_GD_TO_IE_END;
   if (Expr == R_RELAX_TLS_GD_TO_LE)
     return R_RELAX_TLS_GD_TO_LE_NEG;
   return Expr;
 }
 
 // Stores the address returned by InX::Dynamic->getVA() as a little-endian
 // 32-bit value at Buf.
 void X86::writeGotPltHeader(uint8_t *Buf) const {
   const uint64_t DynamicAddr = InX::Dynamic->getVA();
   write32le(Buf, DynamicAddr);
 }
 
 // Initialises the .got.plt slot of S so that it points back into the
 // symbol's own PLT entry, 6 bytes in, which skips the entry's first
 // instruction.
 void X86::writeGotPlt(uint8_t *Buf, const Symbol &S) const {
   const uint64_t AfterFirstInsn = S.getPltVA() + 6;
   write32le(Buf, AfterFirstInsn);
 }
 
 // For an ifunc the slot holds the address of the resolver function, i.e.
 // the symbol's own address.
 void X86::writeIgotPlt(uint8_t *Buf, const Symbol &S) const {
   const uint64_t ResolverAddr = S.getVA();
   write32le(Buf, ResolverAddr);
 }
 
 // Translates the two static TLS local-exec relocation types into their
 // dynamic TPOFF counterparts; every other type is passed through unchanged.
 RelType X86::getDynRel(RelType Type) const {
   switch (Type) {
   case R_386_TLS_LE:
     return R_386_TLS_TPOFF;
   case R_386_TLS_LE_32:
     return R_386_TLS_TPOFF32;
   default:
     return Type;
   }
 }
 
 // Writes the 16-byte PLT header into Buf. Two templates exist: with
 // Config->Pic the .got.plt slots are addressed relative to %ebx, otherwise
 // they are addressed absolutely. In both cases the 32-bit operands of the
 // pushl (at Buf + 2) and the jmp (at Buf + 8) are patched to refer to
 // GOTPLT+4 and GOTPLT+8.
 void X86::writePltHeader(uint8_t *Buf) const {
   if (Config->Pic) {
     const uint8_t V[] = {
         0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl GOTPLT+4(%ebx)
         0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *GOTPLT+8(%ebx)
         0x90, 0x90, 0x90, 0x90              // nop
     };
     memcpy(Buf, V, sizeof(V));
 
     // The code treats %ebx as holding the end address of .got, so the
     // displacements are the distance from there to .got.plt.
     uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize();
     uint32_t GotPlt = InX::GotPlt->getVA() - Ebx;
     write32le(Buf + 2, GotPlt + 4);
     write32le(Buf + 8, GotPlt + 8);
     return;
   }
 
   const uint8_t PltData[] = {
       0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
       0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
       0x90, 0x90, 0x90, 0x90, // nop
   };
   memcpy(Buf, PltData, sizeof(PltData));
   uint32_t GotPlt = InX::GotPlt->getVA();
   write32le(Buf + 2, GotPlt + 4);
   write32le(Buf + 8, GotPlt + 8);
 }
 
 // Writes one 16-byte PLT entry into Buf: an indirect jmp through the
 // symbol's .got.plt slot, a push of RelOff, and a jmp back to the start of
 // the PLT. PltEntryAddr is not used; the backward jump is derived from
 // Index instead.
 void X86::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
                    uint64_t PltEntryAddr, int32_t Index,
                    unsigned RelOff) const {
   const uint8_t Inst[] = {
       0xff, 0x00, 0, 0, 0, 0, // jmp *foo_in_GOT or jmp *foo@GOT(%ebx)
       0x68, 0, 0, 0, 0,       // pushl $reloc_offset
       0xe9, 0, 0, 0, 0,       // jmp .PLT0@PC
   };
   memcpy(Buf, Inst, sizeof(Inst));
 
   // Byte 1 is the ModRM byte of the first jmp; it selects between the
   // %ebx-relative and the absolute addressing form.
   if (Config->Pic) {
     // jmp *foo@GOT(%ebx)
     uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize();
     Buf[1] = 0xa3;
     write32le(Buf + 2, GotPltEntryAddr - Ebx);
   } else {
     // jmp *foo_in_GOT
     Buf[1] = 0x25;
     write32le(Buf + 2, GotPltEntryAddr);
   }
 
   write32le(Buf + 7, RelOff);
   // The final jmp ends 16 bytes into this entry; its displacement is the
   // distance from there back to the start of the PLT header.
   write32le(Buf + 12, -Index * PltEntrySize - PltHeaderSize - 16);
 }
 
 // Reads the addend stored in place at Buf for a REL-style relocation. The
 // field is read with the width implied by Type and sign-extended to 64 bits;
 // types not listed have an addend of 0.
 int64_t X86::getImplicitAddend(const uint8_t *Buf, RelType Type) const {
   switch (Type) {
   case R_386_32:
   case R_386_PC32:
   case R_386_PLT32:
   case R_386_GOT32:
   case R_386_GOT32X:
   case R_386_GOTOFF:
   case R_386_GOTPC:
   case R_386_TLS_LDO_32:
   case R_386_TLS_LE: {
     const uint32_t Raw = read32le(Buf);
     return SignExtend64<32>(Raw);
   }
   case R_386_16:
   case R_386_PC16: {
     const uint16_t Raw = read16le(Buf);
     return SignExtend64<16>(Raw);
   }
   case R_386_8:
   case R_386_PC8: {
     const uint8_t Raw = *Buf;
     return SignExtend64<8>(Raw);
   }
   default:
     return 0;
   }
 }
 
 // Applies relocation Type at Loc using the already computed value Val. The
 // value is range-checked through checkInt/checkUInt and then stored as an
 // 8-, 16- or 32-bit little-endian field. An unknown type is reported through
 // error().
 void X86::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
   switch (Type) {
   case R_386_8:
     // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
     // being used for some 16-bit programs such as boot loaders, so
     // we want to support them.
     checkUInt<8>(Loc, Val, Type);
     *Loc = Val;
     break;
   case R_386_PC8:
     checkInt<8>(Loc, Val, Type);
     *Loc = Val;
     break;
   case R_386_16:
     checkUInt<16>(Loc, Val, Type);
     write16le(Loc, Val);
     break;
   case R_386_PC16:
     // R_386_PC16 is normally used with 16 bit code. In that situation
     // the PC is 16 bits, just like the addend. This means that it can
     // point from any 16 bit address to any other if the possibility
     // of wrapping is included.
     // The only restriction we have to check then is that the destination
     // address fits in 16 bits. That is impossible to do here. The problem is
     // that we are passed the final value, which already had the
     // current location subtracted from it.
     // We just check that Val fits in 17 bits. This misses some cases, but
     // should have no false positives.
     checkInt<17>(Loc, Val, Type);
     write16le(Loc, Val);
     break;
   // Every remaining supported type is a plain 32-bit field.
   case R_386_32:
   case R_386_GLOB_DAT:
   case R_386_GOT32:
   case R_386_GOT32X:
   case R_386_GOTOFF:
   case R_386_GOTPC:
   case R_386_PC32:
   case R_386_PLT32:
   case R_386_RELATIVE:
   case R_386_TLS_DTPMOD32:
   case R_386_TLS_DTPOFF32:
   case R_386_TLS_GD:
   case R_386_TLS_GOTIE:
   case R_386_TLS_IE:
   case R_386_TLS_LDM:
   case R_386_TLS_LDO_32:
   case R_386_TLS_LE:
   case R_386_TLS_LE_32:
   case R_386_TLS_TPOFF:
   case R_386_TLS_TPOFF32:
     checkInt<32>(Loc, Val, Type);
     write32le(Loc, Val);
     break;
   default:
     error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type));
   }
 }
 
 // Relaxes a TLS general-dynamic access to local-exec. The 12-byte
 // replacement sequence is written starting 3 bytes before Loc, and Val is
 // stored into the immediate of the subl (bytes 8..11 of the sequence, i.e.
 // Loc + 5). Type is not consulted.
 void X86::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
   // Convert
   //   leal x@tlsgd(, %ebx, 1), %eax
   //   call __tls_get_addr@plt
   // to
   //   movl %gs:0,%eax
   //   subl $x@ntpoff,%eax
   const uint8_t Inst[] = {
       0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
       0x81, 0xe8, 0, 0, 0, 0,             // subl $Val, %eax
   };
   memcpy(Loc - 3, Inst, sizeof(Inst));
   write32le(Loc + 5, Val);
 }
 
 // Relaxes a TLS general-dynamic access to initial-exec. The 12-byte
 // replacement sequence is written starting 3 bytes before Loc, and Val is
 // stored into the displacement of the addl (bytes 8..11 of the sequence,
 // i.e. Loc + 5). Type is not consulted.
 void X86::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const {
   // Convert
   //   leal x@tlsgd(, %ebx, 1), %eax
   //   call __tls_get_addr@plt
   // to
   //   movl %gs:0, %eax
   //   addl x@gotntpoff(%ebx), %eax
   const uint8_t Inst[] = {
       0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
       0x03, 0x83, 0, 0, 0, 0,             // addl Val(%ebx), %eax
   };
   memcpy(Loc - 3, Inst, sizeof(Inst));
   write32le(Loc + 5, Val);
 }
 
 // In some conditions, relocations can be optimized to avoid using GOT.
 // This function does that for Initial Exec to Local Exec case.
 // The opcode and ModRM bytes that precede Loc are rewritten in place so that
 // the instruction takes an immediate (or, for the last case, becomes a leal),
 // and Val is then stored as the 32-bit field at Loc.
 void X86::relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
   // Ulrich's document section 6.2 says that @gotntpoff can
   // be used with MOVL or ADDL instructions.
   // @indntpoff is similar to @gotntpoff, but for use in
   // position dependent code.
   // Bits 5-3 of the byte before Loc (the ModRM reg field in the 6-byte forms).
   uint8_t Reg = (Loc[-1] >> 3) & 7;
 
   if (Type == R_386_TLS_IE) {
     if (Loc[-1] == 0xa1) {
       // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
       // This case is different from the generic case below because
       // this is a 5 byte instruction while below is 6 bytes.
       Loc[-1] = 0xb8;
     } else if (Loc[-2] == 0x8b) {
       // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
       Loc[-2] = 0xc7;
       Loc[-1] = 0xc0 | Reg;
     } else {
       // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
       Loc[-2] = 0x81;
       Loc[-1] = 0xc0 | Reg;
     }
   } else {
     assert(Type == R_386_TLS_GOTIE);
     if (Loc[-2] == 0x8b) {
       // "movl foo@gotntpoff(%reg),%reg" -> "movl $foo,%reg"
       Loc[-2] = 0xc7;
       Loc[-1] = 0xc0 | Reg;
     } else {
       // "addl foo@gotntpoff(%reg),%reg" -> "leal foo(%reg),%reg"
       Loc[-2] = 0x8d;
       Loc[-1] = 0x80 | (Reg << 3) | Reg;
     }
   }
   write32le(Loc, Val);
 }
 
 // Relaxes a TLS local-dynamic access to local-exec. For R_386_TLS_LDO_32
 // only the 32-bit value at Loc is written. For any other type the 11-byte
 // replacement sequence below is written starting 2 bytes before Loc and Val
 // is not used.
 void X86::relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
   if (Type == R_386_TLS_LDO_32) {
     write32le(Loc, Val);
     return;
   }
 
   // Convert
   //   leal foo(%reg),%eax
   //   call ___tls_get_addr
   // to
   //   movl %gs:0,%eax
   //   nop
   //   leal 0(%esi,1),%esi
   const uint8_t Inst[] = {
       0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
       0x90,                               // nop
       0x8d, 0x74, 0x26, 0x00,             // leal 0(%esi,1),%esi
   };
   memcpy(Loc - 2, Inst, sizeof(Inst));
 }
 
 namespace {
 // X86 variant that emits retpoline-style PLT sequences addressing .got.plt
 // relative to %ebx. Only the .got.plt/PLT writers are overridden; everything
 // else is inherited from X86.
 class RetpolinePic : public X86 {
 public:
   RetpolinePic();
   void writeGotPlt(uint8_t *Buf, const Symbol &S) const override;
   void writePltHeader(uint8_t *Buf) const override;
   void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
                 int32_t Index, unsigned RelOff) const override;
 };
 
 // X86 variant that emits retpoline-style PLT sequences addressing .got.plt
 // with absolute addresses. Only the .got.plt/PLT writers are overridden.
 class RetpolineNoPic : public X86 {
 public:
   RetpolineNoPic();
   void writeGotPlt(uint8_t *Buf, const Symbol &S) const override;
   void writePltHeader(uint8_t *Buf) const override;
   void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
                 int32_t Index, unsigned RelOff) const override;
 };
 } // namespace
 
 // The retpoline sequences are longer than the plain X86 ones, so the entry
 // and header sizes set by the X86 constructor are overridden here.
 RetpolinePic::RetpolinePic() {
   PltEntrySize = 32;
   PltHeaderSize = 48;
 }
 
 // Initialises the .got.plt slot of S to point 17 bytes into the symbol's PLT
 // entry, which is where the "pushl $reloc_offset" of the entry written by
 // RetpolinePic::writePlt begins.
 void RetpolinePic::writeGotPlt(uint8_t *Buf, const Symbol &S) const {
   const uint64_t LazyStubAddr = S.getPltVA() + 17;
   write32le(Buf, LazyStubAddr);
 }
 
 // Writes the retpoline PLT header for the %ebx-relative variant. The offsets
 // in the per-line comments are hexadecimal. After copying the template, the
 // displacement of the pushl (at Buf + 2) and of the mov (at Buf + 9) are
 // patched to refer to GOTPLT+4 and GOTPLT+8, measured from the end of .got.
 void RetpolinePic::writePltHeader(uint8_t *Buf) const {
   const uint8_t Insn[] = {
       0xff, 0xb3, 0,    0,    0,    0,          // 0:    pushl GOTPLT+4(%ebx)
       0x50,                                     // 6:    pushl %eax
       0x8b, 0x83, 0,    0,    0,    0,          // 7:    mov GOTPLT+8(%ebx), %eax
       0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:    call next
       0xf3, 0x90,                               // 12: loop: pause
       0x0f, 0xae, 0xe8,                         // 14:   lfence
       0xeb, 0xf9,                               // 17:   jmp loop
       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19:   int3; .align 16
       0x89, 0x0c, 0x24,                         // 20: next: mov %ecx, (%esp)
       0x8b, 0x4c, 0x24, 0x04,                   // 23:   mov 0x4(%esp), %ecx
       0x89, 0x44, 0x24, 0x04,                   // 27:   mov %eax ,0x4(%esp)
       0x89, 0xc8,                               // 2b:   mov %ecx, %eax
       0x59,                                     // 2d:   pop %ecx
       0xc3,                                     // 2e:   ret
   };
   memcpy(Buf, Insn, sizeof(Insn));
 
   uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize();
   uint32_t GotPlt = InX::GotPlt->getVA() - Ebx;
   write32le(Buf + 2, GotPlt + 4);
   write32le(Buf + 9, GotPlt + 8);
 }
 
 // Writes one retpoline PLT entry for the %ebx-relative variant.
 // PltEntryAddr is not used; the three PC-relative displacements are derived
 // from Index. Each has the form
 //   -Index * PltEntrySize - PltHeaderSize - <end of instruction> + <target>
 // where both offsets are in bytes, the first within this entry and the
 // second within the PLT header.
 void RetpolinePic::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
                             uint64_t PltEntryAddr, int32_t Index,
                             unsigned RelOff) const {
   const uint8_t Insn[] = {
       0x50,                   // pushl %eax
       0x8b, 0x83, 0, 0, 0, 0, // mov foo@GOT(%ebx), %eax
       0xe8, 0,    0, 0, 0,    // call plt+0x20
       0xe9, 0,    0, 0, 0,    // jmp plt+0x12
       0x68, 0,    0, 0, 0,    // pushl $reloc_offset
       0xe9, 0,    0, 0, 0,    // jmp plt+0
   };
   memcpy(Buf, Insn, sizeof(Insn));
 
   uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize();
   write32le(Buf + 3, GotPltEntryAddr - Ebx);
   // call ends at byte 12 and targets header offset 32 (0x20).
   write32le(Buf + 8, -Index * PltEntrySize - PltHeaderSize - 12 + 32);
   // jmp ends at byte 17 and targets header offset 18 (0x12).
   write32le(Buf + 13, -Index * PltEntrySize - PltHeaderSize - 17 + 18);
   write32le(Buf + 18, RelOff);
   // Final jmp ends at byte 27 and targets the start of the header.
   write32le(Buf + 23, -Index * PltEntrySize - PltHeaderSize - 27);
 }
 
 // The retpoline sequences are longer than the plain X86 ones, so the entry
 // and header sizes set by the X86 constructor are overridden here.
 RetpolineNoPic::RetpolineNoPic() {
   PltEntrySize = 32;
   PltHeaderSize = 48;
 }
 
 // Initialises the .got.plt slot of S to point 16 bytes into the symbol's PLT
 // entry, which is where the "pushl $reloc_offset" of the entry written by
 // RetpolineNoPic::writePlt begins.
 void RetpolineNoPic::writeGotPlt(uint8_t *Buf, const Symbol &S) const {
   const uint64_t LazyStubAddr = S.getPltVA() + 16;
   write32le(Buf, LazyStubAddr);
 }
 
 // Writes the retpoline PLT header for the absolute-address variant. The
 // offsets in the per-line comments are hexadecimal. After copying the
 // template, the address operand of the pushl (at Buf + 2) and of the mov
 // (at Buf + 8) are patched to GOTPLT+4 and GOTPLT+8.
 void RetpolineNoPic::writePltHeader(uint8_t *Buf) const {
   const uint8_t PltData[] = {
       0xff, 0x35, 0,    0,    0,    0, // 0:    pushl GOTPLT+4
       0x50,                            // 6:    pushl %eax
       0xa1, 0,    0,    0,    0,       // 7:    mov GOTPLT+8, %eax
       0xe8, 0x0f, 0x00, 0x00, 0x00,    // c:    call next
       0xf3, 0x90,                      // 11: loop: pause
       0x0f, 0xae, 0xe8,                // 13:   lfence
       0xeb, 0xf9,                      // 16:   jmp loop
       0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // 18:   int3
       0xcc, 0xcc, 0xcc,                // 1d:   int3; .align 16
       0x89, 0x0c, 0x24,                // 20: next: mov %ecx, (%esp)
       0x8b, 0x4c, 0x24, 0x04,          // 23:   mov 0x4(%esp), %ecx
       0x89, 0x44, 0x24, 0x04,          // 27:   mov %eax ,0x4(%esp)
       0x89, 0xc8,                      // 2b:   mov %ecx, %eax
       0x59,                            // 2d:   pop %ecx
       0xc3,                            // 2e:   ret
   };
   memcpy(Buf, PltData, sizeof(PltData));
 
   uint32_t GotPlt = InX::GotPlt->getVA();
   write32le(Buf + 2, GotPlt + 4);
   write32le(Buf + 8, GotPlt + 8);
 }
 
 // Writes one retpoline PLT entry for the absolute-address variant. The
 // offsets in the per-line comments are hexadecimal. PltEntryAddr is not
 // used; the three PC-relative displacements are derived from Index. Each has
 // the form
 //   -Index * PltEntrySize - PltHeaderSize - <end of instruction> + <target>
 // where both offsets are in bytes, the first within this entry and the
 // second within the PLT header.
 void RetpolineNoPic::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
                               uint64_t PltEntryAddr, int32_t Index,
                               unsigned RelOff) const {
   const uint8_t Insn[] = {
       0x50,             // 0:  pushl %eax
       0xa1, 0, 0, 0, 0, // 1:  mov foo_in_GOT, %eax
       0xe8, 0, 0, 0, 0, // 6:  call plt+0x20
       0xe9, 0, 0, 0, 0, // b:  jmp plt+0x11
       0x68, 0, 0, 0, 0, // 10: pushl $reloc_offset
       0xe9, 0, 0, 0, 0, // 15: jmp plt+0
   };
   memcpy(Buf, Insn, sizeof(Insn));
 
   write32le(Buf + 2, GotPltEntryAddr);
   // call ends at byte 11 and targets header offset 32 (0x20).
   write32le(Buf + 7, -Index * PltEntrySize - PltHeaderSize - 11 + 32);
   // jmp ends at byte 16 and targets header offset 17 (0x11).
   write32le(Buf + 12, -Index * PltEntrySize - PltHeaderSize - 16 + 17);
   write32le(Buf + 17, RelOff);
   // Final jmp ends at byte 26 and targets the start of the header.
   write32le(Buf + 22, -Index * PltEntrySize - PltHeaderSize - 26);
 }
 
 // Selects the i386 target description that matches the configuration: the
 // plain X86 target unless Config->ZRetpolineplt is set, otherwise one of the
 // two retpoline variants depending on Config->Pic. Each variant is a
 // function-local static that is only constructed when first selected.
 TargetInfo *elf::getX86TargetInfo() {
   if (!Config->ZRetpolineplt) {
     static X86 Plain;
     return &Plain;
   }
 
   if (Config->Pic) {
     static RetpolinePic Pic;
     return &Pic;
   }
 
   static RetpolineNoPic NoPic;
   return &NoPic;
 }