Index: head/contrib/llvm/tools/lld/ELF/Arch/AArch64.cpp =================================================================== --- head/contrib/llvm/tools/lld/ELF/Arch/AArch64.cpp (revision 337281) +++ head/contrib/llvm/tools/lld/ELF/Arch/AArch64.cpp (revision 337282) @@ -1,426 +1,430 @@ //===- AArch64.cpp --------------------------------------------------------===// // // The LLVM Linker // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" #include "Thunks.h" #include "lld/Common/ErrorHandler.h" #include "llvm/Object/ELF.h" #include "llvm/Support/Endian.h" using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; using namespace lld; using namespace lld::elf; // Page(Expr) is the page address of the expression Expr, defined // as (Expr & ~0xFFF). (This applies even if the machine page size // supported by the platform has a different value.) uint64_t elf::getAArch64Page(uint64_t Expr) { return Expr & ~static_cast(0xFFF); } namespace { class AArch64 final : public TargetInfo { public: AArch64(); RelExpr getRelExpr(RelType Type, const Symbol &S, const uint8_t *Loc) const override; bool isPicRel(RelType Type) const override; void writeGotPlt(uint8_t *Buf, const Symbol &S) const override; void writePltHeader(uint8_t *Buf) const override; void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const override; bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File, uint64_t BranchAddr, const Symbol &S) const override; bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override; bool usesOnlyLowPageBits(RelType Type) const override; void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override; RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, RelExpr Expr) const override; void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; }; } // namespace AArch64::AArch64() { CopyRel = R_AARCH64_COPY; RelativeRel = R_AARCH64_RELATIVE; IRelativeRel = R_AARCH64_IRELATIVE; GotRel = R_AARCH64_GLOB_DAT; PltRel = R_AARCH64_JUMP_SLOT; TlsDescRel = R_AARCH64_TLSDESC; TlsGotRel = R_AARCH64_TLS_TPREL64; GotEntrySize = 8; GotPltEntrySize = 8; PltEntrySize = 16; PltHeaderSize = 32; DefaultMaxPageSize = 65536; + // Align to the 2 MiB page size (known as a superpage or huge page). + // FreeBSD automatically promotes 2 MiB-aligned allocations. + DefaultImageBase = 0x200000; + // It doesn't seem to be documented anywhere, but tls on aarch64 uses variant // 1 of the tls structures and the tcb size is 16. TcbSize = 16; NeedsThunks = true; // See comment in Arch/ARM.cpp for a more detailed explanation of // ThunkSectionSpacing. For AArch64 the only branches we are permitted to // Thunk have a range of +/- 128 MiB ThunkSectionSpacing = (128 * 1024 * 1024) - 0x30000; } RelExpr AArch64::getRelExpr(RelType Type, const Symbol &S, const uint8_t *Loc) const { switch (Type) { case R_AARCH64_TLSDESC_ADR_PAGE21: return R_TLSDESC_PAGE; case R_AARCH64_TLSDESC_LD64_LO12: case R_AARCH64_TLSDESC_ADD_LO12: return R_TLSDESC; case R_AARCH64_TLSDESC_CALL: return R_TLSDESC_CALL; case R_AARCH64_TLSLE_ADD_TPREL_HI12: case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: return R_TLS; case R_AARCH64_CALL26: case R_AARCH64_CONDBR19: case R_AARCH64_JUMP26: case R_AARCH64_TSTBR14: return R_PLT_PC; case R_AARCH64_PREL16: case R_AARCH64_PREL32: case R_AARCH64_PREL64: case R_AARCH64_ADR_PREL_LO21: case R_AARCH64_LD_PREL_LO19: return R_PC; case R_AARCH64_ADR_PREL_PG_HI21: return R_PAGE_PC; case R_AARCH64_LD64_GOT_LO12_NC: case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: return R_GOT; case R_AARCH64_ADR_GOT_PAGE: case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: return R_GOT_PAGE_PC; case R_AARCH64_NONE: return R_NONE; default: return R_ABS; } } RelExpr AArch64::adjustRelaxExpr(RelType Type, const uint8_t *Data, RelExpr Expr) const { if (Expr == R_RELAX_TLS_GD_TO_IE) { if (Type == R_AARCH64_TLSDESC_ADR_PAGE21) return R_RELAX_TLS_GD_TO_IE_PAGE_PC; return R_RELAX_TLS_GD_TO_IE_ABS; } return Expr; } bool AArch64::usesOnlyLowPageBits(RelType Type) const { switch (Type) { default: return false; case R_AARCH64_ADD_ABS_LO12_NC: case R_AARCH64_LD64_GOT_LO12_NC: case R_AARCH64_LDST128_ABS_LO12_NC: case R_AARCH64_LDST16_ABS_LO12_NC: case R_AARCH64_LDST32_ABS_LO12_NC: case R_AARCH64_LDST64_ABS_LO12_NC: case R_AARCH64_LDST8_ABS_LO12_NC: case R_AARCH64_TLSDESC_ADD_LO12: case R_AARCH64_TLSDESC_LD64_LO12: case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: return true; } } bool AArch64::isPicRel(RelType Type) const { return Type == R_AARCH64_ABS32 || Type == R_AARCH64_ABS64; } void AArch64::writeGotPlt(uint8_t *Buf, const Symbol &) const { write64le(Buf, InX::Plt->getVA()); } void AArch64::writePltHeader(uint8_t *Buf) const { const uint8_t PltData[] = { 0xf0, 0x7b, 0xbf, 0xa9, // stp x16, x30, [sp,#-16]! 0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.plt.got[2])) 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.plt.got[2]))] 0x10, 0x02, 0x00, 0x91, // add x16, x16, Offset(&(.plt.got[2])) 0x20, 0x02, 0x1f, 0xd6, // br x17 0x1f, 0x20, 0x03, 0xd5, // nop 0x1f, 0x20, 0x03, 0xd5, // nop 0x1f, 0x20, 0x03, 0xd5 // nop }; memcpy(Buf, PltData, sizeof(PltData)); uint64_t Got = InX::GotPlt->getVA(); uint64_t Plt = InX::Plt->getVA(); relocateOne(Buf + 4, R_AARCH64_ADR_PREL_PG_HI21, getAArch64Page(Got + 16) - getAArch64Page(Plt + 4)); relocateOne(Buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, Got + 16); relocateOne(Buf + 12, R_AARCH64_ADD_ABS_LO12_NC, Got + 16); } void AArch64::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const { const uint8_t Inst[] = { 0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.plt.got[n])) 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.plt.got[n]))] 0x10, 0x02, 0x00, 0x91, // add x16, x16, Offset(&(.plt.got[n])) 0x20, 0x02, 0x1f, 0xd6 // br x17 }; memcpy(Buf, Inst, sizeof(Inst)); relocateOne(Buf, R_AARCH64_ADR_PREL_PG_HI21, getAArch64Page(GotPltEntryAddr) - getAArch64Page(PltEntryAddr)); relocateOne(Buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, GotPltEntryAddr); relocateOne(Buf + 8, R_AARCH64_ADD_ABS_LO12_NC, GotPltEntryAddr); } bool AArch64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File, uint64_t BranchAddr, const Symbol &S) const { // ELF for the ARM 64-bit architecture, section Call and Jump relocations // only permits range extension thunks for R_AARCH64_CALL26 and // R_AARCH64_JUMP26 relocation types. if (Type != R_AARCH64_CALL26 && Type != R_AARCH64_JUMP26) return false; uint64_t Dst = (Expr == R_PLT_PC) ? S.getPltVA() : S.getVA(); return !inBranchRange(Type, BranchAddr, Dst); } bool AArch64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const { if (Type != R_AARCH64_CALL26 && Type != R_AARCH64_JUMP26) return true; // The AArch64 call and unconditional branch instructions have a range of // +/- 128 MiB. uint64_t Range = 128 * 1024 * 1024; if (Dst > Src) { // Immediate of branch is signed. Range -= 4; return Dst - Src <= Range; } return Src - Dst <= Range; } static void write32AArch64Addr(uint8_t *L, uint64_t Imm) { uint32_t ImmLo = (Imm & 0x3) << 29; uint32_t ImmHi = (Imm & 0x1FFFFC) << 3; uint64_t Mask = (0x3 << 29) | (0x1FFFFC << 3); write32le(L, (read32le(L) & ~Mask) | ImmLo | ImmHi); } // Return the bits [Start, End] from Val shifted Start bits. // For instance, getBits(0xF0, 4, 8) returns 0xF. static uint64_t getBits(uint64_t Val, int Start, int End) { uint64_t Mask = ((uint64_t)1 << (End + 1 - Start)) - 1; return (Val >> Start) & Mask; } static void or32le(uint8_t *P, int32_t V) { write32le(P, read32le(P) | V); } // Update the immediate field in a AARCH64 ldr, str, and add instruction. static void or32AArch64Imm(uint8_t *L, uint64_t Imm) { or32le(L, (Imm & 0xFFF) << 10); } void AArch64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const { switch (Type) { case R_AARCH64_ABS16: case R_AARCH64_PREL16: checkIntUInt<16>(Loc, Val, Type); write16le(Loc, Val); break; case R_AARCH64_ABS32: case R_AARCH64_PREL32: checkIntUInt<32>(Loc, Val, Type); write32le(Loc, Val); break; case R_AARCH64_ABS64: case R_AARCH64_GLOB_DAT: case R_AARCH64_PREL64: write64le(Loc, Val); break; case R_AARCH64_ADD_ABS_LO12_NC: or32AArch64Imm(Loc, Val); break; case R_AARCH64_ADR_GOT_PAGE: case R_AARCH64_ADR_PREL_PG_HI21: case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: case R_AARCH64_TLSDESC_ADR_PAGE21: checkInt<33>(Loc, Val, Type); write32AArch64Addr(Loc, Val >> 12); break; case R_AARCH64_ADR_PREL_LO21: checkInt<21>(Loc, Val, Type); write32AArch64Addr(Loc, Val); break; case R_AARCH64_JUMP26: // Normally we would just write the bits of the immediate field, however // when patching instructions for the cpu errata fix -fix-cortex-a53-843419 // we want to replace a non-branch instruction with a branch immediate // instruction. By writing all the bits of the instruction including the // opcode and the immediate (0 001 | 01 imm26) we can do this // transformation by placing a R_AARCH64_JUMP26 relocation at the offset of // the instruction we want to patch. write32le(Loc, 0x14000000); LLVM_FALLTHROUGH; case R_AARCH64_CALL26: checkInt<28>(Loc, Val, Type); or32le(Loc, (Val & 0x0FFFFFFC) >> 2); break; case R_AARCH64_CONDBR19: case R_AARCH64_LD_PREL_LO19: checkAlignment<4>(Loc, Val, Type); checkInt<21>(Loc, Val, Type); or32le(Loc, (Val & 0x1FFFFC) << 3); break; case R_AARCH64_LD64_GOT_LO12_NC: case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: case R_AARCH64_TLSDESC_LD64_LO12: checkAlignment<8>(Loc, Val, Type); or32le(Loc, (Val & 0xFF8) << 7); break; case R_AARCH64_LDST8_ABS_LO12_NC: or32AArch64Imm(Loc, getBits(Val, 0, 11)); break; case R_AARCH64_LDST16_ABS_LO12_NC: checkAlignment<2>(Loc, Val, Type); or32AArch64Imm(Loc, getBits(Val, 1, 11)); break; case R_AARCH64_LDST32_ABS_LO12_NC: checkAlignment<4>(Loc, Val, Type); or32AArch64Imm(Loc, getBits(Val, 2, 11)); break; case R_AARCH64_LDST64_ABS_LO12_NC: checkAlignment<8>(Loc, Val, Type); or32AArch64Imm(Loc, getBits(Val, 3, 11)); break; case R_AARCH64_LDST128_ABS_LO12_NC: checkAlignment<16>(Loc, Val, Type); or32AArch64Imm(Loc, getBits(Val, 4, 11)); break; case R_AARCH64_MOVW_UABS_G0_NC: or32le(Loc, (Val & 0xFFFF) << 5); break; case R_AARCH64_MOVW_UABS_G1_NC: or32le(Loc, (Val & 0xFFFF0000) >> 11); break; case R_AARCH64_MOVW_UABS_G2_NC: or32le(Loc, (Val & 0xFFFF00000000) >> 27); break; case R_AARCH64_MOVW_UABS_G3: or32le(Loc, (Val & 0xFFFF000000000000) >> 43); break; case R_AARCH64_TSTBR14: checkInt<16>(Loc, Val, Type); or32le(Loc, (Val & 0xFFFC) << 3); break; case R_AARCH64_TLSLE_ADD_TPREL_HI12: checkInt<24>(Loc, Val, Type); or32AArch64Imm(Loc, Val >> 12); break; case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: case R_AARCH64_TLSDESC_ADD_LO12: or32AArch64Imm(Loc, Val); break; default: error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); } } void AArch64::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { // TLSDESC Global-Dynamic relocation are in the form: // adrp x0, :tlsdesc:v [R_AARCH64_TLSDESC_ADR_PAGE21] // ldr x1, [x0, #:tlsdesc_lo12:v [R_AARCH64_TLSDESC_LD64_LO12] // add x0, x0, :tlsdesc_los:v [R_AARCH64_TLSDESC_ADD_LO12] // .tlsdesccall [R_AARCH64_TLSDESC_CALL] // blr x1 // And it can optimized to: // movz x0, #0x0, lsl #16 // movk x0, #0x10 // nop // nop checkUInt<32>(Loc, Val, Type); switch (Type) { case R_AARCH64_TLSDESC_ADD_LO12: case R_AARCH64_TLSDESC_CALL: write32le(Loc, 0xd503201f); // nop return; case R_AARCH64_TLSDESC_ADR_PAGE21: write32le(Loc, 0xd2a00000 | (((Val >> 16) & 0xffff) << 5)); // movz return; case R_AARCH64_TLSDESC_LD64_LO12: write32le(Loc, 0xf2800000 | ((Val & 0xffff) << 5)); // movk return; default: llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); } } void AArch64::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const { // TLSDESC Global-Dynamic relocation are in the form: // adrp x0, :tlsdesc:v [R_AARCH64_TLSDESC_ADR_PAGE21] // ldr x1, [x0, #:tlsdesc_lo12:v [R_AARCH64_TLSDESC_LD64_LO12] // add x0, x0, :tlsdesc_los:v [R_AARCH64_TLSDESC_ADD_LO12] // .tlsdesccall [R_AARCH64_TLSDESC_CALL] // blr x1 // And it can optimized to: // adrp x0, :gottprel:v // ldr x0, [x0, :gottprel_lo12:v] // nop // nop switch (Type) { case R_AARCH64_TLSDESC_ADD_LO12: case R_AARCH64_TLSDESC_CALL: write32le(Loc, 0xd503201f); // nop break; case R_AARCH64_TLSDESC_ADR_PAGE21: write32le(Loc, 0x90000000); // adrp relocateOne(Loc, R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, Val); break; case R_AARCH64_TLSDESC_LD64_LO12: write32le(Loc, 0xf9400000); // ldr relocateOne(Loc, R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, Val); break; default: llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); } } void AArch64::relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { checkUInt<32>(Loc, Val, Type); if (Type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21) { // Generate MOVZ. uint32_t RegNo = read32le(Loc) & 0x1f; write32le(Loc, (0xd2a00000 | RegNo) | (((Val >> 16) & 0xffff) << 5)); return; } if (Type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) { // Generate MOVK. uint32_t RegNo = read32le(Loc) & 0x1f; write32le(Loc, (0xf2800000 | RegNo) | ((Val & 0xffff) << 5)); return; } llvm_unreachable("invalid relocation for TLS IE to LE relaxation"); } TargetInfo *elf::getAArch64TargetInfo() { static AArch64 Target; return &Target; } Index: head/contrib/llvm/tools/lld/ELF/Arch/X86.cpp =================================================================== --- head/contrib/llvm/tools/lld/ELF/Arch/X86.cpp (revision 337281) +++ head/contrib/llvm/tools/lld/ELF/Arch/X86.cpp (revision 337282) @@ -1,543 +1,547 @@ //===- X86.cpp ------------------------------------------------------------===// // // The LLVM Linker // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "InputFiles.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" #include "lld/Common/ErrorHandler.h" #include "llvm/Support/Endian.h" using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; using namespace lld; using namespace lld::elf; namespace { class X86 : public TargetInfo { public: X86(); RelExpr getRelExpr(RelType Type, const Symbol &S, const uint8_t *Loc) const override; int64_t getImplicitAddend(const uint8_t *Buf, RelType Type) const override; void writeGotPltHeader(uint8_t *Buf) const override; RelType getDynRel(RelType Type) const override; void writeGotPlt(uint8_t *Buf, const Symbol &S) const override; void writeIgotPlt(uint8_t *Buf, const Symbol &S) const override; void writePltHeader(uint8_t *Buf) const override; void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const override; void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override; RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, RelExpr Expr) const override; void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; }; } // namespace X86::X86() { GotBaseSymOff = -1; CopyRel = R_386_COPY; GotRel = R_386_GLOB_DAT; PltRel = R_386_JUMP_SLOT; IRelativeRel = R_386_IRELATIVE; RelativeRel = R_386_RELATIVE; TlsGotRel = R_386_TLS_TPOFF; TlsModuleIndexRel = R_386_TLS_DTPMOD32; TlsOffsetRel = R_386_TLS_DTPOFF32; GotEntrySize = 4; GotPltEntrySize = 4; PltEntrySize = 16; PltHeaderSize = 16; TlsGdRelaxSkip = 2; TrapInstr = 0xcccccccc; // 0xcc = INT3 + + // Align to the non-PAE large page size (known as a superpage or huge page). + // FreeBSD automatically promotes large, superpage-aligned allocations. + DefaultImageBase = 0x400000; } static bool hasBaseReg(uint8_t ModRM) { return (ModRM & 0xc7) != 0x5; } RelExpr X86::getRelExpr(RelType Type, const Symbol &S, const uint8_t *Loc) const { switch (Type) { case R_386_8: case R_386_16: case R_386_32: case R_386_TLS_LDO_32: return R_ABS; case R_386_TLS_GD: return R_TLSGD; case R_386_TLS_LDM: return R_TLSLD; case R_386_PLT32: return R_PLT_PC; case R_386_PC8: case R_386_PC16: case R_386_PC32: return R_PC; case R_386_GOTPC: return R_GOTONLY_PC_FROM_END; case R_386_TLS_IE: return R_GOT; case R_386_GOT32: case R_386_GOT32X: // These relocations are arguably mis-designed because their calculations // depend on the instructions they are applied to. This is bad because we // usually don't care about whether the target section contains valid // machine instructions or not. But this is part of the documented ABI, so // we had to implement as the standard requires. // // x86 does not support PC-relative data access. Therefore, in order to // access GOT contents, a GOT address needs to be known at link-time // (which means non-PIC) or compilers have to emit code to get a GOT // address at runtime (which means code is position-independent but // compilers need to emit extra code for each GOT access.) This decision // is made at compile-time. In the latter case, compilers emit code to // load an GOT address to a register, which is usually %ebx. // // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or // foo@GOT(%reg). // // foo@GOT is not usable in PIC. If we are creating a PIC output and if we // find such relocation, we should report an error. foo@GOT is resolved to // an *absolute* address of foo's GOT entry, because both GOT address and // foo's offset are known. In other words, it's G + A. // // foo@GOT(%reg) needs to be resolved to a *relative* offset from a GOT to // foo's GOT entry in the table, because GOT address is not known but foo's // offset in the table is known. It's G + A - GOT. // // It's unfortunate that compilers emit the same relocation for these // different use cases. In order to distinguish them, we have to read a // machine instruction. // // The following code implements it. We assume that Loc[0] is the first // byte of a displacement or an immediate field of a valid machine // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at // the byte, we can determine whether the instruction is register-relative // (i.e. it was generated for foo@GOT(%reg)) or absolute (i.e. foo@GOT). return hasBaseReg(Loc[-1]) ? R_GOT_FROM_END : R_GOT; case R_386_TLS_GOTIE: return R_GOT_FROM_END; case R_386_GOTOFF: return R_GOTREL_FROM_END; case R_386_TLS_LE: return R_TLS; case R_386_TLS_LE_32: return R_NEG_TLS; case R_386_NONE: return R_NONE; default: return R_INVALID; } } RelExpr X86::adjustRelaxExpr(RelType Type, const uint8_t *Data, RelExpr Expr) const { switch (Expr) { default: return Expr; case R_RELAX_TLS_GD_TO_IE: return R_RELAX_TLS_GD_TO_IE_END; case R_RELAX_TLS_GD_TO_LE: return R_RELAX_TLS_GD_TO_LE_NEG; } } void X86::writeGotPltHeader(uint8_t *Buf) const { write32le(Buf, InX::Dynamic->getVA()); } void X86::writeGotPlt(uint8_t *Buf, const Symbol &S) const { // Entries in .got.plt initially points back to the corresponding // PLT entries with a fixed offset to skip the first instruction. write32le(Buf, S.getPltVA() + 6); } void X86::writeIgotPlt(uint8_t *Buf, const Symbol &S) const { // An x86 entry is the address of the ifunc resolver function. write32le(Buf, S.getVA()); } RelType X86::getDynRel(RelType Type) const { if (Type == R_386_TLS_LE) return R_386_TLS_TPOFF; if (Type == R_386_TLS_LE_32) return R_386_TLS_TPOFF32; return Type; } void X86::writePltHeader(uint8_t *Buf) const { if (Config->Pic) { const uint8_t V[] = { 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl GOTPLT+4(%ebx) 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *GOTPLT+8(%ebx) 0x90, 0x90, 0x90, 0x90 // nop }; memcpy(Buf, V, sizeof(V)); uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize(); uint32_t GotPlt = InX::GotPlt->getVA() - Ebx; write32le(Buf + 2, GotPlt + 4); write32le(Buf + 8, GotPlt + 8); return; } const uint8_t PltData[] = { 0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4) 0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8) 0x90, 0x90, 0x90, 0x90, // nop }; memcpy(Buf, PltData, sizeof(PltData)); uint32_t GotPlt = InX::GotPlt->getVA(); write32le(Buf + 2, GotPlt + 4); write32le(Buf + 8, GotPlt + 8); } void X86::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const { const uint8_t Inst[] = { 0xff, 0x00, 0, 0, 0, 0, // jmp *foo_in_GOT or jmp *foo@GOT(%ebx) 0x68, 0, 0, 0, 0, // pushl $reloc_offset 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC }; memcpy(Buf, Inst, sizeof(Inst)); if (Config->Pic) { // jmp *foo@GOT(%ebx) uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize(); Buf[1] = 0xa3; write32le(Buf + 2, GotPltEntryAddr - Ebx); } else { // jmp *foo_in_GOT Buf[1] = 0x25; write32le(Buf + 2, GotPltEntryAddr); } write32le(Buf + 7, RelOff); write32le(Buf + 12, -Index * PltEntrySize - PltHeaderSize - 16); } int64_t X86::getImplicitAddend(const uint8_t *Buf, RelType Type) const { switch (Type) { case R_386_8: case R_386_PC8: return SignExtend64<8>(*Buf); case R_386_16: case R_386_PC16: return SignExtend64<16>(read16le(Buf)); case R_386_32: case R_386_GOT32: case R_386_GOT32X: case R_386_GOTOFF: case R_386_GOTPC: case R_386_PC32: case R_386_PLT32: case R_386_TLS_LDO_32: case R_386_TLS_LE: return SignExtend64<32>(read32le(Buf)); default: return 0; } } void X86::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const { switch (Type) { case R_386_8: // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are // being used for some 16-bit programs such as boot loaders, so // we want to support them. checkUInt<8>(Loc, Val, Type); *Loc = Val; break; case R_386_PC8: checkInt<8>(Loc, Val, Type); *Loc = Val; break; case R_386_16: checkUInt<16>(Loc, Val, Type); write16le(Loc, Val); break; case R_386_PC16: // R_386_PC16 is normally used with 16 bit code. In that situation // the PC is 16 bits, just like the addend. This means that it can // point from any 16 bit address to any other if the possibility // of wrapping is included. // The only restriction we have to check then is that the destination // address fits in 16 bits. That is impossible to do here. The problem is // that we are passed the final value, which already had the // current location subtracted from it. // We just check that Val fits in 17 bits. This misses some cases, but // should have no false positives. checkInt<17>(Loc, Val, Type); write16le(Loc, Val); break; case R_386_32: case R_386_GLOB_DAT: case R_386_GOT32: case R_386_GOT32X: case R_386_GOTOFF: case R_386_GOTPC: case R_386_PC32: case R_386_PLT32: case R_386_RELATIVE: case R_386_TLS_DTPMOD32: case R_386_TLS_DTPOFF32: case R_386_TLS_GD: case R_386_TLS_GOTIE: case R_386_TLS_IE: case R_386_TLS_LDM: case R_386_TLS_LDO_32: case R_386_TLS_LE: case R_386_TLS_LE_32: case R_386_TLS_TPOFF: case R_386_TLS_TPOFF32: checkInt<32>(Loc, Val, Type); write32le(Loc, Val); break; default: error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); } } void X86::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { // Convert // leal x@tlsgd(, %ebx, 1), // call __tls_get_addr@plt // to // movl %gs:0,%eax // subl $x@ntpoff,%eax const uint8_t Inst[] = { 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax 0x81, 0xe8, 0, 0, 0, 0, // subl Val(%ebx), %eax }; memcpy(Loc - 3, Inst, sizeof(Inst)); write32le(Loc + 5, Val); } void X86::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const { // Convert // leal x@tlsgd(, %ebx, 1), // call __tls_get_addr@plt // to // movl %gs:0, %eax // addl x@gotntpoff(%ebx), %eax const uint8_t Inst[] = { 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax 0x03, 0x83, 0, 0, 0, 0, // addl Val(%ebx), %eax }; memcpy(Loc - 3, Inst, sizeof(Inst)); write32le(Loc + 5, Val); } // In some conditions, relocations can be optimized to avoid using GOT. // This function does that for Initial Exec to Local Exec case. void X86::relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { // Ulrich's document section 6.2 says that @gotntpoff can // be used with MOVL or ADDL instructions. // @indntpoff is similar to @gotntpoff, but for use in // position dependent code. uint8_t Reg = (Loc[-1] >> 3) & 7; if (Type == R_386_TLS_IE) { if (Loc[-1] == 0xa1) { // "movl foo@indntpoff,%eax" -> "movl $foo,%eax" // This case is different from the generic case below because // this is a 5 byte instruction while below is 6 bytes. Loc[-1] = 0xb8; } else if (Loc[-2] == 0x8b) { // "movl foo@indntpoff,%reg" -> "movl $foo,%reg" Loc[-2] = 0xc7; Loc[-1] = 0xc0 | Reg; } else { // "addl foo@indntpoff,%reg" -> "addl $foo,%reg" Loc[-2] = 0x81; Loc[-1] = 0xc0 | Reg; } } else { assert(Type == R_386_TLS_GOTIE); if (Loc[-2] == 0x8b) { // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg" Loc[-2] = 0xc7; Loc[-1] = 0xc0 | Reg; } else { // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg" Loc[-2] = 0x8d; Loc[-1] = 0x80 | (Reg << 3) | Reg; } } write32le(Loc, Val); } void X86::relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { if (Type == R_386_TLS_LDO_32) { write32le(Loc, Val); return; } // Convert // leal foo(%reg),%eax // call ___tls_get_addr // to // movl %gs:0,%eax // nop // leal 0(%esi,1),%esi const uint8_t Inst[] = { 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax 0x90, // nop 0x8d, 0x74, 0x26, 0x00, // leal 0(%esi,1),%esi }; memcpy(Loc - 2, Inst, sizeof(Inst)); } namespace { class RetpolinePic : public X86 { public: RetpolinePic(); void writeGotPlt(uint8_t *Buf, const Symbol &S) const override; void writePltHeader(uint8_t *Buf) const override; void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const override; }; class RetpolineNoPic : public X86 { public: RetpolineNoPic(); void writeGotPlt(uint8_t *Buf, const Symbol &S) const override; void writePltHeader(uint8_t *Buf) const override; void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const override; }; } // namespace RetpolinePic::RetpolinePic() { PltHeaderSize = 48; PltEntrySize = 32; } void RetpolinePic::writeGotPlt(uint8_t *Buf, const Symbol &S) const { write32le(Buf, S.getPltVA() + 17); } void RetpolinePic::writePltHeader(uint8_t *Buf) const { const uint8_t Insn[] = { 0xff, 0xb3, 0, 0, 0, 0, // 0: pushl GOTPLT+4(%ebx) 0x50, // 6: pushl %eax 0x8b, 0x83, 0, 0, 0, 0, // 7: mov GOTPLT+8(%ebx), %eax 0xe8, 0x0e, 0x00, 0x00, 0x00, // d: call next 0xf3, 0x90, // 12: loop: pause 0x0f, 0xae, 0xe8, // 14: lfence 0xeb, 0xf9, // 17: jmp loop 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 16 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp) 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp) 0x89, 0xc8, // 2b: mov %ecx, %eax 0x59, // 2d: pop %ecx 0xc3, // 2e: ret }; memcpy(Buf, Insn, sizeof(Insn)); uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize(); uint32_t GotPlt = InX::GotPlt->getVA() - Ebx; write32le(Buf + 2, GotPlt + 4); write32le(Buf + 9, GotPlt + 8); } void RetpolinePic::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const { const uint8_t Insn[] = { 0x50, // pushl %eax 0x8b, 0x83, 0, 0, 0, 0, // mov foo@GOT(%ebx), %eax 0xe8, 0, 0, 0, 0, // call plt+0x20 0xe9, 0, 0, 0, 0, // jmp plt+0x12 0x68, 0, 0, 0, 0, // pushl $reloc_offset 0xe9, 0, 0, 0, 0, // jmp plt+0 }; memcpy(Buf, Insn, sizeof(Insn)); uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize(); write32le(Buf + 3, GotPltEntryAddr - Ebx); write32le(Buf + 8, -Index * PltEntrySize - PltHeaderSize - 12 + 32); write32le(Buf + 13, -Index * PltEntrySize - PltHeaderSize - 17 + 18); write32le(Buf + 18, RelOff); write32le(Buf + 23, -Index * PltEntrySize - PltHeaderSize - 27); } RetpolineNoPic::RetpolineNoPic() { PltHeaderSize = 48; PltEntrySize = 32; } void RetpolineNoPic::writeGotPlt(uint8_t *Buf, const Symbol &S) const { write32le(Buf, S.getPltVA() + 16); } void RetpolineNoPic::writePltHeader(uint8_t *Buf) const { const uint8_t PltData[] = { 0xff, 0x35, 0, 0, 0, 0, // 0: pushl GOTPLT+4 0x50, // 6: pushl %eax 0xa1, 0, 0, 0, 0, // 7: mov GOTPLT+8, %eax 0xe8, 0x0f, 0x00, 0x00, 0x00, // c: call next 0xf3, 0x90, // 11: loop: pause 0x0f, 0xae, 0xe8, // 13: lfence 0xeb, 0xf9, // 16: jmp loop 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 18: int3 0xcc, 0xcc, 0xcc, // 1f: int3; .align 16 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp) 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp) 0x89, 0xc8, // 2b: mov %ecx, %eax 0x59, // 2d: pop %ecx 0xc3, // 2e: ret }; memcpy(Buf, PltData, sizeof(PltData)); uint32_t GotPlt = InX::GotPlt->getVA(); write32le(Buf + 2, GotPlt + 4); write32le(Buf + 8, GotPlt + 8); } void RetpolineNoPic::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const { const uint8_t Insn[] = { 0x50, // 0: pushl %eax 0xa1, 0, 0, 0, 0, // 1: mov foo_in_GOT, %eax 0xe8, 0, 0, 0, 0, // 6: call plt+0x20 0xe9, 0, 0, 0, 0, // b: jmp plt+0x11 0x68, 0, 0, 0, 0, // 10: pushl $reloc_offset 0xe9, 0, 0, 0, 0, // 15: jmp plt+0 }; memcpy(Buf, Insn, sizeof(Insn)); write32le(Buf + 2, GotPltEntryAddr); write32le(Buf + 7, -Index * PltEntrySize - PltHeaderSize - 11 + 32); write32le(Buf + 12, -Index * PltEntrySize - PltHeaderSize - 16 + 17); write32le(Buf + 17, RelOff); write32le(Buf + 22, -Index * PltEntrySize - PltHeaderSize - 26); } TargetInfo *elf::getX86TargetInfo() { if (Config->ZRetpolineplt) { if (Config->Pic) { static RetpolinePic T; return &T; } static RetpolineNoPic T; return &T; } static X86 T; return &T; }