Index: head/contrib/llvm-project/lld/ELF/Arch/PPC.cpp =================================================================== --- head/contrib/llvm-project/lld/ELF/Arch/PPC.cpp (revision 359083) +++ head/contrib/llvm-project/lld/ELF/Arch/PPC.cpp (revision 359084) @@ -1,469 +1,468 @@ //===- PPC.cpp ------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "OutputSections.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" #include "Thunks.h" #include "lld/Common/ErrorHandler.h" #include "llvm/Support/Endian.h" using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; namespace lld { namespace elf { namespace { class PPC final : public TargetInfo { public: PPC(); RelExpr getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const override; RelType getDynRel(RelType type) const override; void writeGotHeader(uint8_t *buf) const override; void writePltHeader(uint8_t *buf) const override { llvm_unreachable("should call writePPC32GlinkSection() instead"); } void writePlt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const override { llvm_unreachable("should call writePPC32GlinkSection() instead"); } void writeIplt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const override; void writeGotPlt(uint8_t *buf, const Symbol &s) const override; bool needsThunk(RelExpr expr, RelType relocType, const InputFile *file, uint64_t branchAddr, const Symbol &s, int64_t a) const override; uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; RelExpr 
adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const override; int getTlsGdRelaxSkip(RelType type) const override; void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override; void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override; void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override; void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override; }; } // namespace static uint16_t lo(uint32_t v) { return v; } static uint16_t ha(uint32_t v) { return (v + 0x8000) >> 16; } static uint32_t readFromHalf16(const uint8_t *loc) { return read32(config->isLE ? loc : loc - 2); } static void writeFromHalf16(uint8_t *loc, uint32_t insn) { write32(config->isLE ? loc : loc - 2, insn); } void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { // Create canonical PLT entries for non-PIE code. Compilers don't generate // non-GOT-non-PLT relocations referencing external functions for -fpie/-fPIE. uint32_t glink = in.plt->getVA(); // VA of .glink if (!config->isPic) { - for (const Symbol *sym : in.plt->entries) - if (sym->needsPltAddr) { - writePPC32PltCallStub(buf, sym->getGotPltVA(), nullptr, 0); - buf += 16; - glink += 16; - } + for (const Symbol *sym : cast(in.plt)->canonical_plts) { + writePPC32PltCallStub(buf, sym->getGotPltVA(), nullptr, 0); + buf += 16; + glink += 16; + } } // On PPC Secure PLT ABI, bl foo@plt jumps to a call stub, which loads an // absolute address from a specific .plt slot (usually called .got.plt on // other targets) and jumps there. // // a) With immediate binding (BIND_NOW), the .plt entry is resolved at load // time. The .glink section is not used. // b) With lazy binding, the .plt entry points to a `b PLTresolve` // instruction in .glink, filled in by PPC::writeGotPlt(). // Write N `b PLTresolve` first. 
for (size_t i = 0; i != numEntries; ++i) write32(buf + 4 * i, 0x48000000 | 4 * (numEntries - i)); buf += 4 * numEntries; // Then write PLTresolve(), which has two forms: PIC and non-PIC. PLTresolve() // computes the PLT index (by computing the distance from the landing b to // itself) and calls _dl_runtime_resolve() (in glibc). uint32_t got = in.got->getVA(); const uint8_t *end = buf + 64; if (config->isPic) { uint32_t afterBcl = 4 * in.plt->getNumEntries() + 12; uint32_t gotBcl = got + 4 - (glink + afterBcl); write32(buf + 0, 0x3d6b0000 | ha(afterBcl)); // addis r11,r11,1f-glink@ha write32(buf + 4, 0x7c0802a6); // mflr r0 write32(buf + 8, 0x429f0005); // bcl 20,30,.+4 write32(buf + 12, 0x396b0000 | lo(afterBcl)); // 1: addi r11,r11,1b-glink@l write32(buf + 16, 0x7d8802a6); // mflr r12 write32(buf + 20, 0x7c0803a6); // mtlr r0 write32(buf + 24, 0x7d6c5850); // sub r11,r11,r12 write32(buf + 28, 0x3d8c0000 | ha(gotBcl)); // addis 12,12,GOT+4-1b@ha if (ha(gotBcl) == ha(gotBcl + 4)) { write32(buf + 32, 0x800c0000 | lo(gotBcl)); // lwz r0,r12,GOT+4-1b@l(r12) write32(buf + 36, 0x818c0000 | lo(gotBcl + 4)); // lwz r12,r12,GOT+8-1b@l(r12) } else { write32(buf + 32, 0x840c0000 | lo(gotBcl)); // lwzu r0,r12,GOT+4-1b@l(r12) write32(buf + 36, 0x818c0000 | 4); // lwz r12,r12,4(r12) } write32(buf + 40, 0x7c0903a6); // mtctr 0 write32(buf + 44, 0x7c0b5a14); // add r0,11,11 write32(buf + 48, 0x7d605a14); // add r11,0,11 write32(buf + 52, 0x4e800420); // bctr buf += 56; } else { write32(buf + 0, 0x3d800000 | ha(got + 4)); // lis r12,GOT+4@ha write32(buf + 4, 0x3d6b0000 | ha(-glink)); // addis r11,r11,-glink@ha if (ha(got + 4) == ha(got + 8)) write32(buf + 8, 0x800c0000 | lo(got + 4)); // lwz r0,GOT+4@l(r12) else write32(buf + 8, 0x840c0000 | lo(got + 4)); // lwzu r0,GOT+4@l(r12) write32(buf + 12, 0x396b0000 | lo(-glink)); // addi r11,r11,-glink@l write32(buf + 16, 0x7c0903a6); // mtctr r0 write32(buf + 20, 0x7c0b5a14); // add r0,r11,r11 if (ha(got + 4) == ha(got + 8)) write32(buf + 
24, 0x818c0000 | lo(got + 8)); // lwz r12,GOT+8@l(r12) else write32(buf + 24, 0x818c0000 | 4); // lwz r12,4(r12) write32(buf + 28, 0x7d605a14); // add r11,r0,r11 write32(buf + 32, 0x4e800420); // bctr buf += 36; } // Pad with nop. They should not be executed. for (; buf < end; buf += 4) write32(buf, 0x60000000); } PPC::PPC() { copyRel = R_PPC_COPY; gotRel = R_PPC_GLOB_DAT; noneRel = R_PPC_NONE; pltRel = R_PPC_JMP_SLOT; relativeRel = R_PPC_RELATIVE; iRelativeRel = R_PPC_IRELATIVE; symbolicRel = R_PPC_ADDR32; gotBaseSymInGotPlt = false; gotHeaderEntriesNum = 3; gotPltHeaderEntriesNum = 0; pltHeaderSize = 0; pltEntrySize = 4; ipltEntrySize = 16; needsThunks = true; tlsModuleIndexRel = R_PPC_DTPMOD32; tlsOffsetRel = R_PPC_DTPREL32; tlsGotRel = R_PPC_TPREL32; defaultMaxPageSize = 65536; defaultImageBase = 0x10000000; write32(trapInstr.data(), 0x7fe00008); } void PPC::writeIplt(uint8_t *buf, const Symbol &sym, uint64_t /*pltEntryAddr*/) const { // In -pie or -shared mode, assume r30 points to .got2+0x8000, and use a // .got2.plt_pic32. thunk. writePPC32PltCallStub(buf, sym.getGotPltVA(), sym.file, 0x8000); } void PPC::writeGotHeader(uint8_t *buf) const { // _GLOBAL_OFFSET_TABLE_[0] = _DYNAMIC // glibc stores _dl_runtime_resolve in _GLOBAL_OFFSET_TABLE_[1], // link_map in _GLOBAL_OFFSET_TABLE_[2]. write32(buf, mainPart->dynamic->getVA()); } void PPC::writeGotPlt(uint8_t *buf, const Symbol &s) const { // Address of the symbol resolver stub in .glink . 
write32(buf, in.plt->getVA() + in.plt->headerSize + 4 * s.pltIndex); } bool PPC::needsThunk(RelExpr expr, RelType type, const InputFile *file, uint64_t branchAddr, const Symbol &s, int64_t a) const { if (type != R_PPC_LOCAL24PC && type != R_PPC_REL24 && type != R_PPC_PLTREL24) return false; if (s.isInPlt()) return true; if (s.isUndefWeak()) return false; return !PPC::inBranchRange(type, branchAddr, s.getVA(a)); } uint32_t PPC::getThunkSectionSpacing() const { return 0x2000000; } bool PPC::inBranchRange(RelType type, uint64_t src, uint64_t dst) const { uint64_t offset = dst - src; if (type == R_PPC_LOCAL24PC || type == R_PPC_REL24 || type == R_PPC_PLTREL24) return isInt<26>(offset); llvm_unreachable("unsupported relocation type used in branch"); } RelExpr PPC::getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const { switch (type) { case R_PPC_NONE: return R_NONE; case R_PPC_ADDR16_HA: case R_PPC_ADDR16_HI: case R_PPC_ADDR16_LO: case R_PPC_ADDR32: return R_ABS; case R_PPC_DTPREL16: case R_PPC_DTPREL16_HA: case R_PPC_DTPREL16_HI: case R_PPC_DTPREL16_LO: case R_PPC_DTPREL32: return R_DTPREL; case R_PPC_REL14: case R_PPC_REL32: case R_PPC_REL16_LO: case R_PPC_REL16_HI: case R_PPC_REL16_HA: return R_PC; case R_PPC_GOT16: return R_GOT_OFF; case R_PPC_LOCAL24PC: case R_PPC_REL24: return R_PLT_PC; case R_PPC_PLTREL24: return R_PPC32_PLTREL; case R_PPC_GOT_TLSGD16: return R_TLSGD_GOT; case R_PPC_GOT_TLSLD16: return R_TLSLD_GOT; case R_PPC_GOT_TPREL16: return R_GOT_OFF; case R_PPC_TLS: return R_TLSIE_HINT; case R_PPC_TLSGD: return R_TLSDESC_CALL; case R_PPC_TLSLD: return R_TLSLD_HINT; case R_PPC_TPREL16: case R_PPC_TPREL16_HA: case R_PPC_TPREL16_LO: case R_PPC_TPREL16_HI: return R_TLS; default: error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + ") against symbol " + toString(s)); return R_NONE; } } RelType PPC::getDynRel(RelType type) const { if (type == R_PPC_ADDR32) return type; return R_PPC_NONE; } static std::pair fromDTPREL(RelType type, 
uint64_t val) { uint64_t dtpBiasedVal = val - 0x8000; switch (type) { case R_PPC_DTPREL16: return {R_PPC64_ADDR16, dtpBiasedVal}; case R_PPC_DTPREL16_HA: return {R_PPC_ADDR16_HA, dtpBiasedVal}; case R_PPC_DTPREL16_HI: return {R_PPC_ADDR16_HI, dtpBiasedVal}; case R_PPC_DTPREL16_LO: return {R_PPC_ADDR16_LO, dtpBiasedVal}; case R_PPC_DTPREL32: return {R_PPC_ADDR32, dtpBiasedVal}; default: return {type, val}; } } void PPC::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { RelType newType; std::tie(newType, val) = fromDTPREL(type, val); switch (newType) { case R_PPC_ADDR16: checkIntUInt(loc, val, 16, type); write16(loc, val); break; case R_PPC_GOT16: case R_PPC_GOT_TLSGD16: case R_PPC_GOT_TLSLD16: case R_PPC_GOT_TPREL16: case R_PPC_TPREL16: checkInt(loc, val, 16, type); write16(loc, val); break; case R_PPC_ADDR16_HA: case R_PPC_DTPREL16_HA: case R_PPC_GOT_TLSGD16_HA: case R_PPC_GOT_TLSLD16_HA: case R_PPC_GOT_TPREL16_HA: case R_PPC_REL16_HA: case R_PPC_TPREL16_HA: write16(loc, ha(val)); break; case R_PPC_ADDR16_HI: case R_PPC_DTPREL16_HI: case R_PPC_GOT_TLSGD16_HI: case R_PPC_GOT_TLSLD16_HI: case R_PPC_GOT_TPREL16_HI: case R_PPC_REL16_HI: case R_PPC_TPREL16_HI: write16(loc, val >> 16); break; case R_PPC_ADDR16_LO: case R_PPC_DTPREL16_LO: case R_PPC_GOT_TLSGD16_LO: case R_PPC_GOT_TLSLD16_LO: case R_PPC_GOT_TPREL16_LO: case R_PPC_REL16_LO: case R_PPC_TPREL16_LO: write16(loc, val); break; case R_PPC_ADDR32: case R_PPC_REL32: write32(loc, val); break; case R_PPC_REL14: { uint32_t mask = 0x0000FFFC; checkInt(loc, val, 16, type); checkAlignment(loc, val, 4, type); write32(loc, (read32(loc) & ~mask) | (val & mask)); break; } case R_PPC_REL24: case R_PPC_LOCAL24PC: case R_PPC_PLTREL24: { uint32_t mask = 0x03FFFFFC; checkInt(loc, val, 26, type); checkAlignment(loc, val, 4, type); write32(loc, (read32(loc) & ~mask) | (val & mask)); break; } default: llvm_unreachable("unknown relocation"); } } RelExpr PPC::adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) 
const { if (expr == R_RELAX_TLS_GD_TO_IE) return R_RELAX_TLS_GD_TO_IE_GOT_OFF; if (expr == R_RELAX_TLS_LD_TO_LE) return R_RELAX_TLS_LD_TO_LE_ABS; return expr; } int PPC::getTlsGdRelaxSkip(RelType type) const { // A __tls_get_addr call instruction is marked with 2 relocations: // // R_PPC_TLSGD / R_PPC_TLSLD: marker relocation // R_PPC_REL24: __tls_get_addr // // After the relaxation we no longer call __tls_get_addr and should skip both // relocations to not create a false dependence on __tls_get_addr being // defined. if (type == R_PPC_TLSGD || type == R_PPC_TLSLD) return 2; return 1; } void PPC::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { switch (type) { case R_PPC_GOT_TLSGD16: { // addi rT, rA, x@got@tlsgd --> lwz rT, x@got@tprel(rA) uint32_t insn = readFromHalf16(loc); writeFromHalf16(loc, 0x80000000 | (insn & 0x03ff0000)); relocateOne(loc, R_PPC_GOT_TPREL16, val); break; } case R_PPC_TLSGD: // bl __tls_get_addr(x@tldgd) --> add r3, r3, r2 write32(loc, 0x7c631214); break; default: llvm_unreachable("unsupported relocation for TLS GD to IE relaxation"); } } void PPC::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { switch (type) { case R_PPC_GOT_TLSGD16: // addi r3, r31, x@got@tlsgd --> addis r3, r2, x@tprel@ha writeFromHalf16(loc, 0x3c620000 | ha(val)); break; case R_PPC_TLSGD: // bl __tls_get_addr(x@tldgd) --> add r3, r3, x@tprel@l write32(loc, 0x38630000 | lo(val)); break; default: llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); } } void PPC::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { switch (type) { case R_PPC_GOT_TLSLD16: // addi r3, rA, x@got@tlsgd --> addis r3, r2, 0 writeFromHalf16(loc, 0x3c620000); break; case R_PPC_TLSLD: // r3+x@dtprel computes r3+x-0x8000, while we want it to compute r3+x@tprel // = r3+x-0x7000, so add 4096 to r3. 
// bl __tls_get_addr(x@tlsld) --> addi r3, r3, 4096 write32(loc, 0x38631000); break; case R_PPC_DTPREL16: case R_PPC_DTPREL16_HA: case R_PPC_DTPREL16_HI: case R_PPC_DTPREL16_LO: relocateOne(loc, type, val); break; default: llvm_unreachable("unsupported relocation for TLS LD to LE relaxation"); } } void PPC::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { switch (type) { case R_PPC_GOT_TPREL16: { // lwz rT, x@got@tprel(rA) --> addis rT, r2, x@tprel@ha uint32_t rt = readFromHalf16(loc) & 0x03e00000; writeFromHalf16(loc, 0x3c020000 | rt | ha(val)); break; } case R_PPC_TLS: { uint32_t insn = read32(loc); if (insn >> 26 != 31) error("unrecognized instruction for IE to LE R_PPC_TLS"); // addi rT, rT, x@tls --> addi rT, rT, x@tprel@l uint32_t dFormOp = getPPCDFormOp((read32(loc) & 0x000007fe) >> 1); if (dFormOp == 0) error("unrecognized instruction for IE to LE R_PPC_TLS"); write32(loc, (dFormOp << 26) | (insn & 0x03ff0000) | lo(val)); break; } default: llvm_unreachable("unsupported relocation for TLS IE to LE relaxation"); } } TargetInfo *getPPCTargetInfo() { static PPC target; return ⌖ } } // namespace elf } // namespace lld Index: head/contrib/llvm-project/lld/ELF/Relocations.cpp =================================================================== --- head/contrib/llvm-project/lld/ELF/Relocations.cpp (revision 359083) +++ head/contrib/llvm-project/lld/ELF/Relocations.cpp (revision 359084) @@ -1,1968 +1,1969 @@ //===- Relocations.cpp ----------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file contains platform-independent functions to process relocations. // I'll describe the overview of this file here. 
//
// Simple relocations are easy to handle for the linker. For example,
// for R_X86_64_PC64 relocs, the linker just has to fix up locations
// with the relative offsets to the target symbols. It would just be
// reading records from relocation sections and applying them to output.
//
// But not all relocations are that easy to handle. For example, for
// R_386_GOTOFF relocs, the linker has to create new GOT entries for
// symbols if they don't exist, and fix up locations with GOT entry
// offsets from the beginning of GOT section. So there is more than
// fixing addresses in relocation processing.
//
// ELF defines a large number of complex relocations.
//
// The functions in this file analyze relocations and do whatever needs
// to be done. This includes, but is not limited to, the following.
//
// - create GOT/PLT entries
// - create new relocations in .dynsym to let the dynamic linker resolve
//   them at runtime (since ELF supports dynamic linking, not all
//   relocations can be resolved at link-time)
// - create COPY relocs and reserve space in .bss
// - replace expensive relocs (in terms of runtime cost) with cheap ones
// - error out infeasible combinations such as PIC and non-relative relocs
//
// Note that the functions in this file don't actually apply relocations
// because they don't know about the output file nor the output file buffer.
// Instead, they store Relocation objects in InputSection's Relocations
// vector so that they can be applied later in InputSection::writeTo.
// //===----------------------------------------------------------------------===// #include "Relocations.h" #include "Config.h" #include "LinkerScript.h" #include "OutputSections.h" #include "SymbolTable.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" #include "Thunks.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "lld/Common/Strings.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Demangle/Demangle.h" #include "llvm/Support/Endian.h" #include "llvm/Support/raw_ostream.h" #include using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::support::endian; namespace lld { namespace elf { static Optional getLinkerScriptLocation(const Symbol &sym) { for (BaseCommand *base : script->sectionCommands) if (auto *cmd = dyn_cast(base)) if (cmd->sym == &sym) return cmd->location; return None; } // Construct a message in the following format. // // >>> defined in /home/alice/src/foo.o // >>> referenced by bar.c:12 (/home/alice/src/bar.c:12) // >>> /home/alice/src/bar.o:(.text+0x1) static std::string getLocation(InputSectionBase &s, const Symbol &sym, uint64_t off) { std::string msg = "\n>>> defined in "; if (sym.file) msg += toString(sym.file); else if (Optional loc = getLinkerScriptLocation(sym)) msg += *loc; msg += "\n>>> referenced by "; std::string src = s.getSrcMsg(sym, off); if (!src.empty()) msg += src + "\n>>> "; return msg + s.getObjMsg(off); } namespace { // Build a bitmask with one bit set for each RelExpr. // // Constexpr function arguments can't be used in static asserts, so we // use template arguments to build the mask. // But function template partial specializations don't exist (needed // for base case of the recursion), so we need a dummy struct. template struct RelExprMaskBuilder { static inline uint64_t build() { return 0; } }; // Specialization for recursive case. 
template struct RelExprMaskBuilder { static inline uint64_t build() { static_assert(0 <= Head && Head < 64, "RelExpr is too large for 64-bit mask!"); return (uint64_t(1) << Head) | RelExprMaskBuilder::build(); } }; } // namespace // Return true if `Expr` is one of `Exprs`. // There are fewer than 64 RelExpr's, so we can represent any set of // RelExpr's as a constant bit mask and test for membership with a // couple cheap bitwise operations. template bool oneof(RelExpr expr) { assert(0 <= expr && (int)expr < 64 && "RelExpr is too large for 64-bit mask!"); return (uint64_t(1) << expr) & RelExprMaskBuilder::build(); } // This function is similar to the `handleTlsRelocation`. MIPS does not // support any relaxations for TLS relocations so by factoring out MIPS // handling in to the separate function we can simplify the code and do not // pollute other `handleTlsRelocation` by MIPS `ifs` statements. // Mips has a custom MipsGotSection that handles the writing of GOT entries // without dynamic relocations. static unsigned handleMipsTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, uint64_t offset, int64_t addend, RelExpr expr) { if (expr == R_MIPS_TLSLD) { in.mipsGot->addTlsIndex(*c.file); c.relocations.push_back({expr, type, offset, addend, &sym}); return 1; } if (expr == R_MIPS_TLSGD) { in.mipsGot->addDynTlsEntry(*c.file, sym); c.relocations.push_back({expr, type, offset, addend, &sym}); return 1; } return 0; } // Notes about General Dynamic and Local Dynamic TLS models below. They may // require the generation of a pair of GOT entries that have associated dynamic // relocations. The pair of GOT entries created are of the form GOT[e0] Module // Index (Used to find pointer to TLS block at run-time) GOT[e1] Offset of // symbol in TLS block. // // Returns the number of relocations processed. 
// NOTE(review): throughout this span the angle-bracket template text appears to
// have been stripped from the source (e.g. `template static`, `oneof(expr)`,
// `dyn_cast(&sym)`, `SmallSet getSymbolsAt`). The RelExpr lists passed to
// oneof cannot be determined from this text, so the code is left exactly as
// found; restore it from the upstream file before building.
//
// Records the relocations (and any GOT entries / dynamic relocations) needed
// for a relocation against a TLS symbol. Returns 0 when sym is not a TLS
// symbol or when none of the cases below applies.
template static unsigned
handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c,
                    typename ELFT::uint offset, int64_t addend, RelExpr expr) {
  if (!sym.isTls())
    return 0;

  if (config->emachine == EM_MIPS)
    return handleMipsTlsRelocation(type, sym, c, offset, addend, expr);

  // NOTE(review): oneof argument list missing (see the note at the top).
  if (oneof(expr) && config->shared) {
    if (in.got->addDynTlsEntry(sym)) {
      uint64_t off = in.got->getGlobalDynOffset(sym);
      mainPart->relaDyn->addReloc(
          {target->tlsDescRel, in.got, off, !sym.isPreemptible, &sym, 0});
    }
    if (expr != R_TLSDESC_CALL)
      c.relocations.push_back({expr, type, offset, addend, &sym});
    return 1;
  }

  bool canRelax = config->emachine != EM_ARM &&
                  config->emachine != EM_HEXAGON &&
                  config->emachine != EM_RISCV;

  // If we are producing an executable and the symbol is non-preemptable, it
  // must be defined and the code sequence can be relaxed to use Local-Exec.
  //
  // ARM and RISC-V do not support any relaxations for TLS relocations, however,
  // we can omit the DTPMOD dynamic relocations and resolve them at link time
  // because they are always 1. This may be necessary for static linking as
  // DTPMOD may not be expected at load time.
  bool isLocalInExecutable = !sym.isPreemptible && !config->shared;

  // Local Dynamic is for access to module local TLS variables, while still
  // being suitable for being dynamically loaded via dlopen. GOT[e0] is the
  // module index, with a special value of 0 for the current module. GOT[e1] is
  // unused. There only needs to be one module index entry.
  // NOTE(review): oneof argument list missing (see the note at the top).
  if (oneof(expr)) {
    // Local-Dynamic relocs can be relaxed to Local-Exec.
    if (canRelax && !config->shared) {
      c.relocations.push_back(
          {target->adjustRelaxExpr(type, nullptr, R_RELAX_TLS_LD_TO_LE), type,
           offset, addend, &sym});
      return target->getTlsGdRelaxSkip(type);
    }
    if (expr == R_TLSLD_HINT)
      return 1;
    if (in.got->addTlsIndex()) {
      if (isLocalInExecutable)
        in.got->relocations.push_back(
            {R_ADDEND, target->symbolicRel, in.got->getTlsIndexOff(), 1, &sym});
      else
        mainPart->relaDyn->addReloc(target->tlsModuleIndexRel, in.got,
                                    in.got->getTlsIndexOff(), nullptr);
    }
    c.relocations.push_back({expr, type, offset, addend, &sym});
    return 1;
  }

  // Local-Dynamic relocs can be relaxed to Local-Exec.
  if (expr == R_DTPREL && !config->shared) {
    c.relocations.push_back(
        {target->adjustRelaxExpr(type, nullptr, R_RELAX_TLS_LD_TO_LE), type,
         offset, addend, &sym});
    return 1;
  }

  // Local-Dynamic sequence where offset of tls variable relative to dynamic
  // thread pointer is stored in the got. This cannot be relaxed to Local-Exec.
  if (expr == R_TLSLD_GOT_OFF) {
    if (!sym.isInGot()) {
      in.got->addEntry(sym);
      uint64_t off = sym.getGotOffset();
      in.got->relocations.push_back(
          {R_ABS, target->tlsOffsetRel, off, 0, &sym});
    }
    c.relocations.push_back({expr, type, offset, addend, &sym});
    return 1;
  }

  // NOTE(review): oneof argument list missing (see the note at the top).
  if (oneof(expr)) {
    if (!canRelax || config->shared) {
      if (in.got->addDynTlsEntry(sym)) {
        uint64_t off = in.got->getGlobalDynOffset(sym);

        if (isLocalInExecutable)
          // Write one to the GOT slot.
          in.got->relocations.push_back(
              {R_ADDEND, target->symbolicRel, off, 1, &sym});
        else
          mainPart->relaDyn->addReloc(target->tlsModuleIndexRel, in.got, off,
                                      &sym);

        // If the symbol is preemptible we need the dynamic linker to write
        // the offset too.
        uint64_t offsetOff = off + config->wordsize;
        if (sym.isPreemptible)
          mainPart->relaDyn->addReloc(target->tlsOffsetRel, in.got, offsetOff,
                                      &sym);
        else
          in.got->relocations.push_back(
              {R_ABS, target->tlsOffsetRel, offsetOff, 0, &sym});
      }
      c.relocations.push_back({expr, type, offset, addend, &sym});
      return 1;
    }

    // Global-Dynamic relocs can be relaxed to Initial-Exec or Local-Exec
    // depending on the symbol being locally defined or not.
    if (sym.isPreemptible) {
      c.relocations.push_back(
          {target->adjustRelaxExpr(type, nullptr, R_RELAX_TLS_GD_TO_IE), type,
           offset, addend, &sym});
      if (!sym.isInGot()) {
        in.got->addEntry(sym);
        mainPart->relaDyn->addReloc(target->tlsGotRel, in.got,
                                    sym.getGotOffset(), &sym);
      }
    } else {
      c.relocations.push_back(
          {target->adjustRelaxExpr(type, nullptr, R_RELAX_TLS_GD_TO_LE), type,
           offset, addend, &sym});
    }
    return target->getTlsGdRelaxSkip(type);
  }

  // Initial-Exec relocs can be relaxed to Local-Exec if the symbol is locally
  // defined.
  // NOTE(review): oneof argument list missing (see the note at the top).
  if (oneof(expr) && canRelax && isLocalInExecutable) {
    c.relocations.push_back({R_RELAX_TLS_IE_TO_LE, type, offset, addend, &sym});
    return 1;
  }

  if (expr == R_TLSIE_HINT)
    return 1;
  return 0;
}

// Returns the relocation type that is paired with `type` for MIPS addend
// computation, or R_MIPS_NONE if `type` has no pair.
static RelType getMipsPairType(RelType type, bool isLocal) {
  switch (type) {
  case R_MIPS_HI16:
    return R_MIPS_LO16;
  case R_MIPS_GOT16:
    // In case of global symbol, the R_MIPS_GOT16 relocation does not
    // have a pair. Each global symbol has a unique entry in the GOT
    // and a corresponding instruction with help of the R_MIPS_GOT16
    // relocation loads an address of the symbol. In case of local
    // symbol, the R_MIPS_GOT16 relocation creates a GOT entry to hold
    // the high 16 bits of the symbol's value. A paired R_MIPS_LO16
    // relocations handle low 16 bits of the address. That allows
    // to allocate only one GOT entry for every 64 KBytes of local data.
    return isLocal ? R_MIPS_LO16 : R_MIPS_NONE;
  case R_MICROMIPS_GOT16:
    return isLocal ? R_MICROMIPS_LO16 : R_MIPS_NONE;
  case R_MIPS_PCHI16:
    return R_MIPS_PCLO16;
  case R_MICROMIPS_HI16:
    return R_MICROMIPS_LO16;
  default:
    return R_MIPS_NONE;
  }
}

// True if non-preemptable symbol always has the same value regardless of where
// the DSO is loaded.
static bool isAbsolute(const Symbol &sym) {
  if (sym.isUndefWeak())
    return true;
  // NOTE(review): dyn_cast target type missing (see the note at the top).
  if (const auto *dr = dyn_cast(&sym))
    return dr->section == nullptr; // Absolute symbol.
  return false;
}

// True if the symbol is absolute (see isAbsolute) or is a TLS symbol.
static bool isAbsoluteValue(const Symbol &sym) {
  return isAbsolute(sym) || sym.isTls();
}

// Returns true if Expr refers to a PLT entry.
// NOTE(review): oneof argument list missing (see the note at the top).
static bool needsPlt(RelExpr expr) {
  return oneof(expr);
}

// Returns true if Expr refers to a GOT entry. Note that this function
// returns false for TLS variables even though they need GOT, because
// TLS variables use the GOT differently than regular variables.
// NOTE(review): oneof argument list missing (see the note at the top).
static bool needsGot(RelExpr expr) {
  return oneof(expr);
}

// True if this expression is of the form Sym - X, where X is a position in the
// file (PC, or GOT for example).
// NOTE(review): oneof argument list missing (see the note at the top).
static bool isRelExpr(RelExpr expr) {
  return oneof(expr);
}

// Returns true if a given relocation can be computed at link-time.
//
// For instance, we know the offset from a relocation to its target at
// link-time if the relocation is PC-relative and refers a
// non-interposable function in the same executable. This function
// will return true for such relocation.
//
// If this function returns false, that means we need to emit a
// dynamic relocation so that the relocation will be fixed at load-time.
static bool isStaticLinkTimeConstant(RelExpr e, RelType type, const Symbol &sym,
                                     InputSectionBase &s, uint64_t relOff) {
  // These expressions always compute a constant
  // NOTE(review): oneof argument list missing (see the note at the top).
  if (oneof(e))
    return true;

  // These never do, except if the entire file is position dependent or if
  // only the low bits are used.
  if (e == R_GOT || e == R_PLT || e == R_TLSDESC)
    return target->usesOnlyLowPageBits(type) || !config->isPic;

  if (sym.isPreemptible)
    return false;
  if (!config->isPic)
    return true;

  // The size of a non preemptible symbol is a constant.
  if (e == R_SIZE)
    return true;

  // For the target and the relocation, we want to know if they are
  // absolute or relative.
  bool absVal = isAbsoluteValue(sym);
  bool relE = isRelExpr(e);
  if (absVal && !relE)
    return true;
  if (!absVal && relE)
    return true;
  if (!absVal && !relE)
    return target->usesOnlyLowPageBits(type);

  // The only combination left is an absolute value with a relative expression.
  assert(absVal && relE);

  // Allow R_PLT_PC (optimized to R_PC here) to a hidden undefined weak symbol
  // in PIC mode. This is a little strange, but it allows us to link function
  // calls to such symbols (e.g. glibc/stdlib/exit.c:__run_exit_handlers).
  // Normally such a call will be guarded with a comparison, which will load a
  // zero from the GOT.
  if (sym.isUndefWeak())
    return true;

  // We set the final symbols values for linker script defined symbols later.
  // They always can be computed as a link time constant.
  if (sym.scriptDefined)
    return true;

  // Report the error but still return true.
  error("relocation " + toString(type) + " cannot refer to absolute symbol: " +
        toString(sym) + getLocation(s, sym, relOff));
  return true;
}

// Maps an expression that refers to the symbol itself to the corresponding
// expression that refers to its PLT entry; other expressions are unchanged.
static RelExpr toPlt(RelExpr expr) {
  switch (expr) {
  case R_PPC64_CALL:
    return R_PPC64_CALL_PLT;
  case R_PC:
    return R_PLT_PC;
  case R_ABS:
    return R_PLT;
  default:
    return expr;
  }
}

static RelExpr fromPlt(RelExpr expr) {
  // We decided not to use a plt. Optimize a reference to the plt to a
  // reference to the symbol itself.
  switch (expr) {
  case R_PLT_PC:
  case R_PPC32_PLTREL:
    return R_PC;
  case R_PPC64_CALL_PLT:
    return R_PPC64_CALL;
  case R_PLT:
    return R_ABS;
  default:
    return expr;
  }
}

// Returns true if a given shared symbol is in a read-only segment in a DSO.
template static bool isReadOnly(SharedSymbol &ss) {
  using Elf_Phdr = typename ELFT::Phdr;

  // Determine if the symbol is read-only by scanning the DSO's program headers.
  const SharedFile &file = ss.getFile();
  for (const Elf_Phdr &phdr :
       check(file.template getObj().program_headers()))
    if ((phdr.p_type == ELF::PT_LOAD || phdr.p_type == ELF::PT_GNU_RELRO) &&
        !(phdr.p_flags & ELF::PF_W) && ss.value >= phdr.p_vaddr &&
        ss.value < phdr.p_vaddr + phdr.p_memsz)
      return true;
  return false;
}

// Returns symbols at the same offset as a given symbol, including SS itself.
//
// If two or more symbols are at the same offset, and at least one of
// them are copied by a copy relocation, all of them need to be copied.
// Otherwise, they would refer to different places at runtime.
template static SmallSet getSymbolsAt(SharedSymbol &ss) {
  using Elf_Sym = typename ELFT::Sym;

  SharedFile &file = ss.getFile();
  SmallSet ret;
  for (const Elf_Sym &s : file.template getGlobalELFSyms()) {
    // Skip undefined, absolute and TLS symbols, and symbols at other values.
    if (s.st_shndx == SHN_UNDEF || s.st_shndx == SHN_ABS ||
        s.getType() == STT_TLS || s.st_value != ss.value)
      continue;
    StringRef name = check(s.getName(file.getStringTable()));
    Symbol *sym = symtab->find(name);
    if (auto *alias = dyn_cast_or_null(sym))
      ret.insert(alias);
  }
  return ret;
}

// When a symbol is copy relocated or we create a canonical plt entry, it is
// effectively a defined symbol. In the case of copy relocation the symbol is
// in .bss and in the case of a canonical plt entry it is in .plt. This function
// replaces the existing symbol with a Defined pointing to the appropriate
// location.
static void replaceWithDefined(Symbol &sym, SectionBase *sec, uint64_t value,
                               uint64_t size) {
  // Keep a copy so the indices below can be restored after the replacement.
  Symbol old = sym;

  sym.replace(Defined{sym.file, sym.getName(), sym.binding, sym.stOther,
                      sym.type, value, size, sec});

  sym.pltIndex = old.pltIndex;
  sym.gotIndex = old.gotIndex;
  sym.verdefIndex = old.verdefIndex;
  sym.exportDynamic = true;
  sym.isUsedInRegularObj = true;
}

// Reserve space in .bss or .bss.rel.ro for copy relocation.
//
// The copy relocation is pretty much a hack.
If you use a copy relocation // in your program, not only the symbol name but the symbol's size, RW/RO // bit and alignment become part of the ABI. In addition to that, if the // symbol has aliases, the aliases become part of the ABI. That's subtle, // but if you violate that implicit ABI, that can cause very counter- // intuitive consequences. // // So, what is the copy relocation? It's for linking non-position // independent code to DSOs. In an ideal world, all references to data // exported by DSOs should go indirectly through GOT. But if object files // are compiled as non-PIC, all data references are direct. There is no // way for the linker to transform the code to use GOT, as machine // instructions are already set in stone in object files. This is where // the copy relocation takes a role. // // A copy relocation instructs the dynamic linker to copy data from a DSO // to a specified address (which is usually in .bss) at load-time. If the // static linker (that's us) finds a direct data reference to a DSO // symbol, it creates a copy relocation, so that the symbol can be // resolved as if it were in .bss rather than in a DSO. // // As you can see in this function, we create a copy relocation for the // dynamic linker, and the relocation contains not only symbol name but // various other information about the symbol. So, such attributes become a // part of the ABI. // // Note for application developers: I can give you a piece of advice if // you are writing a shared library. You probably should export only // functions from your library. You shouldn't export variables. // // As an example what can happen when you export variables without knowing // the semantics of copy relocations, assume that you have an exported // variable of type T. It is an ABI-breaking change to add new members at // end of T even though doing that doesn't change the layout of the // existing members. 
That's because the space for the new members are not // reserved in .bss unless you recompile the main program. That means they // are likely to overlap with other data that happens to be laid out next // to the variable in .bss. This kind of issue is sometimes very hard to // debug. What's a solution? Instead of exporting a variable V from a DSO, // define an accessor getV(). template static void addCopyRelSymbol(SharedSymbol &ss) { // Copy relocation against zero-sized symbol doesn't make sense. uint64_t symSize = ss.getSize(); if (symSize == 0 || ss.alignment == 0) fatal("cannot create a copy relocation for symbol " + toString(ss)); // See if this symbol is in a read-only segment. If so, preserve the symbol's // memory protection by reserving space in the .bss.rel.ro section. bool isRO = isReadOnly(ss); BssSection *sec = make(isRO ? ".bss.rel.ro" : ".bss", symSize, ss.alignment); OutputSection *osec = (isRO ? in.bssRelRo : in.bss)->getParent(); // At this point, sectionBases has been migrated to sections. Append sec to // sections. if (osec->sectionCommands.empty() || !isa(osec->sectionCommands.back())) osec->sectionCommands.push_back(make("")); auto *isd = cast(osec->sectionCommands.back()); isd->sections.push_back(sec); osec->commitSection(sec); // Look through the DSO's dynamic symbol table for aliases and create a // dynamic symbol for each one. This causes the copy relocation to correctly // interpose any aliases. for (SharedSymbol *sym : getSymbolsAt(ss)) replaceWithDefined(*sym, sec, 0, sym->size); mainPart->relaDyn->addReloc(target->copyRel, sec, 0, &ss); } // MIPS has an odd notion of "paired" relocations to calculate addends. // For example, if a relocation is of R_MIPS_HI16, there must be a // R_MIPS_LO16 relocation after that, and an addend is calculated using // the two relocations. 
template static int64_t computeMipsAddend(const RelTy &rel, const RelTy *end, InputSectionBase &sec, RelExpr expr, bool isLocal) { if (expr == R_MIPS_GOTREL && isLocal) return sec.getFile()->mipsGp0; // The ABI says that the paired relocation is used only for REL. // See p. 4-17 at ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf if (RelTy::IsRela) return 0; RelType type = rel.getType(config->isMips64EL); uint32_t pairTy = getMipsPairType(type, isLocal); if (pairTy == R_MIPS_NONE) return 0; const uint8_t *buf = sec.data().data(); uint32_t symIndex = rel.getSymbol(config->isMips64EL); // To make things worse, paired relocations might not be contiguous in // the relocation table, so we need to do linear search. *sigh* for (const RelTy *ri = &rel; ri != end; ++ri) if (ri->getType(config->isMips64EL) == pairTy && ri->getSymbol(config->isMips64EL) == symIndex) return target->getImplicitAddend(buf + ri->r_offset, pairTy); warn("can't find matching " + toString(pairTy) + " relocation for " + toString(type)); return 0; } // Returns an addend of a given relocation. If it is RELA, an addend // is in a relocation itself. If it is REL, we need to read it from an // input section. template static int64_t computeAddend(const RelTy &rel, const RelTy *end, InputSectionBase &sec, RelExpr expr, bool isLocal) { int64_t addend; RelType type = rel.getType(config->isMips64EL); if (RelTy::IsRela) { addend = getAddend(rel); } else { const uint8_t *buf = sec.data().data(); addend = target->getImplicitAddend(buf + rel.r_offset, type); } if (config->emachine == EM_PPC64 && config->isPic && type == R_PPC64_TOC) addend += getPPC64TocBase(); if (config->emachine == EM_MIPS) addend += computeMipsAddend(rel, end, sec, expr, isLocal); return addend; } // Custom error message if Sym is defined in a discarded section. 
template static std::string maybeReportDiscarded(Undefined &sym) { auto *file = dyn_cast_or_null>(sym.file); if (!file || !sym.discardedSecIdx || file->getSections()[sym.discardedSecIdx] != &InputSection::discarded) return ""; ArrayRef> objSections = CHECK(file->getObj().sections(), file); std::string msg; if (sym.type == ELF::STT_SECTION) { msg = "relocation refers to a discarded section: "; msg += CHECK( file->getObj().getSectionName(&objSections[sym.discardedSecIdx]), file); } else { msg = "relocation refers to a symbol in a discarded section: " + toString(sym); } msg += "\n>>> defined in " + toString(file); Elf_Shdr_Impl elfSec = objSections[sym.discardedSecIdx - 1]; if (elfSec.sh_type != SHT_GROUP) return msg; // If the discarded section is a COMDAT. StringRef signature = file->getShtGroupSignature(objSections, elfSec); if (const InputFile *prevailing = symtab->comdatGroups.lookup(CachedHashStringRef(signature))) msg += "\n>>> section group signature: " + signature.str() + "\n>>> prevailing definition is in " + toString(prevailing); return msg; } // Undefined diagnostics are collected in a vector and emitted once all of // them are known, so that some postprocessing on the list of undefined symbols // can happen before lld emits diagnostics. struct UndefinedDiag { Symbol *sym; struct Loc { InputSectionBase *sec; uint64_t offset; }; std::vector locs; bool isWarning; }; static std::vector undefs; // Check whether the definition name def is a mangled function name that matches // the reference name ref. static bool canSuggestExternCForCXX(StringRef ref, StringRef def) { llvm::ItaniumPartialDemangler d; std::string name = def.str(); if (d.partialDemangle(name.c_str())) return false; char *buf = d.getFunctionName(nullptr, nullptr); if (!buf) return false; bool ret = ref == buf; free(buf); return ret; } // Suggest an alternative spelling of an "undefined symbol" diagnostic. 
Returns // the suggested symbol, which is either in the symbol table, or in the same // file of sym. template static const Symbol *getAlternativeSpelling(const Undefined &sym, std::string &pre_hint, std::string &post_hint) { DenseMap map; if (auto *file = dyn_cast_or_null>(sym.file)) { // If sym is a symbol defined in a discarded section, maybeReportDiscarded() // will give an error. Don't suggest an alternative spelling. if (file && sym.discardedSecIdx != 0 && file->getSections()[sym.discardedSecIdx] == &InputSection::discarded) return nullptr; // Build a map of local defined symbols. for (const Symbol *s : sym.file->getSymbols()) if (s->isLocal() && s->isDefined()) map.try_emplace(s->getName(), s); } auto suggest = [&](StringRef newName) -> const Symbol * { // If defined locally. if (const Symbol *s = map.lookup(newName)) return s; // If in the symbol table and not undefined. if (const Symbol *s = symtab->find(newName)) if (!s->isUndefined()) return s; return nullptr; }; // This loop enumerates all strings of Levenshtein distance 1 as typo // correction candidates and suggests the one that exists as a non-undefined // symbol. StringRef name = sym.getName(); for (size_t i = 0, e = name.size(); i != e + 1; ++i) { // Insert a character before name[i]. std::string newName = (name.substr(0, i) + "0" + name.substr(i)).str(); for (char c = '0'; c <= 'z'; ++c) { newName[i] = c; if (const Symbol *s = suggest(newName)) return s; } if (i == e) break; // Substitute name[i]. newName = name; for (char c = '0'; c <= 'z'; ++c) { newName[i] = c; if (const Symbol *s = suggest(newName)) return s; } // Transpose name[i] and name[i+1]. This is of edit distance 2 but it is // common. if (i + 1 < e) { newName[i] = name[i + 1]; newName[i + 1] = name[i]; if (const Symbol *s = suggest(newName)) return s; } // Delete name[i]. newName = (name.substr(0, i) + name.substr(i + 1)).str(); if (const Symbol *s = suggest(newName)) return s; } // Case mismatch, e.g. Foo vs FOO. 
for (auto &it : map) if (name.equals_lower(it.first)) return it.second; for (Symbol *sym : symtab->symbols()) if (!sym->isUndefined() && name.equals_lower(sym->getName())) return sym; // The reference may be a mangled name while the definition is not. Suggest a // missing extern "C". if (name.startswith("_Z")) { std::string buf = name.str(); llvm::ItaniumPartialDemangler d; if (!d.partialDemangle(buf.c_str())) if (char *buf = d.getFunctionName(nullptr, nullptr)) { const Symbol *s = suggest(buf); free(buf); if (s) { pre_hint = ": extern \"C\" "; return s; } } } else { const Symbol *s = nullptr; for (auto &it : map) if (canSuggestExternCForCXX(name, it.first)) { s = it.second; break; } if (!s) for (Symbol *sym : symtab->symbols()) if (canSuggestExternCForCXX(name, sym->getName())) { s = sym; break; } if (s) { pre_hint = " to declare "; post_hint = " as extern \"C\"?"; return s; } } return nullptr; } template static void reportUndefinedSymbol(const UndefinedDiag &undef, bool correctSpelling) { Symbol &sym = *undef.sym; auto visibility = [&]() -> std::string { switch (sym.visibility) { case STV_INTERNAL: return "internal "; case STV_HIDDEN: return "hidden "; case STV_PROTECTED: return "protected "; default: return ""; } }; std::string msg = maybeReportDiscarded(cast(sym)); if (msg.empty()) msg = "undefined " + visibility() + "symbol: " + toString(sym); const size_t maxUndefReferences = 10; size_t i = 0; for (UndefinedDiag::Loc l : undef.locs) { if (i >= maxUndefReferences) break; InputSectionBase &sec = *l.sec; uint64_t offset = l.offset; msg += "\n>>> referenced by "; std::string src = sec.getSrcMsg(sym, offset); if (!src.empty()) msg += src + "\n>>> "; msg += sec.getObjMsg(offset); i++; } if (i < undef.locs.size()) msg += ("\n>>> referenced " + Twine(undef.locs.size() - i) + " more times") .str(); if (correctSpelling) { std::string pre_hint = ": ", post_hint; if (const Symbol *corrected = getAlternativeSpelling( cast(sym), pre_hint, post_hint)) { msg += "\n>>> did 
you mean" + pre_hint + toString(*corrected) + post_hint; if (corrected->file) msg += "\n>>> defined in: " + toString(corrected->file); } } if (sym.getName().startswith("_ZTV")) msg += "\nthe vtable symbol may be undefined because the class is missing " "its key function (see https://lld.llvm.org/missingkeyfunction)"; if (undef.isWarning) warn(msg); else error(msg); } template void reportUndefinedSymbols() { // Find the first "undefined symbol" diagnostic for each diagnostic, and // collect all "referenced from" lines at the first diagnostic. DenseMap firstRef; for (UndefinedDiag &undef : undefs) { assert(undef.locs.size() == 1); if (UndefinedDiag *canon = firstRef.lookup(undef.sym)) { canon->locs.push_back(undef.locs[0]); undef.locs.clear(); } else firstRef[undef.sym] = &undef; } // Enable spell corrector for the first 2 diagnostics. for (auto it : enumerate(undefs)) if (!it.value().locs.empty()) reportUndefinedSymbol(it.value(), it.index() < 2); undefs.clear(); } // Report an undefined symbol if necessary. // Returns true if the undefined symbol will produce an error message. static bool maybeReportUndefined(Symbol &sym, InputSectionBase &sec, uint64_t offset) { if (!sym.isUndefined() || sym.isWeak()) return false; bool canBeExternal = !sym.isLocal() && sym.visibility == STV_DEFAULT; if (config->unresolvedSymbols == UnresolvedPolicy::Ignore && canBeExternal) return false; // clang (as of 2019-06-12) / gcc (as of 8.2.1) PPC64 may emit a .rela.toc // which references a switch table in a discarded .rodata/.text section. The // .toc and the .rela.toc are incorrectly not placed in the comdat. The ELF // spec says references from outside the group to a STB_LOCAL symbol are not // allowed. Work around the bug. 
if (config->emachine == EM_PPC64 && cast(sym).discardedSecIdx != 0 && sec.name == ".toc") return false; bool isWarning = (config->unresolvedSymbols == UnresolvedPolicy::Warn && canBeExternal) || config->noinhibitExec; undefs.push_back({&sym, {{&sec, offset}}, isWarning}); return !isWarning; } // MIPS N32 ABI treats series of successive relocations with the same offset // as a single relocation. The similar approach used by N64 ABI, but this ABI // packs all relocations into the single relocation record. Here we emulate // this for the N32 ABI. Iterate over relocation with the same offset and put // theirs types into the single bit-set. template static RelType getMipsN32RelType(RelTy *&rel, RelTy *end) { RelType type = 0; uint64_t offset = rel->r_offset; int n = 0; while (rel != end && rel->r_offset == offset) type |= (rel++)->getType(config->isMips64EL) << (8 * n++); return type; } // .eh_frame sections are mergeable input sections, so their input // offsets are not linearly mapped to output section. For each input // offset, we need to find a section piece containing the offset and // add the piece's base address to the input offset to compute the // output offset. That isn't cheap. // // This class is to speed up the offset computation. When we process // relocations, we access offsets in the monotonically increasing // order. So we can optimize for that access pattern. // // For sections other than .eh_frame, this class doesn't do anything. namespace { class OffsetGetter { public: explicit OffsetGetter(InputSectionBase &sec) { if (auto *eh = dyn_cast(&sec)) pieces = eh->pieces; } // Translates offsets in input sections to offsets in output sections. // Given offset must increase monotonically. We assume that Piece is // sorted by inputOff. 
uint64_t get(uint64_t off) { if (pieces.empty()) return off; while (i != pieces.size() && pieces[i].inputOff + pieces[i].size <= off) ++i; if (i == pieces.size()) fatal(".eh_frame: relocation is not in any piece"); // Pieces must be contiguous, so there must be no holes in between. assert(pieces[i].inputOff <= off && "Relocation not in any piece"); // Offset -1 means that the piece is dead (i.e. garbage collected). if (pieces[i].outputOff == -1) return -1; return pieces[i].outputOff + off - pieces[i].inputOff; } private: ArrayRef pieces; size_t i = 0; }; } // namespace static void addRelativeReloc(InputSectionBase *isec, uint64_t offsetInSec, Symbol *sym, int64_t addend, RelExpr expr, RelType type) { Partition &part = isec->getPartition(); // Add a relative relocation. If relrDyn section is enabled, and the // relocation offset is guaranteed to be even, add the relocation to // the relrDyn section, otherwise add it to the relaDyn section. // relrDyn sections don't support odd offsets. Also, relrDyn sections // don't store the addend values, so we must write it to the relocated // address. if (part.relrDyn && isec->alignment >= 2 && offsetInSec % 2 == 0) { isec->relocations.push_back({expr, type, offsetInSec, addend, sym}); part.relrDyn->relocs.push_back({isec, offsetInSec}); return; } part.relaDyn->addReloc(target->relativeRel, isec, offsetInSec, sym, addend, expr, type); } template static void addPltEntry(PltSection *plt, GotPltSection *gotPlt, RelocationBaseSection *rel, RelType type, Symbol &sym) { plt->addEntry(sym); gotPlt->addEntry(sym); rel->addReloc( {type, gotPlt, sym.getGotPltOffset(), !sym.isPreemptible, &sym, 0}); } static void addGotEntry(Symbol &sym) { in.got->addEntry(sym); RelExpr expr = sym.isTls() ? R_TLS : R_ABS; uint64_t off = sym.getGotOffset(); // If a GOT slot value can be calculated at link-time, which is now, // we can just fill that out. 
// // (We don't actually write a value to a GOT slot right now, but we // add a static relocation to a Relocations vector so that // InputSection::relocate will do the work for us. We may be able // to just write a value now, but it is a TODO.) bool isLinkTimeConstant = !sym.isPreemptible && (!config->isPic || isAbsolute(sym)); if (isLinkTimeConstant) { in.got->relocations.push_back({expr, target->symbolicRel, off, 0, &sym}); return; } // Otherwise, we emit a dynamic relocation to .rel[a].dyn so that // the GOT slot will be fixed at load-time. if (!sym.isTls() && !sym.isPreemptible && config->isPic && !isAbsolute(sym)) { addRelativeReloc(in.got, off, &sym, 0, R_ABS, target->symbolicRel); return; } mainPart->relaDyn->addReloc( sym.isTls() ? target->tlsGotRel : target->gotRel, in.got, off, &sym, 0, sym.isPreemptible ? R_ADDEND : R_ABS, target->symbolicRel); } // Return true if we can define a symbol in the executable that // contains the value/function of a symbol defined in a shared // library. static bool canDefineSymbolInExecutable(Symbol &sym) { // If the symbol has default visibility the symbol defined in the // executable will preempt it. // Note that we want the visibility of the shared symbol itself, not // the visibility of the symbol in the output file we are producing. That is // why we use Sym.stOther. if ((sym.stOther & 0x3) == STV_DEFAULT) return true; // If we are allowed to break address equality of functions, defining // a plt entry will allow the program to call the function in the // .so, but the .so and the executable will no agree on the address // of the function. Similar logic for objects. return ((sym.isFunc() && config->ignoreFunctionAddressEquality) || (sym.isObject() && config->ignoreDataAddressEquality)); } // The reason we have to do this early scan is as follows // * To mmap the output file, we need to know the size // * For that, we need to know how many dynamic relocs we will have. 
// It might be possible to avoid this by outputting the file with write: // * Write the allocated output sections, computing addresses. // * Apply relocations, recording which ones require a dynamic reloc. // * Write the dynamic relocations. // * Write the rest of the file. // This would have some drawbacks. For example, we would only know if .rela.dyn // is needed after applying relocations. If it is, it will go after rw and rx // sections. Given that it is ro, we will need an extra PT_LOAD. This // complicates things for the dynamic linker and means we would have to reserve // space for the extra PT_LOAD even if we end up not using it. template static void processRelocAux(InputSectionBase &sec, RelExpr expr, RelType type, uint64_t offset, Symbol &sym, const RelTy &rel, int64_t addend) { // If the relocation is known to be a link-time constant, we know no dynamic // relocation will be created, pass the control to relocateAlloc() or // relocateNonAlloc() to resolve it. // // The behavior of an undefined weak reference is implementation defined. If // the relocation is to a weak undef, and we are producing an executable, let // relocate{,Non}Alloc() resolve it. if (isStaticLinkTimeConstant(expr, type, sym, sec, offset) || (!config->shared && sym.isUndefWeak())) { sec.relocations.push_back({expr, type, offset, addend, &sym}); return; } bool canWrite = (sec.flags & SHF_WRITE) || !config->zText; if (canWrite) { RelType rel = target->getDynRel(type); if (expr == R_GOT || (rel == target->symbolicRel && !sym.isPreemptible)) { addRelativeReloc(&sec, offset, &sym, addend, expr, type); return; } else if (rel != 0) { if (config->emachine == EM_MIPS && rel == target->symbolicRel) rel = target->relativeRel; sec.getPartition().relaDyn->addReloc(rel, &sec, offset, &sym, addend, R_ADDEND, type); // MIPS ABI turns using of GOT and dynamic relocations inside out. 
// While regular ABI uses dynamic relocations to fill up GOT entries // MIPS ABI requires dynamic linker to fills up GOT entries using // specially sorted dynamic symbol table. This affects even dynamic // relocations against symbols which do not require GOT entries // creation explicitly, i.e. do not have any GOT-relocations. So if // a preemptible symbol has a dynamic relocation we anyway have // to create a GOT entry for it. // If a non-preemptible symbol has a dynamic relocation against it, // dynamic linker takes it st_value, adds offset and writes down // result of the dynamic relocation. In case of preemptible symbol // dynamic linker performs symbol resolution, writes the symbol value // to the GOT entry and reads the GOT entry when it needs to perform // a dynamic relocation. // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf p.4-19 if (config->emachine == EM_MIPS) in.mipsGot->addEntry(*sec.file, sym, addend, expr); return; } } // When producing an executable, we can perform copy relocations (for // STT_OBJECT) and canonical PLT (for STT_FUNC). if (!config->shared) { if (!canDefineSymbolInExecutable(sym)) { errorOrWarn("cannot preempt symbol: " + toString(sym) + getLocation(sec, sym, offset)); return; } if (sym.isObject()) { // Produce a copy relocation. if (auto *ss = dyn_cast(&sym)) { if (!config->zCopyreloc) error("unresolvable relocation " + toString(type) + " against symbol '" + toString(*ss) + "'; recompile with -fPIC or remove '-z nocopyreloc'" + getLocation(sec, sym, offset)); addCopyRelSymbol(*ss); } sec.relocations.push_back({expr, type, offset, addend, &sym}); return; } // This handles a non PIC program call to function in a shared library. In // an ideal world, we could just report an error saying the relocation can // overflow at runtime. In the real world with glibc, crt1.o has a // R_X86_64_PC32 pointing to libc.so. // // The general idea on how to handle such cases is to create a PLT entry and // use that as the function value. 
// // For the static linking part, we just return a plt expr and everything // else will use the PLT entry as the address. // // The remaining problem is making sure pointer equality still works. We // need the help of the dynamic linker for that. We let it know that we have // a direct reference to a so symbol by creating an undefined symbol with a // non zero st_value. Seeing that, the dynamic linker resolves the symbol to // the value of the symbol we created. This is true even for got entries, so // pointer equality is maintained. To avoid an infinite loop, the only entry // that points to the real function is a dedicated got entry used by the // plt. That is identified by special relocation types (R_X86_64_JUMP_SLOT, // R_386_JMP_SLOT, etc). // For position independent executable on i386, the plt entry requires ebx // to be set. This causes two problems: // * If some code has a direct reference to a function, it was probably // compiled without -fPIE/-fPIC and doesn't maintain ebx. // * If a library definition gets preempted to the executable, it will have // the wrong ebx value. if (sym.isFunc()) { if (config->pie && config->emachine == EM_386) errorOrWarn("symbol '" + toString(sym) + "' cannot be preempted; recompile with -fPIE" + getLocation(sec, sym, offset)); if (!sym.isInPlt()) addPltEntry(in.plt, in.gotPlt, in.relaPlt, target->pltRel, sym); if (!sym.isDefined()) { replaceWithDefined( sym, in.plt, target->pltHeaderSize + target->pltEntrySize * sym.pltIndex, 0); if (config->emachine == EM_PPC) { // PPC32 canonical PLT entries are at the beginning of .glink cast(sym).value = in.plt->headerSize; in.plt->headerSize += 16; + cast(in.plt)->canonical_plts.push_back(&sym); } } sym.needsPltAddr = true; sec.relocations.push_back({expr, type, offset, addend, &sym}); return; } } if (config->isPic) { if (!canWrite && !isRelExpr(expr)) errorOrWarn( "can't create dynamic relocation " + toString(type) + " against " + (sym.getName().empty() ? 
"local symbol" : "symbol: " + toString(sym)) + " in readonly segment; recompile object files with -fPIC " "or pass '-Wl,-z,notext' to allow text relocations in the output" + getLocation(sec, sym, offset)); else errorOrWarn( "relocation " + toString(type) + " cannot be used against " + (sym.getName().empty() ? "local symbol" : "symbol " + toString(sym)) + "; recompile with -fPIC" + getLocation(sec, sym, offset)); return; } errorOrWarn("symbol '" + toString(sym) + "' has no type" + getLocation(sec, sym, offset)); } template static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, RelTy *end) { const RelTy &rel = *i; uint32_t symIndex = rel.getSymbol(config->isMips64EL); Symbol &sym = sec.getFile()->getSymbol(symIndex); RelType type; // Deal with MIPS oddity. if (config->mipsN32Abi) { type = getMipsN32RelType(i, end); } else { type = rel.getType(config->isMips64EL); ++i; } // Get an offset in an output section this relocation is applied to. uint64_t offset = getOffset.get(rel.r_offset); if (offset == uint64_t(-1)) return; // Error if the target symbol is undefined. Symbol index 0 may be used by // marker relocations, e.g. R_*_NONE and R_ARM_V4BX. Don't error on them. if (symIndex != 0 && maybeReportUndefined(sym, sec, rel.r_offset)) return; const uint8_t *relocatedAddr = sec.data().begin() + rel.r_offset; RelExpr expr = target->getRelExpr(type, sym, relocatedAddr); // Ignore R_*_NONE and other marker relocations. if (expr == R_NONE) return; // We can separate the small code model relocations into 2 categories: // 1) Those that access the compiler generated .toc sections. // 2) Those that access the linker allocated got entries. // lld allocates got entries to symbols on demand. Since we don't try to sort // the got entries in any way, we don't have to track which objects have // got-based small code model relocs. 
The .toc sections get placed after the // end of the linker allocated .got section and we do sort those so sections // addressed with small code model relocations come first. if (config->emachine == EM_PPC64 && isPPC64SmallCodeModelTocReloc(type)) sec.file->ppc64SmallCodeModelTocRelocs = true; if (sym.isGnuIFunc() && !config->zText && config->warnIfuncTextrel) { warn("using ifunc symbols when text relocations are allowed may produce " "a binary that will segfault, if the object file is linked with " "old version of glibc (glibc 2.28 and earlier). If this applies to " "you, consider recompiling the object files without -fPIC and " "without -Wl,-z,notext option. Use -no-warn-ifunc-textrel to " "turn off this warning." + getLocation(sec, sym, offset)); } // Read an addend. int64_t addend = computeAddend(rel, end, sec, expr, sym.isLocal()); // Relax relocations. // // If we know that a PLT entry will be resolved within the same ELF module, we // can skip PLT access and directly jump to the destination function. For // example, if we are linking a main executable, all dynamic symbols that can // be resolved within the executable will actually be resolved that way at // runtime, because the main executable is always at the beginning of a search // list. We can leverage that fact. if (!sym.isPreemptible && (!sym.isGnuIFunc() || config->zIfuncNoplt)) { if (expr == R_GOT_PC && !isAbsoluteValue(sym)) { expr = target->adjustRelaxExpr(type, relocatedAddr, expr); } else { // The 0x8000 bit of r_addend of R_PPC_PLTREL24 is used to choose call // stub type. It should be ignored if optimized to R_PC. if (config->emachine == EM_PPC && expr == R_PPC32_PLTREL) addend &= ~0x8000; expr = fromPlt(expr); } } // If the relocation does not emit a GOT or GOTPLT entry but its computation // uses their addresses, we need GOT or GOTPLT to be created. // // The 4 types that relative GOTPLT are all x86 and x86-64 specific. 
if (oneof(expr)) { in.gotPlt->hasGotPltOffRel = true; } else if (oneof( expr)) { in.got->hasGotOffRel = true; } // Process some TLS relocations, including relaxing TLS relocations. // Note that this function does not handle all TLS relocations. if (unsigned processed = handleTlsRelocation(type, sym, sec, offset, addend, expr)) { i += (processed - 1); return; } // We were asked not to generate PLT entries for ifuncs. Instead, pass the // direct relocation on through. if (sym.isGnuIFunc() && config->zIfuncNoplt) { sym.exportDynamic = true; mainPart->relaDyn->addReloc(type, &sec, offset, &sym, addend, R_ADDEND, type); return; } // Non-preemptible ifuncs require special handling. First, handle the usual // case where the symbol isn't one of these. if (!sym.isGnuIFunc() || sym.isPreemptible) { // If a relocation needs PLT, we create PLT and GOTPLT slots for the symbol. if (needsPlt(expr) && !sym.isInPlt()) addPltEntry(in.plt, in.gotPlt, in.relaPlt, target->pltRel, sym); // Create a GOT slot if a relocation needs GOT. if (needsGot(expr)) { if (config->emachine == EM_MIPS) { // MIPS ABI has special rules to process GOT entries and doesn't // require relocation entries for them. A special case is TLS // relocations. In that case dynamic loader applies dynamic // relocations to initialize TLS GOT entries. // See "Global Offset Table" in Chapter 5 in the following document // for detailed description: // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf in.mipsGot->addEntry(*sec.file, sym, addend, expr); } else if (!sym.isInGot()) { addGotEntry(sym); } } } else { // Handle a reference to a non-preemptible ifunc. These are special in a // few ways: // // - Unlike most non-preemptible symbols, non-preemptible ifuncs do not have // a fixed value. But assuming that all references to the ifunc are // GOT-generating or PLT-generating, the handling of an ifunc is // relatively straightforward. 
We create a PLT entry in Iplt, which is // usually at the end of .plt, which makes an indirect call using a // matching GOT entry in igotPlt, which is usually at the end of .got.plt. // The GOT entry is relocated using an IRELATIVE relocation in relaIplt, // which is usually at the end of .rela.plt. Unlike most relocations in // .rela.plt, which may be evaluated lazily without -z now, dynamic // loaders evaluate IRELATIVE relocs eagerly, which means that for // IRELATIVE relocs only, GOT-generating relocations can point directly to // .got.plt without requiring a separate GOT entry. // // - Despite the fact that an ifunc does not have a fixed value, compilers // that are not passed -fPIC will assume that they do, and will emit // direct (non-GOT-generating, non-PLT-generating) relocations to the // symbol. This means that if a direct relocation to the symbol is // seen, the linker must set a value for the symbol, and this value must // be consistent no matter what type of reference is made to the symbol. // This can be done by creating a PLT entry for the symbol in the way // described above and making it canonical, that is, making all references // point to the PLT entry instead of the resolver. In lld we also store // the address of the PLT entry in the dynamic symbol table, which means // that the symbol will also have the same value in other modules. // Because the value loaded from the GOT needs to be consistent with // the value computed using a direct relocation, a non-preemptible ifunc // may end up with two GOT entries, one in .got.plt that points to the // address returned by the resolver and is used only by the PLT entry, // and another in .got that points to the PLT entry and is used by // GOT-generating relocations. // // - The fact that these symbols do not have a fixed value makes them an // exception to the general rule that a statically linked executable does // not require any form of dynamic relocation. 
To handle these relocations // correctly, the IRELATIVE relocations are stored in an array which a // statically linked executable's startup code must enumerate using the // linker-defined symbols __rela?_iplt_{start,end}. if (!sym.isInPlt()) { // Create PLT and GOTPLT slots for the symbol. sym.isInIplt = true; // Create a copy of the symbol to use as the target of the IRELATIVE // relocation in the igotPlt. This is in case we make the PLT canonical // later, which would overwrite the original symbol. // // FIXME: Creating a copy of the symbol here is a bit of a hack. All // that's really needed to create the IRELATIVE is the section and value, // so ideally we should just need to copy those. auto *directSym = make(cast(sym)); addPltEntry(in.iplt, in.igotPlt, in.relaIplt, target->iRelativeRel, *directSym); sym.pltIndex = directSym->pltIndex; } if (needsGot(expr)) { // Redirect GOT accesses to point to the Igot. // // This field is also used to keep track of whether we ever needed a GOT // entry. If we did and we make the PLT canonical later, we'll need to // create a GOT entry pointing to the PLT entry for Sym. sym.gotInIgot = true; } else if (!needsPlt(expr)) { // Make the ifunc's PLT entry canonical by changing the value of its // symbol to redirect all references to point to it. auto &d = cast(sym); d.section = in.iplt; d.value = sym.pltIndex * target->ipltEntrySize; d.size = 0; // It's important to set the symbol type here so that dynamic loaders // don't try to call the PLT as if it were an ifunc resolver. d.type = STT_FUNC; if (sym.gotInIgot) { // We previously encountered a GOT generating reference that we // redirected to the Igot. Now that the PLT entry is canonical we must // clear the redirection to the Igot and add a GOT entry. As we've // changed the symbol type to STT_FUNC future GOT generating references // will naturally use this GOT entry. // // We don't need to worry about creating a MIPS GOT here because ifuncs // aren't a thing on MIPS. 
sym.gotInIgot = false; addGotEntry(sym); } } } processRelocAux(sec, expr, type, offset, sym, rel, addend); } template static void scanRelocs(InputSectionBase &sec, ArrayRef rels) { OffsetGetter getOffset(sec); // Not all relocations end up in Sec.Relocations, but a lot do. sec.relocations.reserve(rels.size()); for (auto i = rels.begin(), end = rels.end(); i != end;) scanReloc(sec, getOffset, i, end); // Sort relocations by offset for more efficient searching for // R_RISCV_PCREL_HI20 and R_PPC64_ADDR64. if (config->emachine == EM_RISCV || (config->emachine == EM_PPC64 && sec.name == ".toc")) llvm::stable_sort(sec.relocations, [](const Relocation &lhs, const Relocation &rhs) { return lhs.offset < rhs.offset; }); } template void scanRelocations(InputSectionBase &s) { if (s.areRelocsRela) scanRelocs(s, s.relas()); else scanRelocs(s, s.rels()); } static bool mergeCmp(const InputSection *a, const InputSection *b) { // std::merge requires a strict weak ordering. if (a->outSecOff < b->outSecOff) return true; if (a->outSecOff == b->outSecOff) { auto *ta = dyn_cast(a); auto *tb = dyn_cast(b); // Check if Thunk is immediately before any specific Target // InputSection for example Mips LA25 Thunks. if (ta && ta->getTargetInputSection() == b) return true; // Place Thunk Sections without specific targets before // non-Thunk Sections. if (ta && !tb && !ta->getTargetInputSection()) return true; } return false; } // Call Fn on every executable InputSection accessed via the linker script // InputSectionDescription::Sections. 
static void forEachInputSectionDescription( ArrayRef outputSections, llvm::function_ref fn) { for (OutputSection *os : outputSections) { if (!(os->flags & SHF_ALLOC) || !(os->flags & SHF_EXECINSTR)) continue; for (BaseCommand *bc : os->sectionCommands) if (auto *isd = dyn_cast(bc)) fn(os, isd); } } // Thunk Implementation // // Thunks (sometimes called stubs, veneers or branch islands) are small pieces // of code that the linker inserts inbetween a caller and a callee. The thunks // are added at link time rather than compile time as the decision on whether // a thunk is needed, such as the caller and callee being out of range, can only // be made at link time. // // It is straightforward to tell given the current state of the program when a // thunk is needed for a particular call. The more difficult part is that // the thunk needs to be placed in the program such that the caller can reach // the thunk and the thunk can reach the callee; furthermore, adding thunks to // the program alters addresses, which can mean more thunks etc. // // In lld we have a synthetic ThunkSection that can hold many Thunks. // The decision to have a ThunkSection act as a container means that we can // more easily handle the most common case of a single block of contiguous // Thunks by inserting just a single ThunkSection. // // The implementation of Thunks in lld is split across these areas // Relocations.cpp : Framework for creating and placing thunks // Thunks.cpp : The code generated for each supported thunk // Target.cpp : Target specific hooks that the framework uses to decide when // a thunk is used // Synthetic.cpp : Implementation of ThunkSection // Writer.cpp : Iteratively call framework until no more Thunks added // // Thunk placement requirements: // Mips LA25 thunks. These must be placed immediately before the callee section // We can assume that the caller is in range of the Thunk. 
These are modelled // by Thunks that return the section they must precede with // getTargetInputSection(). // // ARM interworking and range extension thunks. These thunks must be placed // within range of the caller. All implemented ARM thunks can always reach the // callee as they use an indirect jump via a register that has no range // restrictions. // // Thunk placement algorithm: // For Mips LA25 ThunkSections; the placement is explicit, it has to be before // getTargetInputSection(). // // For thunks that must be placed within range of the caller there are many // possible choices given that the maximum range from the caller is usually // much larger than the average InputSection size. Desirable properties include: // - Maximize reuse of thunks by multiple callers // - Minimize number of ThunkSections to simplify insertion // - Handle impact of already added Thunks on addresses // - Simple to understand and implement // // In lld for the first pass, we pre-create one or more ThunkSections per // InputSectionDescription at Target specific intervals. A ThunkSection is // placed so that the estimated end of the ThunkSection is within range of the // start of the InputSectionDescription or the previous ThunkSection. For // example: // InputSectionDescription // Section 0 // ... // Section N // ThunkSection 0 // Section N + 1 // ... // Section N + K // Thunk Section 1 // // The intention is that we can add a Thunk to a ThunkSection that is well // spaced enough to service a number of callers without having to do a lot // of work. An important principle is that it is not an error if a Thunk cannot // be placed in a pre-created ThunkSection; when this happens we create a new // ThunkSection placed next to the caller. This allows us to handle the vast // majority of thunks simply, but also handle rare cases where the branch range // is smaller than the target specific spacing. 
// // The algorithm is expected to create all the thunks that are needed in a // single pass, with a small number of programs needing a second pass due to // the insertion of thunks in the first pass increasing the offset between // callers and callees that were only just in range. // // A consequence of allowing new ThunkSections to be created outside of the // pre-created ThunkSections is that in rare cases calls to Thunks that were in // range in pass K, are out of range in some pass > K due to the insertion of // more Thunks in between the caller and callee. When this happens we retarget // the relocation back to the original target and create another Thunk. // Remove ThunkSections that are empty, this should only be the initial set // precreated on pass 0. // Insert the Thunks for OutputSection OS into their designated place // in the Sections vector, and recalculate the InputSection output section // offsets. // This may invalidate any output section offsets stored outside of InputSection void ThunkCreator::mergeThunks(ArrayRef outputSections) { forEachInputSectionDescription( outputSections, [&](OutputSection *os, InputSectionDescription *isd) { if (isd->thunkSections.empty()) return; // Remove any zero sized precreated Thunks. llvm::erase_if(isd->thunkSections, [](const std::pair &ts) { return ts.first->getSize() == 0; }); // ISD->ThunkSections contains all created ThunkSections, including // those inserted in previous passes. Extract the Thunks created this // pass and order them in ascending outSecOff. 
std::vector newThunks; for (std::pair ts : isd->thunkSections) if (ts.second == pass) newThunks.push_back(ts.first); llvm::stable_sort(newThunks, [](const ThunkSection *a, const ThunkSection *b) { return a->outSecOff < b->outSecOff; }); // Merge sorted vectors of Thunks and InputSections by outSecOff std::vector tmp; tmp.reserve(isd->sections.size() + newThunks.size()); std::merge(isd->sections.begin(), isd->sections.end(), newThunks.begin(), newThunks.end(), std::back_inserter(tmp), mergeCmp); isd->sections = std::move(tmp); }); } // Find or create a ThunkSection within the InputSectionDescription (ISD) that // is in range of Src. An ISD maps to a range of InputSections described by a // linker script section pattern such as { .text .text.* }. ThunkSection *ThunkCreator::getISDThunkSec(OutputSection *os, InputSection *isec, InputSectionDescription *isd, uint32_t type, uint64_t src) { for (std::pair tp : isd->thunkSections) { ThunkSection *ts = tp.first; uint64_t tsBase = os->addr + ts->outSecOff; uint64_t tsLimit = tsBase + ts->getSize(); if (target->inBranchRange(type, src, (src > tsLimit) ? tsBase : tsLimit)) return ts; } // No suitable ThunkSection exists. This can happen when there is a branch // with lower range than the ThunkSection spacing or when there are too // many Thunks. Create a new ThunkSection as close to the InputSection as // possible. Error if InputSection is so large we cannot place ThunkSection // anywhere in Range. uint64_t thunkSecOff = isec->outSecOff; if (!target->inBranchRange(type, src, os->addr + thunkSecOff)) { thunkSecOff = isec->outSecOff + isec->getSize(); if (!target->inBranchRange(type, src, os->addr + thunkSecOff)) fatal("InputSection too large for range extension thunk " + isec->getObjMsg(src - (os->addr + isec->outSecOff))); } return addThunkSection(os, isd, thunkSecOff); } // Add a Thunk that needs to be placed in a ThunkSection that immediately // precedes its Target. 
ThunkSection *ThunkCreator::getISThunkSec(InputSection *isec) {
  // Reuse the ThunkSection already recorded for this InputSection, if any.
  ThunkSection *ts = thunkedSections.lookup(isec);
  if (ts)
    return ts;

  // Find InputSectionRange within Target Output Section (TOS) that the
  // InputSection (IS) that we need to precede is in.
  OutputSection *tos = isec->getParent();
  for (BaseCommand *bc : tos->sectionCommands) {
    auto *isd = dyn_cast<InputSectionDescription>(bc);
    if (!isd || isd->sections.empty())
      continue;

    InputSection *first = isd->sections.front();
    InputSection *last = isd->sections.back();

    // Skip descriptions whose offset range does not contain isec.
    if (isec->outSecOff < first->outSecOff || last->outSecOff < isec->outSecOff)
      continue;

    // Create the ThunkSection at isec's own offset and remember it so later
    // requests for the same InputSection return the same ThunkSection.
    ts = addThunkSection(tos, isd, isec->outSecOff);
    thunkedSections[isec] = ts;
    return ts;
  }

  // No InputSectionDescription of the parent output section covers isec.
  return nullptr;
}

// Create one or more ThunkSections per OS that can be used to place Thunks.
// We attempt to place the ThunkSections using the following desirable
// properties:
// - Within range of the maximum number of callers
// - Minimise the number of ThunkSections
//
// We follow a simple but conservative heuristic to place ThunkSections at
// offsets that are multiples of a Target specific branch range.
// For an InputSectionDescription that is smaller than the range, a single
// ThunkSection at the end of the range will do.
//
// For an InputSectionDescription that is more than twice the size of the range,
// we place the last ThunkSection at range bytes from the end of the
// InputSectionDescription in order to increase the likelihood that the
// distance from a thunk to its target will be sufficiently small to
// allow for the creation of a short thunk.
void ThunkCreator::createInitialThunkSections( ArrayRef outputSections) { uint32_t thunkSectionSpacing = target->getThunkSectionSpacing(); forEachInputSectionDescription( outputSections, [&](OutputSection *os, InputSectionDescription *isd) { if (isd->sections.empty()) return; uint32_t isdBegin = isd->sections.front()->outSecOff; uint32_t isdEnd = isd->sections.back()->outSecOff + isd->sections.back()->getSize(); uint32_t lastThunkLowerBound = -1; if (isdEnd - isdBegin > thunkSectionSpacing * 2) lastThunkLowerBound = isdEnd - thunkSectionSpacing; uint32_t isecLimit; uint32_t prevIsecLimit = isdBegin; uint32_t thunkUpperBound = isdBegin + thunkSectionSpacing; for (const InputSection *isec : isd->sections) { isecLimit = isec->outSecOff + isec->getSize(); if (isecLimit > thunkUpperBound) { addThunkSection(os, isd, prevIsecLimit); thunkUpperBound = prevIsecLimit + thunkSectionSpacing; } if (isecLimit > lastThunkLowerBound) break; prevIsecLimit = isecLimit; } addThunkSection(os, isd, isecLimit); }); } ThunkSection *ThunkCreator::addThunkSection(OutputSection *os, InputSectionDescription *isd, uint64_t off) { auto *ts = make(os, off); ts->partition = os->partition; if ((config->fixCortexA53Errata843419 || config->fixCortexA8) && !isd->sections.empty()) { // The errata fixes are sensitive to addresses modulo 4 KiB. When we add // thunks we disturb the base addresses of sections placed after the thunks // this makes patches we have generated redundant, and may cause us to // generate more patches as different instructions are now in sensitive // locations. When we generate more patches we may force more branches to // go out of range, causing more thunks to be generated. In pathological // cases this can cause the address dependent content pass not to converge. 
// We fix this by rounding up the size of the ThunkSection to 4KiB, this // limits the insertion of a ThunkSection on the addresses modulo 4 KiB, // which means that adding Thunks to the section does not invalidate // errata patches for following code. // Rounding up the size to 4KiB has consequences for code-size and can // trip up linker script defined assertions. For example the linux kernel // has an assertion that what LLD represents as an InputSectionDescription // does not exceed 4 KiB even if the overall OutputSection is > 128 Mib. // We use the heuristic of rounding up the size when both of the following // conditions are true: // 1.) The OutputSection is larger than the ThunkSectionSpacing. This // accounts for the case where no single InputSectionDescription is // larger than the OutputSection size. This is conservative but simple. // 2.) The InputSectionDescription is larger than 4 KiB. This will prevent // any assertion failures that an InputSectionDescription is < 4 KiB // in size. uint64_t isdSize = isd->sections.back()->outSecOff + isd->sections.back()->getSize() - isd->sections.front()->outSecOff; if (os->size > target->getThunkSectionSpacing() && isdSize > 4096) ts->roundUpSizeForErrata = true; } isd->thunkSections.push_back({ts, pass}); return ts; } static bool isThunkSectionCompatible(InputSection *source, SectionBase *target) { // We can't reuse thunks in different loadable partitions because they might // not be loaded. But partition 1 (the main partition) will always be loaded. 
if (source->partition != target->partition) return target->partition == 1; return true; } static int64_t getPCBias(RelType type) { if (config->emachine != EM_ARM) return 0; switch (type) { case R_ARM_THM_JUMP19: case R_ARM_THM_JUMP24: case R_ARM_THM_CALL: return 4; default: return 8; } } std::pair ThunkCreator::getThunk(InputSection *isec, Relocation &rel, uint64_t src) { std::vector *thunkVec = nullptr; int64_t addend = rel.addend + getPCBias(rel.type); // We use a ((section, offset), addend) pair to find the thunk position if // possible so that we create only one thunk for aliased symbols or ICFed // sections. There may be multiple relocations sharing the same (section, // offset + addend) pair. We may revert the relocation back to its original // non-Thunk target, so we cannot fold offset + addend. if (auto *d = dyn_cast(rel.sym)) if (!d->isInPlt() && d->section) thunkVec = &thunkedSymbolsBySectionAndAddend[{ {d->section->repl, d->value}, addend}]; if (!thunkVec) thunkVec = &thunkedSymbols[{rel.sym, addend}]; // Check existing Thunks for Sym to see if they can be reused for (Thunk *t : *thunkVec) if (isThunkSectionCompatible(isec, t->getThunkTargetSym()->section) && t->isCompatibleWith(*isec, rel) && target->inBranchRange(rel.type, src, t->getThunkTargetSym()->getVA(rel.addend) + getPCBias(rel.type))) return std::make_pair(t, false); // No existing compatible Thunk in range, create a new one Thunk *t = addThunk(*isec, rel); thunkVec->push_back(t); return std::make_pair(t, true); } // Return true if the relocation target is an in range Thunk. // Return false if the relocation is not to a Thunk. If the relocation target // was originally to a Thunk, but is no longer in range we revert the // relocation back to its original non-Thunk target. 
bool ThunkCreator::normalizeExistingThunk(Relocation &rel, uint64_t src) { if (Thunk *t = thunks.lookup(rel.sym)) { if (target->inBranchRange(rel.type, src, rel.sym->getVA(rel.addend) + getPCBias(rel.type))) return true; rel.sym = &t->destination; rel.addend = t->addend; if (rel.sym->isInPlt()) rel.expr = toPlt(rel.expr); } return false; } // Process all relocations from the InputSections that have been assigned // to InputSectionDescriptions and redirect through Thunks if needed. The // function should be called iteratively until it returns false. // // PreConditions: // All InputSections that may need a Thunk are reachable from // OutputSectionCommands. // // All OutputSections have an address and all InputSections have an offset // within the OutputSection. // // The offsets between caller (relocation place) and callee // (relocation target) will not be modified outside of createThunks(). // // PostConditions: // If return value is true then ThunkSections have been inserted into // OutputSections. All relocations that needed a Thunk based on the information // available to createThunks() on entry have been redirected to a Thunk. Note // that adding Thunks changes offsets between caller and callee so more Thunks // may be required. // // If return value is false then no more Thunks are needed, and createThunks has // made no changes. If the target requires range extension thunks, currently // ARM, then any future change in offset between caller and callee risks a // relocation out of range error. bool ThunkCreator::createThunks(ArrayRef outputSections) { bool addressesChanged = false; if (pass == 0 && target->getThunkSectionSpacing()) createInitialThunkSections(outputSections); // Create all the Thunks and insert them into synthetic ThunkSections. The // ThunkSections are later inserted back into InputSectionDescriptions. 
// We separate the creation of ThunkSections from the insertion of the // ThunkSections as ThunkSections are not always inserted into the same // InputSectionDescription as the caller. forEachInputSectionDescription( outputSections, [&](OutputSection *os, InputSectionDescription *isd) { for (InputSection *isec : isd->sections) for (Relocation &rel : isec->relocations) { uint64_t src = isec->getVA(rel.offset); // If we are a relocation to an existing Thunk, check if it is // still in range. If not then Rel will be altered to point to its // original target so another Thunk can be generated. if (pass > 0 && normalizeExistingThunk(rel, src)) continue; if (!target->needsThunk(rel.expr, rel.type, isec->file, src, *rel.sym, rel.addend)) continue; Thunk *t; bool isNew; std::tie(t, isNew) = getThunk(isec, rel, src); if (isNew) { // Find or create a ThunkSection for the new Thunk ThunkSection *ts; if (auto *tis = t->getTargetInputSection()) ts = getISThunkSec(tis); else ts = getISDThunkSec(os, isec, isd, rel.type, src); ts->addThunk(t); thunks[t->getThunkTargetSym()] = t; } // Redirect relocation to Thunk, we never go via the PLT to a Thunk rel.sym = t->getThunkTargetSym(); rel.expr = fromPlt(rel.expr); // On AArch64 and PPC, a jump/call relocation may be encoded as // STT_SECTION + non-zero addend, clear the addend after // redirection. 
if (config->emachine != EM_MIPS) rel.addend = -getPCBias(rel.type); } for (auto &p : isd->thunkSections) addressesChanged |= p.first->assignOffsets(); }); for (auto &p : thunkedSections) addressesChanged |= p.second->assignOffsets(); // Merge all created synthetic ThunkSections back into OutputSection mergeThunks(outputSections); ++pass; return addressesChanged; } template void scanRelocations(InputSectionBase &); template void scanRelocations(InputSectionBase &); template void scanRelocations(InputSectionBase &); template void scanRelocations(InputSectionBase &); template void reportUndefinedSymbols(); template void reportUndefinedSymbols(); template void reportUndefinedSymbols(); template void reportUndefinedSymbols(); } // namespace elf } // namespace lld Index: head/contrib/llvm-project/lld/ELF/SyntheticSections.cpp =================================================================== --- head/contrib/llvm-project/lld/ELF/SyntheticSections.cpp (revision 359083) +++ head/contrib/llvm-project/lld/ELF/SyntheticSections.cpp (revision 359084) @@ -1,3790 +1,3795 @@ //===- SyntheticSections.cpp ----------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file contains linker-synthesized sections. Currently, // synthetic sections are created either output sections or input sections, // but we are rewriting code so that all synthetic sections are created as // input sections. 
// //===----------------------------------------------------------------------===// #include "SyntheticSections.h" #include "Config.h" #include "InputFiles.h" #include "LinkerScript.h" #include "OutputSections.h" #include "SymbolTable.h" #include "Symbols.h" #include "Target.h" #include "Writer.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "lld/Common/Strings.h" #include "lld/Common/Threads.h" #include "lld/Common/Version.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/StringExtras.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/Compression.h" #include "llvm/Support/Endian.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MD5.h" #include #include using namespace llvm; using namespace llvm::dwarf; using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::support; using llvm::support::endian::read32le; using llvm::support::endian::write32le; using llvm::support::endian::write64le; namespace lld { namespace elf { constexpr size_t MergeNoTailSection::numShards; static uint64_t readUint(uint8_t *buf) { return config->is64 ? read64(buf) : read32(buf); } static void writeUint(uint8_t *buf, uint64_t val) { if (config->is64) write64(buf, val); else write32(buf, val); } // Returns an LLD version string. static ArrayRef getVersion() { // Check LLD_VERSION first for ease of testing. // You can get consistent output by using the environment variable. // This is only for testing. StringRef s = getenv("LLD_VERSION"); if (s.empty()) s = saver.save(Twine("Linker: ") + getLLDVersion()); // +1 to include the terminating '\0'. return {(const uint8_t *)s.data(), s.size() + 1}; } // Creates a .comment section containing LLD version info. // With this feature, you can identify LLD-generated binaries easily // by "readelf --string-dump .comment ". // The returned object is a mergeable string section. 
MergeInputSection *createCommentSection() { return make(SHF_MERGE | SHF_STRINGS, SHT_PROGBITS, 1, getVersion(), ".comment"); } // .MIPS.abiflags section. template MipsAbiFlagsSection::MipsAbiFlagsSection(Elf_Mips_ABIFlags flags) : SyntheticSection(SHF_ALLOC, SHT_MIPS_ABIFLAGS, 8, ".MIPS.abiflags"), flags(flags) { this->entsize = sizeof(Elf_Mips_ABIFlags); } template void MipsAbiFlagsSection::writeTo(uint8_t *buf) { memcpy(buf, &flags, sizeof(flags)); } template MipsAbiFlagsSection *MipsAbiFlagsSection::create() { Elf_Mips_ABIFlags flags = {}; bool create = false; for (InputSectionBase *sec : inputSections) { if (sec->type != SHT_MIPS_ABIFLAGS) continue; sec->markDead(); create = true; std::string filename = toString(sec->file); const size_t size = sec->data().size(); // Older version of BFD (such as the default FreeBSD linker) concatenate // .MIPS.abiflags instead of merging. To allow for this case (or potential // zero padding) we ignore everything after the first Elf_Mips_ABIFlags if (size < sizeof(Elf_Mips_ABIFlags)) { error(filename + ": invalid size of .MIPS.abiflags section: got " + Twine(size) + " instead of " + Twine(sizeof(Elf_Mips_ABIFlags))); return nullptr; } auto *s = reinterpret_cast(sec->data().data()); if (s->version != 0) { error(filename + ": unexpected .MIPS.abiflags version " + Twine(s->version)); return nullptr; } // LLD checks ISA compatibility in calcMipsEFlags(). Here we just // select the highest number of ISA/Rev/Ext. 
flags.isa_level = std::max(flags.isa_level, s->isa_level); flags.isa_rev = std::max(flags.isa_rev, s->isa_rev); flags.isa_ext = std::max(flags.isa_ext, s->isa_ext); flags.gpr_size = std::max(flags.gpr_size, s->gpr_size); flags.cpr1_size = std::max(flags.cpr1_size, s->cpr1_size); flags.cpr2_size = std::max(flags.cpr2_size, s->cpr2_size); flags.ases |= s->ases; flags.flags1 |= s->flags1; flags.flags2 |= s->flags2; flags.fp_abi = getMipsFpAbiFlag(flags.fp_abi, s->fp_abi, filename); }; if (create) return make>(flags); return nullptr; } // .MIPS.options section. template MipsOptionsSection::MipsOptionsSection(Elf_Mips_RegInfo reginfo) : SyntheticSection(SHF_ALLOC, SHT_MIPS_OPTIONS, 8, ".MIPS.options"), reginfo(reginfo) { this->entsize = sizeof(Elf_Mips_Options) + sizeof(Elf_Mips_RegInfo); } template void MipsOptionsSection::writeTo(uint8_t *buf) { auto *options = reinterpret_cast(buf); options->kind = ODK_REGINFO; options->size = getSize(); if (!config->relocatable) reginfo.ri_gp_value = in.mipsGot->getGp(); memcpy(buf + sizeof(Elf_Mips_Options), ®info, sizeof(reginfo)); } template MipsOptionsSection *MipsOptionsSection::create() { // N64 ABI only. if (!ELFT::Is64Bits) return nullptr; std::vector sections; for (InputSectionBase *sec : inputSections) if (sec->type == SHT_MIPS_OPTIONS) sections.push_back(sec); if (sections.empty()) return nullptr; Elf_Mips_RegInfo reginfo = {}; for (InputSectionBase *sec : sections) { sec->markDead(); std::string filename = toString(sec->file); ArrayRef d = sec->data(); while (!d.empty()) { if (d.size() < sizeof(Elf_Mips_Options)) { error(filename + ": invalid size of .MIPS.options section"); break; } auto *opt = reinterpret_cast(d.data()); if (opt->kind == ODK_REGINFO) { reginfo.ri_gprmask |= opt->getRegInfo().ri_gprmask; sec->getFile()->mipsGp0 = opt->getRegInfo().ri_gp_value; break; } if (!opt->size) fatal(filename + ": zero option descriptor size"); d = d.slice(opt->size); } }; return make>(reginfo); } // MIPS .reginfo section. 
template MipsReginfoSection::MipsReginfoSection(Elf_Mips_RegInfo reginfo) : SyntheticSection(SHF_ALLOC, SHT_MIPS_REGINFO, 4, ".reginfo"), reginfo(reginfo) { this->entsize = sizeof(Elf_Mips_RegInfo); } template void MipsReginfoSection::writeTo(uint8_t *buf) { if (!config->relocatable) reginfo.ri_gp_value = in.mipsGot->getGp(); memcpy(buf, ®info, sizeof(reginfo)); } template MipsReginfoSection *MipsReginfoSection::create() { // Section should be alive for O32 and N32 ABIs only. if (ELFT::Is64Bits) return nullptr; std::vector sections; for (InputSectionBase *sec : inputSections) if (sec->type == SHT_MIPS_REGINFO) sections.push_back(sec); if (sections.empty()) return nullptr; Elf_Mips_RegInfo reginfo = {}; for (InputSectionBase *sec : sections) { sec->markDead(); if (sec->data().size() != sizeof(Elf_Mips_RegInfo)) { error(toString(sec->file) + ": invalid size of .reginfo section"); return nullptr; } auto *r = reinterpret_cast(sec->data().data()); reginfo.ri_gprmask |= r->ri_gprmask; sec->getFile()->mipsGp0 = r->ri_gp_value; }; return make>(reginfo); } InputSection *createInterpSection() { // StringSaver guarantees that the returned string ends with '\0'. StringRef s = saver.save(config->dynamicLinker); ArrayRef contents = {(const uint8_t *)s.data(), s.size() + 1}; return make(nullptr, SHF_ALLOC, SHT_PROGBITS, 1, contents, ".interp"); } Defined *addSyntheticLocal(StringRef name, uint8_t type, uint64_t value, uint64_t size, InputSectionBase §ion) { auto *s = make(section.file, name, STB_LOCAL, STV_DEFAULT, type, value, size, §ion); if (in.symTab) in.symTab->addSymbol(s); return s; } static size_t getHashSize() { switch (config->buildId) { case BuildIdKind::Fast: return 8; case BuildIdKind::Md5: case BuildIdKind::Uuid: return 16; case BuildIdKind::Sha1: return 20; case BuildIdKind::Hexstring: return config->buildIdVector.size(); default: llvm_unreachable("unknown BuildIdKind"); } } // This class represents a linker-synthesized .note.gnu.property section. 
// // In x86 and AArch64, object files may contain feature flags indicating the // features that they have used. The flags are stored in a .note.gnu.property // section. // // lld reads the sections from input files and merges them by computing AND of // the flags. The result is written as a new .note.gnu.property section. // // If the flag is zero (which indicates that the intersection of the feature // sets is empty, or some input files didn't have .note.gnu.property sections), // we don't create this section. GnuPropertySection::GnuPropertySection() : SyntheticSection(llvm::ELF::SHF_ALLOC, llvm::ELF::SHT_NOTE, config->wordsize, ".note.gnu.property") {} void GnuPropertySection::writeTo(uint8_t *buf) { uint32_t featureAndType = config->emachine == EM_AARCH64 ? GNU_PROPERTY_AARCH64_FEATURE_1_AND : GNU_PROPERTY_X86_FEATURE_1_AND; write32(buf, 4); // Name size write32(buf + 4, config->is64 ? 16 : 12); // Content size write32(buf + 8, NT_GNU_PROPERTY_TYPE_0); // Type memcpy(buf + 12, "GNU", 4); // Name string write32(buf + 16, featureAndType); // Feature type write32(buf + 20, 4); // Feature size write32(buf + 24, config->andFeatures); // Feature flags if (config->is64) write32(buf + 28, 0); // Padding } size_t GnuPropertySection::getSize() const { return config->is64 ? 
32 : 28; } BuildIdSection::BuildIdSection() : SyntheticSection(SHF_ALLOC, SHT_NOTE, 4, ".note.gnu.build-id"), hashSize(getHashSize()) {} void BuildIdSection::writeTo(uint8_t *buf) { write32(buf, 4); // Name size write32(buf + 4, hashSize); // Content size write32(buf + 8, NT_GNU_BUILD_ID); // Type memcpy(buf + 12, "GNU", 4); // Name string hashBuf = buf + 16; } void BuildIdSection::writeBuildId(ArrayRef buf) { assert(buf.size() == hashSize); memcpy(hashBuf, buf.data(), hashSize); } BssSection::BssSection(StringRef name, uint64_t size, uint32_t alignment) : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_NOBITS, alignment, name) { this->bss = true; this->size = size; } EhFrameSection::EhFrameSection() : SyntheticSection(SHF_ALLOC, SHT_PROGBITS, 1, ".eh_frame") {} // Search for an existing CIE record or create a new one. // CIE records from input object files are uniquified by their contents // and where their relocations point to. template CieRecord *EhFrameSection::addCie(EhSectionPiece &cie, ArrayRef rels) { Symbol *personality = nullptr; unsigned firstRelI = cie.firstRelocation; if (firstRelI != (unsigned)-1) personality = &cie.sec->template getFile()->getRelocTargetSym(rels[firstRelI]); // Search for an existing CIE by CIE contents/relocation target pair. CieRecord *&rec = cieMap[{cie.data(), personality}]; // If not found, create a new one. if (!rec) { rec = make(); rec->cie = &cie; cieRecords.push_back(rec); } return rec; } // There is one FDE per function. Returns true if a given FDE // points to a live function. template bool EhFrameSection::isFdeLive(EhSectionPiece &fde, ArrayRef rels) { auto *sec = cast(fde.sec); unsigned firstRelI = fde.firstRelocation; // An FDE should point to some function because FDEs are to describe // functions. That's however not always the case due to an issue of // ld.gold with -r. ld.gold may discard only functions and leave their // corresponding FDEs, which results in creating bad .eh_frame sections. 
// To deal with that, we ignore such FDEs. if (firstRelI == (unsigned)-1) return false; const RelTy &rel = rels[firstRelI]; Symbol &b = sec->template getFile()->getRelocTargetSym(rel); // FDEs for garbage-collected or merged-by-ICF sections, or sections in // another partition, are dead. if (auto *d = dyn_cast(&b)) if (SectionBase *sec = d->section) return sec->partition == partition; return false; } // .eh_frame is a sequence of CIE or FDE records. In general, there // is one CIE record per input object file which is followed by // a list of FDEs. This function searches an existing CIE or create a new // one and associates FDEs to the CIE. template void EhFrameSection::addRecords(EhInputSection *sec, ArrayRef rels) { offsetToCie.clear(); for (EhSectionPiece &piece : sec->pieces) { // The empty record is the end marker. if (piece.size == 4) return; size_t offset = piece.inputOff; uint32_t id = read32(piece.data().data() + 4); if (id == 0) { offsetToCie[offset] = addCie(piece, rels); continue; } uint32_t cieOffset = offset + 4 - id; CieRecord *rec = offsetToCie[cieOffset]; if (!rec) fatal(toString(sec) + ": invalid CIE reference"); if (!isFdeLive(piece, rels)) continue; rec->fdes.push_back(&piece); numFdes++; } } template void EhFrameSection::addSectionAux(EhInputSection *sec) { if (!sec->isLive()) return; if (sec->areRelocsRela) addRecords(sec, sec->template relas()); else addRecords(sec, sec->template rels()); } void EhFrameSection::addSection(EhInputSection *sec) { sec->parent = this; alignment = std::max(alignment, sec->alignment); sections.push_back(sec); for (auto *ds : sec->dependentSections) dependentSections.push_back(ds); } static void writeCieFde(uint8_t *buf, ArrayRef d) { memcpy(buf, d.data(), d.size()); size_t aligned = alignTo(d.size(), config->wordsize); // Zero-clear trailing padding if it exists. memset(buf + d.size(), 0, aligned - d.size()); // Fix the size field. -4 since size does not include the size field itself. 
write32(buf, aligned - 4); } void EhFrameSection::finalizeContents() { assert(!this->size); // Not finalized. switch (config->ekind) { case ELFNoneKind: llvm_unreachable("invalid ekind"); case ELF32LEKind: for (EhInputSection *sec : sections) addSectionAux(sec); break; case ELF32BEKind: for (EhInputSection *sec : sections) addSectionAux(sec); break; case ELF64LEKind: for (EhInputSection *sec : sections) addSectionAux(sec); break; case ELF64BEKind: for (EhInputSection *sec : sections) addSectionAux(sec); break; } size_t off = 0; for (CieRecord *rec : cieRecords) { rec->cie->outputOff = off; off += alignTo(rec->cie->size, config->wordsize); for (EhSectionPiece *fde : rec->fdes) { fde->outputOff = off; off += alignTo(fde->size, config->wordsize); } } // The LSB standard does not allow a .eh_frame section with zero // Call Frame Information records. glibc unwind-dw2-fde.c // classify_object_over_fdes expects there is a CIE record length 0 as a // terminator. Thus we add one unconditionally. off += 4; this->size = off; } // Returns data for .eh_frame_hdr. .eh_frame_hdr is a binary search table // to get an FDE from an address to which FDE is applied. This function // returns a list of such pairs. std::vector EhFrameSection::getFdeData() const { uint8_t *buf = Out::bufferStart + getParent()->offset + outSecOff; std::vector ret; uint64_t va = getPartition().ehFrameHdr->getVA(); for (CieRecord *rec : cieRecords) { uint8_t enc = getFdeEncoding(rec->cie); for (EhSectionPiece *fde : rec->fdes) { uint64_t pc = getFdePc(buf, fde->outputOff, enc); uint64_t fdeVA = getParent()->addr + fde->outputOff; if (!isInt<32>(pc - va)) fatal(toString(fde->sec) + ": PC offset is too large: 0x" + Twine::utohexstr(pc - va)); ret.push_back({uint32_t(pc - va), uint32_t(fdeVA - va)}); } } // Sort the FDE list by their PC and uniqueify. Usually there is only // one FDE for a PC (i.e. function), but if ICF merges two functions // into one, there can be more than one FDEs pointing to the address. 
auto less = [](const FdeData &a, const FdeData &b) { return a.pcRel < b.pcRel; }; llvm::stable_sort(ret, less); auto eq = [](const FdeData &a, const FdeData &b) { return a.pcRel == b.pcRel; }; ret.erase(std::unique(ret.begin(), ret.end(), eq), ret.end()); return ret; } static uint64_t readFdeAddr(uint8_t *buf, int size) { switch (size) { case DW_EH_PE_udata2: return read16(buf); case DW_EH_PE_sdata2: return (int16_t)read16(buf); case DW_EH_PE_udata4: return read32(buf); case DW_EH_PE_sdata4: return (int32_t)read32(buf); case DW_EH_PE_udata8: case DW_EH_PE_sdata8: return read64(buf); case DW_EH_PE_absptr: return readUint(buf); } fatal("unknown FDE size encoding"); } // Returns the VA to which a given FDE (on a mmap'ed buffer) is applied to. // We need it to create .eh_frame_hdr section. uint64_t EhFrameSection::getFdePc(uint8_t *buf, size_t fdeOff, uint8_t enc) const { // The starting address to which this FDE applies is // stored at FDE + 8 byte. size_t off = fdeOff + 8; uint64_t addr = readFdeAddr(buf + off, enc & 0xf); if ((enc & 0x70) == DW_EH_PE_absptr) return addr; if ((enc & 0x70) == DW_EH_PE_pcrel) return addr + getParent()->addr + off; fatal("unknown FDE size relative encoding"); } void EhFrameSection::writeTo(uint8_t *buf) { // Write CIE and FDE records. for (CieRecord *rec : cieRecords) { size_t cieOffset = rec->cie->outputOff; writeCieFde(buf + cieOffset, rec->cie->data()); for (EhSectionPiece *fde : rec->fdes) { size_t off = fde->outputOff; writeCieFde(buf + off, fde->data()); // FDE's second word should have the offset to an associated CIE. // Write it. write32(buf + off + 4, off + 4 - cieOffset); } } // Apply relocations. .eh_frame section contents are not contiguous // in the output buffer, but relocateAlloc() still works because // getOffset() takes care of discontiguous section pieces. 
for (EhInputSection *s : sections) s->relocateAlloc(buf, nullptr); if (getPartition().ehFrameHdr && getPartition().ehFrameHdr->getParent()) getPartition().ehFrameHdr->write(); } GotSection::GotSection() : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, config->wordsize, ".got") { // If ElfSym::globalOffsetTable is relative to .got and is referenced, // increase numEntries by the number of entries used to emit // ElfSym::globalOffsetTable. if (ElfSym::globalOffsetTable && !target->gotBaseSymInGotPlt) numEntries += target->gotHeaderEntriesNum; } void GotSection::addEntry(Symbol &sym) { sym.gotIndex = numEntries; ++numEntries; } bool GotSection::addDynTlsEntry(Symbol &sym) { if (sym.globalDynIndex != -1U) return false; sym.globalDynIndex = numEntries; // Global Dynamic TLS entries take two GOT slots. numEntries += 2; return true; } // Reserves TLS entries for a TLS module ID and a TLS block offset. // In total it takes two GOT slots. bool GotSection::addTlsIndex() { if (tlsIndexOff != uint32_t(-1)) return false; tlsIndexOff = numEntries * config->wordsize; numEntries += 2; return true; } uint64_t GotSection::getGlobalDynAddr(const Symbol &b) const { return this->getVA() + b.globalDynIndex * config->wordsize; } uint64_t GotSection::getGlobalDynOffset(const Symbol &b) const { return b.globalDynIndex * config->wordsize; } void GotSection::finalizeContents() { size = numEntries * config->wordsize; } bool GotSection::isNeeded() const { // We need to emit a GOT even if it's empty if there's a relocation that is // relative to GOT(such as GOTOFFREL). return numEntries || hasGotOffRel; } void GotSection::writeTo(uint8_t *buf) { // Buf points to the start of this section's buffer, // whereas InputSectionBase::relocateAlloc() expects its argument // to point to the start of the output section. 
target->writeGotHeader(buf); relocateAlloc(buf - outSecOff, buf - outSecOff + size); } static uint64_t getMipsPageAddr(uint64_t addr) { return (addr + 0x8000) & ~0xffff; } static uint64_t getMipsPageCount(uint64_t size) { return (size + 0xfffe) / 0xffff + 1; } MipsGotSection::MipsGotSection() : SyntheticSection(SHF_ALLOC | SHF_WRITE | SHF_MIPS_GPREL, SHT_PROGBITS, 16, ".got") {} void MipsGotSection::addEntry(InputFile &file, Symbol &sym, int64_t addend, RelExpr expr) { FileGot &g = getGot(file); if (expr == R_MIPS_GOT_LOCAL_PAGE) { if (const OutputSection *os = sym.getOutputSection()) g.pagesMap.insert({os, {}}); else g.local16.insert({{nullptr, getMipsPageAddr(sym.getVA(addend))}, 0}); } else if (sym.isTls()) g.tls.insert({&sym, 0}); else if (sym.isPreemptible && expr == R_ABS) g.relocs.insert({&sym, 0}); else if (sym.isPreemptible) g.global.insert({&sym, 0}); else if (expr == R_MIPS_GOT_OFF32) g.local32.insert({{&sym, addend}, 0}); else g.local16.insert({{&sym, addend}, 0}); } void MipsGotSection::addDynTlsEntry(InputFile &file, Symbol &sym) { getGot(file).dynTlsSymbols.insert({&sym, 0}); } void MipsGotSection::addTlsIndex(InputFile &file) { getGot(file).dynTlsSymbols.insert({nullptr, 0}); } size_t MipsGotSection::FileGot::getEntriesNum() const { return getPageEntriesNum() + local16.size() + global.size() + relocs.size() + tls.size() + dynTlsSymbols.size() * 2; } size_t MipsGotSection::FileGot::getPageEntriesNum() const { size_t num = 0; for (const std::pair &p : pagesMap) num += p.second.count; return num; } size_t MipsGotSection::FileGot::getIndexedEntriesNum() const { size_t count = getPageEntriesNum() + local16.size() + global.size(); // If there are relocation-only entries in the GOT, TLS entries // are allocated after them. TLS entries should be addressable // by 16-bit index so count both reloc-only and TLS entries. 
if (!tls.empty() || !dynTlsSymbols.empty()) count += relocs.size() + tls.size() + dynTlsSymbols.size() * 2; return count; } MipsGotSection::FileGot &MipsGotSection::getGot(InputFile &f) { if (!f.mipsGotIndex.hasValue()) { gots.emplace_back(); gots.back().file = &f; f.mipsGotIndex = gots.size() - 1; } return gots[*f.mipsGotIndex]; } uint64_t MipsGotSection::getPageEntryOffset(const InputFile *f, const Symbol &sym, int64_t addend) const { const FileGot &g = gots[*f->mipsGotIndex]; uint64_t index = 0; if (const OutputSection *outSec = sym.getOutputSection()) { uint64_t secAddr = getMipsPageAddr(outSec->addr); uint64_t symAddr = getMipsPageAddr(sym.getVA(addend)); index = g.pagesMap.lookup(outSec).firstIndex + (symAddr - secAddr) / 0xffff; } else { index = g.local16.lookup({nullptr, getMipsPageAddr(sym.getVA(addend))}); } return index * config->wordsize; } uint64_t MipsGotSection::getSymEntryOffset(const InputFile *f, const Symbol &s, int64_t addend) const { const FileGot &g = gots[*f->mipsGotIndex]; Symbol *sym = const_cast(&s); if (sym->isTls()) return g.tls.lookup(sym) * config->wordsize; if (sym->isPreemptible) return g.global.lookup(sym) * config->wordsize; return g.local16.lookup({sym, addend}) * config->wordsize; } uint64_t MipsGotSection::getTlsIndexOffset(const InputFile *f) const { const FileGot &g = gots[*f->mipsGotIndex]; return g.dynTlsSymbols.lookup(nullptr) * config->wordsize; } uint64_t MipsGotSection::getGlobalDynOffset(const InputFile *f, const Symbol &s) const { const FileGot &g = gots[*f->mipsGotIndex]; Symbol *sym = const_cast(&s); return g.dynTlsSymbols.lookup(sym) * config->wordsize; } const Symbol *MipsGotSection::getFirstGlobalEntry() const { if (gots.empty()) return nullptr; const FileGot &primGot = gots.front(); if (!primGot.global.empty()) return primGot.global.front().first; if (!primGot.relocs.empty()) return primGot.relocs.front().first; return nullptr; } unsigned MipsGotSection::getLocalEntriesNum() const { if (gots.empty()) return 
headerEntriesNum; return headerEntriesNum + gots.front().getPageEntriesNum() + gots.front().local16.size(); } bool MipsGotSection::tryMergeGots(FileGot &dst, FileGot &src, bool isPrimary) { FileGot tmp = dst; set_union(tmp.pagesMap, src.pagesMap); set_union(tmp.local16, src.local16); set_union(tmp.global, src.global); set_union(tmp.relocs, src.relocs); set_union(tmp.tls, src.tls); set_union(tmp.dynTlsSymbols, src.dynTlsSymbols); size_t count = isPrimary ? headerEntriesNum : 0; count += tmp.getIndexedEntriesNum(); if (count * config->wordsize > config->mipsGotSize) return false; std::swap(tmp, dst); return true; } void MipsGotSection::finalizeContents() { updateAllocSize(); } bool MipsGotSection::updateAllocSize() { size = headerEntriesNum * config->wordsize; for (const FileGot &g : gots) size += g.getEntriesNum() * config->wordsize; return false; } void MipsGotSection::build() { if (gots.empty()) return; std::vector mergedGots(1); // For each GOT move non-preemptible symbols from the `Global` // to `Local16` list. Preemptible symbol might become non-preemptible // one if, for example, it gets a related copy relocation. for (FileGot &got : gots) { for (auto &p: got.global) if (!p.first->isPreemptible) got.local16.insert({{p.first, 0}, 0}); got.global.remove_if([&](const std::pair &p) { return !p.first->isPreemptible; }); } // For each GOT remove "reloc-only" entry if there is "global" // entry for the same symbol. And add local entries which indexed // using 32-bit value at the end of 16-bit entries. for (FileGot &got : gots) { got.relocs.remove_if([&](const std::pair &p) { return got.global.count(p.first); }); set_union(got.local16, got.local32); got.local32.clear(); } // Evaluate number of "reloc-only" entries in the resulting GOT. // To do that put all unique "reloc-only" and "global" entries // from all GOTs to the future primary GOT. 
FileGot *primGot = &mergedGots.front(); for (FileGot &got : gots) { set_union(primGot->relocs, got.global); set_union(primGot->relocs, got.relocs); got.relocs.clear(); } // Evaluate number of "page" entries in each GOT. for (FileGot &got : gots) { for (std::pair &p : got.pagesMap) { const OutputSection *os = p.first; uint64_t secSize = 0; for (BaseCommand *cmd : os->sectionCommands) { if (auto *isd = dyn_cast(cmd)) for (InputSection *isec : isd->sections) { uint64_t off = alignTo(secSize, isec->alignment); secSize = off + isec->getSize(); } } p.second.count = getMipsPageCount(secSize); } } // Merge GOTs. Try to join as much as possible GOTs but do not exceed // maximum GOT size. At first, try to fill the primary GOT because // the primary GOT can be accessed in the most effective way. If it // is not possible, try to fill the last GOT in the list, and finally // create a new GOT if both attempts failed. for (FileGot &srcGot : gots) { InputFile *file = srcGot.file; if (tryMergeGots(mergedGots.front(), srcGot, true)) { file->mipsGotIndex = 0; } else { // If this is the first time we failed to merge with the primary GOT, // MergedGots.back() will also be the primary GOT. We must make sure not // to try to merge again with isPrimary=false, as otherwise, if the // inputs are just right, we could allow the primary GOT to become 1 or 2 // words bigger due to ignoring the header size. if (mergedGots.size() == 1 || !tryMergeGots(mergedGots.back(), srcGot, false)) { mergedGots.emplace_back(); std::swap(mergedGots.back(), srcGot); } file->mipsGotIndex = mergedGots.size() - 1; } } std::swap(gots, mergedGots); // Reduce number of "reloc-only" entries in the primary GOT // by subtracting "global" entries in the primary GOT. primGot = &gots.front(); primGot->relocs.remove_if([&](const std::pair &p) { return primGot->global.count(p.first); }); // Calculate indexes for each GOT entry. size_t index = headerEntriesNum; for (FileGot &got : gots) { got.startIndex = &got == primGot ? 
0 : index; for (std::pair &p : got.pagesMap) { // For each output section referenced by GOT page relocations calculate // and save into pagesMap an upper bound of MIPS GOT entries required // to store page addresses of local symbols. We assume the worst case - // each 64kb page of the output section has at least one GOT relocation // against it. And take in account the case when the section intersects // page boundaries. p.second.firstIndex = index; index += p.second.count; } for (auto &p: got.local16) p.second = index++; for (auto &p: got.global) p.second = index++; for (auto &p: got.relocs) p.second = index++; for (auto &p: got.tls) p.second = index++; for (auto &p: got.dynTlsSymbols) { p.second = index; index += 2; } } // Update Symbol::gotIndex field to use this // value later in the `sortMipsSymbols` function. for (auto &p : primGot->global) p.first->gotIndex = p.second; for (auto &p : primGot->relocs) p.first->gotIndex = p.second; // Create dynamic relocations. for (FileGot &got : gots) { // Create dynamic relocations for TLS entries. for (std::pair &p : got.tls) { Symbol *s = p.first; uint64_t offset = p.second * config->wordsize; if (s->isPreemptible) mainPart->relaDyn->addReloc(target->tlsGotRel, this, offset, s); } for (std::pair &p : got.dynTlsSymbols) { Symbol *s = p.first; uint64_t offset = p.second * config->wordsize; if (s == nullptr) { if (!config->isPic) continue; mainPart->relaDyn->addReloc(target->tlsModuleIndexRel, this, offset, s); } else { // When building a shared library we still need a dynamic relocation // for the module index. Therefore only checking for // S->isPreemptible is not sufficient (this happens e.g. 
for // thread-locals that have been marked as local through a linker script) if (!s->isPreemptible && !config->isPic) continue; mainPart->relaDyn->addReloc(target->tlsModuleIndexRel, this, offset, s); // However, we can skip writing the TLS offset reloc for non-preemptible // symbols since it is known even in shared libraries if (!s->isPreemptible) continue; offset += config->wordsize; mainPart->relaDyn->addReloc(target->tlsOffsetRel, this, offset, s); } } // Do not create dynamic relocations for non-TLS // entries in the primary GOT. if (&got == primGot) continue; // Dynamic relocations for "global" entries. for (const std::pair &p : got.global) { uint64_t offset = p.second * config->wordsize; mainPart->relaDyn->addReloc(target->relativeRel, this, offset, p.first); } if (!config->isPic) continue; // Dynamic relocations for "local" entries in case of PIC. for (const std::pair &l : got.pagesMap) { size_t pageCount = l.second.count; for (size_t pi = 0; pi < pageCount; ++pi) { uint64_t offset = (l.second.firstIndex + pi) * config->wordsize; mainPart->relaDyn->addReloc({target->relativeRel, this, offset, l.first, int64_t(pi * 0x10000)}); } } for (const std::pair &p : got.local16) { uint64_t offset = p.second * config->wordsize; mainPart->relaDyn->addReloc({target->relativeRel, this, offset, true, p.first.first, p.first.second}); } } } bool MipsGotSection::isNeeded() const { // We add the .got section to the result for dynamic MIPS target because // its address and properties are mentioned in the .dynamic section. return !config->relocatable; } uint64_t MipsGotSection::getGp(const InputFile *f) const { // For files without related GOT or files refer a primary GOT // returns "common" _gp value. For secondary GOTs calculate // individual _gp values. 
if (!f || !f->mipsGotIndex.hasValue() || *f->mipsGotIndex == 0) return ElfSym::mipsGp->getVA(0); return getVA() + gots[*f->mipsGotIndex].startIndex * config->wordsize + 0x7ff0; } void MipsGotSection::writeTo(uint8_t *buf) { // Set the MSB of the second GOT slot. This is not required by any // MIPS ABI documentation, though. // // There is a comment in glibc saying that "The MSB of got[1] of a // gnu object is set to identify gnu objects," and in GNU gold it // says "the second entry will be used by some runtime loaders". // But how this field is being used is unclear. // // We are not really willing to mimic other linkers behaviors // without understanding why they do that, but because all files // generated by GNU tools have this special GOT value, and because // we've been doing this for years, it is probably a safe bet to // keep doing this for now. We really need to revisit this to see // if we had to do this. writeUint(buf + config->wordsize, (uint64_t)1 << (config->wordsize * 8 - 1)); for (const FileGot &g : gots) { auto write = [&](size_t i, const Symbol *s, int64_t a) { uint64_t va = a; if (s) va = s->getVA(a); writeUint(buf + i * config->wordsize, va); }; // Write 'page address' entries to the local part of the GOT. for (const std::pair &l : g.pagesMap) { size_t pageCount = l.second.count; uint64_t firstPageAddr = getMipsPageAddr(l.first->addr); for (size_t pi = 0; pi < pageCount; ++pi) write(l.second.firstIndex + pi, nullptr, firstPageAddr + pi * 0x10000); } // Local, global, TLS, reloc-only entries. // If TLS entry has a corresponding dynamic relocations, leave it // initialized by zero. Write down adjusted TLS symbol's values otherwise. // To calculate the adjustments use offsets for thread-local storage. // https://www.linux-mips.org/wiki/NPTL for (const std::pair &p : g.local16) write(p.second, p.first.first, p.first.second); // Write VA to the primary GOT only. For secondary GOTs that // will be done by REL32 dynamic relocations. 
if (&g == &gots.front()) for (const std::pair &p : g.global) write(p.second, p.first, 0); for (const std::pair &p : g.relocs) write(p.second, p.first, 0); for (const std::pair &p : g.tls) write(p.second, p.first, p.first->isPreemptible ? 0 : -0x7000); for (const std::pair &p : g.dynTlsSymbols) { if (p.first == nullptr && !config->isPic) write(p.second, nullptr, 1); else if (p.first && !p.first->isPreemptible) { // If we are emitting PIC code with relocations we mustn't write // anything to the GOT here. When using Elf_Rel relocations the value // one will be treated as an addend and will cause crashes at runtime if (!config->isPic) write(p.second, nullptr, 1); write(p.second + 1, p.first, -0x8000); } } } } // On PowerPC the .plt section is used to hold the table of function addresses // instead of the .got.plt, and the type is SHT_NOBITS similar to a .bss // section. I don't know why we have a BSS style type for the section but it is // consistent across both 64-bit PowerPC ABIs as well as the 32-bit PowerPC ABI. GotPltSection::GotPltSection() : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, config->wordsize, ".got.plt") { if (config->emachine == EM_PPC) { name = ".plt"; } else if (config->emachine == EM_PPC64) { type = SHT_NOBITS; name = ".plt"; } } void GotPltSection::addEntry(Symbol &sym) { assert(sym.pltIndex == entries.size()); entries.push_back(&sym); } size_t GotPltSection::getSize() const { return (target->gotPltHeaderEntriesNum + entries.size()) * config->wordsize; } void GotPltSection::writeTo(uint8_t *buf) { target->writeGotPltHeader(buf); buf += target->gotPltHeaderEntriesNum * config->wordsize; for (const Symbol *b : entries) { target->writeGotPlt(buf, *b); buf += config->wordsize; } } bool GotPltSection::isNeeded() const { // We need to emit GOTPLT even if it's empty if there's a relocation relative // to it. return !entries.empty() || hasGotPltOffRel; } static StringRef getIgotPltName() { // On ARM the IgotPltSection is part of the GotSection. 
if (config->emachine == EM_ARM) return ".got"; // On PowerPC64 the GotPltSection is renamed to '.plt' so the IgotPltSection // needs to be named the same. if (config->emachine == EM_PPC64) return ".plt"; return ".got.plt"; } // On PowerPC64 the GotPltSection type is SHT_NOBITS so we have to follow suit // with the IgotPltSection. IgotPltSection::IgotPltSection() : SyntheticSection(SHF_ALLOC | SHF_WRITE, config->emachine == EM_PPC64 ? SHT_NOBITS : SHT_PROGBITS, config->wordsize, getIgotPltName()) {} void IgotPltSection::addEntry(Symbol &sym) { assert(sym.pltIndex == entries.size()); entries.push_back(&sym); } size_t IgotPltSection::getSize() const { return entries.size() * config->wordsize; } void IgotPltSection::writeTo(uint8_t *buf) { for (const Symbol *b : entries) { target->writeIgotPlt(buf, *b); buf += config->wordsize; } } StringTableSection::StringTableSection(StringRef name, bool dynamic) : SyntheticSection(dynamic ? (uint64_t)SHF_ALLOC : 0, SHT_STRTAB, 1, name), dynamic(dynamic) { // ELF string tables start with a NUL byte. addString(""); } // Adds a string to the string table. If `hashIt` is true we hash and check for // duplicates. It is optional because the name of global symbols are already // uniqued and hashing them again has a big cost for a small value: uniquing // them with some other string that happens to be the same. unsigned StringTableSection::addString(StringRef s, bool hashIt) { if (hashIt) { auto r = stringMap.insert(std::make_pair(s, this->size)); if (!r.second) return r.first->second; } unsigned ret = this->size; this->size = this->size + s.size() + 1; strings.push_back(s); return ret; } void StringTableSection::writeTo(uint8_t *buf) { for (StringRef s : strings) { memcpy(buf, s.data(), s.size()); buf[s.size()] = '\0'; buf += s.size() + 1; } } // Returns the number of entries in .gnu.version_d: the number of // non-VER_NDX_LOCAL-non-VER_NDX_GLOBAL definitions, plus 1. // Note that we don't support vd_cnt > 1 yet. 
static unsigned getVerDefNum() { return namedVersionDefs().size() + 1; } template DynamicSection::DynamicSection() : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_DYNAMIC, config->wordsize, ".dynamic") { this->entsize = ELFT::Is64Bits ? 16 : 8; // .dynamic section is not writable on MIPS and on Fuchsia OS // which passes -z rodynamic. // See "Special Section" in Chapter 4 in the following document: // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf if (config->emachine == EM_MIPS || config->zRodynamic) this->flags = SHF_ALLOC; } template void DynamicSection::add(int32_t tag, std::function fn) { entries.push_back({tag, fn}); } template void DynamicSection::addInt(int32_t tag, uint64_t val) { entries.push_back({tag, [=] { return val; }}); } template void DynamicSection::addInSec(int32_t tag, InputSection *sec) { entries.push_back({tag, [=] { return sec->getVA(0); }}); } template void DynamicSection::addInSecRelative(int32_t tag, InputSection *sec) { size_t tagOffset = entries.size() * entsize; entries.push_back( {tag, [=] { return sec->getVA(0) - (getVA() + tagOffset); }}); } template void DynamicSection::addOutSec(int32_t tag, OutputSection *sec) { entries.push_back({tag, [=] { return sec->addr; }}); } template void DynamicSection::addSize(int32_t tag, OutputSection *sec) { entries.push_back({tag, [=] { return sec->size; }}); } template void DynamicSection::addSym(int32_t tag, Symbol *sym) { entries.push_back({tag, [=] { return sym->getVA(); }}); } // The output section .rela.dyn may include these synthetic sections: // // - part.relaDyn // - in.relaIplt: this is included if in.relaIplt is named .rela.dyn // - in.relaPlt: this is included if a linker script places .rela.plt inside // .rela.dyn // // DT_RELASZ is the total size of the included sections. 
static std::function addRelaSz(RelocationBaseSection *relaDyn) { return [=]() { size_t size = relaDyn->getSize(); if (in.relaIplt->getParent() == relaDyn->getParent()) size += in.relaIplt->getSize(); if (in.relaPlt->getParent() == relaDyn->getParent()) size += in.relaPlt->getSize(); return size; }; } // A Linker script may assign the RELA relocation sections to the same // output section. When this occurs we cannot just use the OutputSection // Size. Moreover the [DT_JMPREL, DT_JMPREL + DT_PLTRELSZ) is permitted to // overlap with the [DT_RELA, DT_RELA + DT_RELASZ). static uint64_t addPltRelSz() { size_t size = in.relaPlt->getSize(); if (in.relaIplt->getParent() == in.relaPlt->getParent() && in.relaIplt->name == in.relaPlt->name) size += in.relaIplt->getSize(); return size; } // Add remaining entries to complete .dynamic contents. template void DynamicSection::finalizeContents() { Partition &part = getPartition(); bool isMain = part.name.empty(); for (StringRef s : config->filterList) addInt(DT_FILTER, part.dynStrTab->addString(s)); for (StringRef s : config->auxiliaryList) addInt(DT_AUXILIARY, part.dynStrTab->addString(s)); if (!config->rpath.empty()) addInt(config->enableNewDtags ? DT_RUNPATH : DT_RPATH, part.dynStrTab->addString(config->rpath)); for (SharedFile *file : sharedFiles) if (file->isNeeded) addInt(DT_NEEDED, part.dynStrTab->addString(file->soName)); if (isMain) { if (!config->soName.empty()) addInt(DT_SONAME, part.dynStrTab->addString(config->soName)); } else { if (!config->soName.empty()) addInt(DT_NEEDED, part.dynStrTab->addString(config->soName)); addInt(DT_SONAME, part.dynStrTab->addString(part.name)); } // Set DT_FLAGS and DT_FLAGS_1. 
uint32_t dtFlags = 0; uint32_t dtFlags1 = 0; if (config->bsymbolic) dtFlags |= DF_SYMBOLIC; if (config->zGlobal) dtFlags1 |= DF_1_GLOBAL; if (config->zInitfirst) dtFlags1 |= DF_1_INITFIRST; if (config->zInterpose) dtFlags1 |= DF_1_INTERPOSE; if (config->zNodefaultlib) dtFlags1 |= DF_1_NODEFLIB; if (config->zNodelete) dtFlags1 |= DF_1_NODELETE; if (config->zNodlopen) dtFlags1 |= DF_1_NOOPEN; if (config->zNow) { dtFlags |= DF_BIND_NOW; dtFlags1 |= DF_1_NOW; } if (config->zOrigin) { dtFlags |= DF_ORIGIN; dtFlags1 |= DF_1_ORIGIN; } if (!config->zText) dtFlags |= DF_TEXTREL; if (config->hasStaticTlsModel) dtFlags |= DF_STATIC_TLS; if (dtFlags) addInt(DT_FLAGS, dtFlags); if (dtFlags1) addInt(DT_FLAGS_1, dtFlags1); // DT_DEBUG is a pointer to debug information used by debuggers at runtime. We // need it for each process, so we don't write it for DSOs. The loader writes // the pointer into this entry. // // DT_DEBUG is the only .dynamic entry that needs to be written to. Some // systems (currently only Fuchsia OS) provide other means to give the // debugger this information. Such systems may choose make .dynamic read-only. // If the target is such a system (used -z rodynamic) don't write DT_DEBUG. if (!config->shared && !config->relocatable && !config->zRodynamic) addInt(DT_DEBUG, 0); if (OutputSection *sec = part.dynStrTab->getParent()) this->link = sec->sectionIndex; if (part.relaDyn->isNeeded() || (in.relaIplt->isNeeded() && part.relaDyn->getParent() == in.relaIplt->getParent())) { addInSec(part.relaDyn->dynamicTag, part.relaDyn); entries.push_back({part.relaDyn->sizeDynamicTag, addRelaSz(part.relaDyn)}); bool isRela = config->isRela; addInt(isRela ? DT_RELAENT : DT_RELENT, isRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel)); // MIPS dynamic loader does not support RELCOUNT tag. // The problem is in the tight relation between dynamic // relocations and GOT. So do not emit this tag on MIPS. 
if (config->emachine != EM_MIPS) { size_t numRelativeRels = part.relaDyn->getRelativeRelocCount(); if (config->zCombreloc && numRelativeRels) addInt(isRela ? DT_RELACOUNT : DT_RELCOUNT, numRelativeRels); } } if (part.relrDyn && !part.relrDyn->relocs.empty()) { addInSec(config->useAndroidRelrTags ? DT_ANDROID_RELR : DT_RELR, part.relrDyn); addSize(config->useAndroidRelrTags ? DT_ANDROID_RELRSZ : DT_RELRSZ, part.relrDyn->getParent()); addInt(config->useAndroidRelrTags ? DT_ANDROID_RELRENT : DT_RELRENT, sizeof(Elf_Relr)); } // .rel[a].plt section usually consists of two parts, containing plt and // iplt relocations. It is possible to have only iplt relocations in the // output. In that case relaPlt is empty and have zero offset, the same offset // as relaIplt has. And we still want to emit proper dynamic tags for that // case, so here we always use relaPlt as marker for the beginning of // .rel[a].plt section. if (isMain && (in.relaPlt->isNeeded() || in.relaIplt->isNeeded())) { addInSec(DT_JMPREL, in.relaPlt); entries.push_back({DT_PLTRELSZ, addPltRelSz}); switch (config->emachine) { case EM_MIPS: addInSec(DT_MIPS_PLTGOT, in.gotPlt); break; case EM_SPARCV9: addInSec(DT_PLTGOT, in.plt); break; default: addInSec(DT_PLTGOT, in.gotPlt); break; } addInt(DT_PLTREL, config->isRela ? 
DT_RELA : DT_REL); } if (config->emachine == EM_AARCH64) { if (config->andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) addInt(DT_AARCH64_BTI_PLT, 0); if (config->andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_PAC) addInt(DT_AARCH64_PAC_PLT, 0); } addInSec(DT_SYMTAB, part.dynSymTab); addInt(DT_SYMENT, sizeof(Elf_Sym)); addInSec(DT_STRTAB, part.dynStrTab); addInt(DT_STRSZ, part.dynStrTab->getSize()); if (!config->zText) addInt(DT_TEXTREL, 0); if (part.gnuHashTab) addInSec(DT_GNU_HASH, part.gnuHashTab); if (part.hashTab) addInSec(DT_HASH, part.hashTab); if (isMain) { if (Out::preinitArray) { addOutSec(DT_PREINIT_ARRAY, Out::preinitArray); addSize(DT_PREINIT_ARRAYSZ, Out::preinitArray); } if (Out::initArray) { addOutSec(DT_INIT_ARRAY, Out::initArray); addSize(DT_INIT_ARRAYSZ, Out::initArray); } if (Out::finiArray) { addOutSec(DT_FINI_ARRAY, Out::finiArray); addSize(DT_FINI_ARRAYSZ, Out::finiArray); } if (Symbol *b = symtab->find(config->init)) if (b->isDefined()) addSym(DT_INIT, b); if (Symbol *b = symtab->find(config->fini)) if (b->isDefined()) addSym(DT_FINI, b); } if (part.verSym && part.verSym->isNeeded()) addInSec(DT_VERSYM, part.verSym); if (part.verDef && part.verDef->isLive()) { addInSec(DT_VERDEF, part.verDef); addInt(DT_VERDEFNUM, getVerDefNum()); } if (part.verNeed && part.verNeed->isNeeded()) { addInSec(DT_VERNEED, part.verNeed); unsigned needNum = 0; for (SharedFile *f : sharedFiles) if (!f->vernauxs.empty()) ++needNum; addInt(DT_VERNEEDNUM, needNum); } if (config->emachine == EM_MIPS) { addInt(DT_MIPS_RLD_VERSION, 1); addInt(DT_MIPS_FLAGS, RHF_NOTPOT); addInt(DT_MIPS_BASE_ADDRESS, target->getImageBase()); addInt(DT_MIPS_SYMTABNO, part.dynSymTab->getNumSymbols()); add(DT_MIPS_LOCAL_GOTNO, [] { return in.mipsGot->getLocalEntriesNum(); }); if (const Symbol *b = in.mipsGot->getFirstGlobalEntry()) addInt(DT_MIPS_GOTSYM, b->dynsymIndex); else addInt(DT_MIPS_GOTSYM, part.dynSymTab->getNumSymbols()); addInSec(DT_PLTGOT, in.mipsGot); if (in.mipsRldMap) { if 
(!config->pie) addInSec(DT_MIPS_RLD_MAP, in.mipsRldMap); // Store the offset to the .rld_map section // relative to the address of the tag. addInSecRelative(DT_MIPS_RLD_MAP_REL, in.mipsRldMap); } } // DT_PPC_GOT indicates to glibc Secure PLT is used. If DT_PPC_GOT is absent, // glibc assumes the old-style BSS PLT layout which we don't support. if (config->emachine == EM_PPC) add(DT_PPC_GOT, [] { return in.got->getVA(); }); // Glink dynamic tag is required by the V2 abi if the plt section isn't empty. if (config->emachine == EM_PPC64 && in.plt->isNeeded()) { // The Glink tag points to 32 bytes before the first lazy symbol resolution // stub, which starts directly after the header. entries.push_back({DT_PPC64_GLINK, [=] { unsigned offset = target->pltHeaderSize - 32; return in.plt->getVA(0) + offset; }}); } addInt(DT_NULL, 0); getParent()->link = this->link; this->size = entries.size() * this->entsize; } template void DynamicSection::writeTo(uint8_t *buf) { auto *p = reinterpret_cast(buf); for (std::pair> &kv : entries) { p->d_tag = kv.first; p->d_un.d_val = kv.second(); ++p; } } uint64_t DynamicReloc::getOffset() const { return inputSec->getVA(offsetInSec); } int64_t DynamicReloc::computeAddend() const { if (useSymVA) return sym->getVA(addend); if (!outputSec) return addend; // See the comment in the DynamicReloc ctor. 
return getMipsPageAddr(outputSec->addr) + addend; } uint32_t DynamicReloc::getSymIndex(SymbolTableBaseSection *symTab) const { if (sym && !useSymVA) return symTab->getSymbolIndex(sym); return 0; } RelocationBaseSection::RelocationBaseSection(StringRef name, uint32_t type, int32_t dynamicTag, int32_t sizeDynamicTag) : SyntheticSection(SHF_ALLOC, type, config->wordsize, name), dynamicTag(dynamicTag), sizeDynamicTag(sizeDynamicTag) {} void RelocationBaseSection::addReloc(RelType dynType, InputSectionBase *isec, uint64_t offsetInSec, Symbol *sym) { addReloc({dynType, isec, offsetInSec, false, sym, 0}); } void RelocationBaseSection::addReloc(RelType dynType, InputSectionBase *inputSec, uint64_t offsetInSec, Symbol *sym, int64_t addend, RelExpr expr, RelType type) { // Write the addends to the relocated address if required. We skip // it if the written value would be zero. if (config->writeAddends && (expr != R_ADDEND || addend != 0)) inputSec->relocations.push_back({expr, type, offsetInSec, addend, sym}); addReloc({dynType, inputSec, offsetInSec, expr != R_ADDEND, sym, addend}); } void RelocationBaseSection::addReloc(const DynamicReloc &reloc) { if (reloc.type == target->relativeRel) ++numRelativeRelocs; relocs.push_back(reloc); } void RelocationBaseSection::finalizeContents() { SymbolTableBaseSection *symTab = getPartition().dynSymTab; // When linking glibc statically, .rel{,a}.plt contains R_*_IRELATIVE // relocations due to IFUNC (e.g. strcpy). sh_link will be set to 0 in that // case. if (symTab && symTab->getParent()) getParent()->link = symTab->getParent()->sectionIndex; else getParent()->link = 0; if (in.relaPlt == this) getParent()->info = in.gotPlt->getParent()->sectionIndex; if (in.relaIplt == this) getParent()->info = in.igotPlt->getParent()->sectionIndex; } RelrBaseSection::RelrBaseSection() : SyntheticSection(SHF_ALLOC, config->useAndroidRelrTags ? 
SHT_ANDROID_RELR : SHT_RELR, config->wordsize, ".relr.dyn") {} template static void encodeDynamicReloc(SymbolTableBaseSection *symTab, typename ELFT::Rela *p, const DynamicReloc &rel) { if (config->isRela) p->r_addend = rel.computeAddend(); p->r_offset = rel.getOffset(); p->setSymbolAndType(rel.getSymIndex(symTab), rel.type, config->isMips64EL); } template RelocationSection::RelocationSection(StringRef name, bool sort) : RelocationBaseSection(name, config->isRela ? SHT_RELA : SHT_REL, config->isRela ? DT_RELA : DT_REL, config->isRela ? DT_RELASZ : DT_RELSZ), sort(sort) { this->entsize = config->isRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel); } template void RelocationSection::writeTo(uint8_t *buf) { SymbolTableBaseSection *symTab = getPartition().dynSymTab; // Sort by (!IsRelative,SymIndex,r_offset). DT_REL[A]COUNT requires us to // place R_*_RELATIVE first. SymIndex is to improve locality, while r_offset // is to make results easier to read. if (sort) llvm::stable_sort( relocs, [&](const DynamicReloc &a, const DynamicReloc &b) { return std::make_tuple(a.type != target->relativeRel, a.getSymIndex(symTab), a.getOffset()) < std::make_tuple(b.type != target->relativeRel, b.getSymIndex(symTab), b.getOffset()); }); for (const DynamicReloc &rel : relocs) { encodeDynamicReloc(symTab, reinterpret_cast(buf), rel); buf += config->isRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel); } } template AndroidPackedRelocationSection::AndroidPackedRelocationSection( StringRef name) : RelocationBaseSection( name, config->isRela ? SHT_ANDROID_RELA : SHT_ANDROID_REL, config->isRela ? DT_ANDROID_RELA : DT_ANDROID_REL, config->isRela ? DT_ANDROID_RELASZ : DT_ANDROID_RELSZ) { this->entsize = 1; } template bool AndroidPackedRelocationSection::updateAllocSize() { // This function computes the contents of an Android-format packed relocation // section. 
// // This format compresses relocations by using relocation groups to factor out // fields that are common between relocations and storing deltas from previous // relocations in SLEB128 format (which has a short representation for small // numbers). A good example of a relocation type with common fields is // R_*_RELATIVE, which is normally used to represent function pointers in // vtables. In the REL format, each relative relocation has the same r_info // field, and is only different from other relative relocations in terms of // the r_offset field. By sorting relocations by offset, grouping them by // r_info and representing each relocation with only the delta from the // previous offset, each 8-byte relocation can be compressed to as little as 1 // byte (or less with run-length encoding). This relocation packer was able to // reduce the size of the relocation section in an Android Chromium DSO from // 2,911,184 bytes to 174,693 bytes, or 6% of the original size. // // A relocation section consists of a header containing the literal bytes // 'APS2' followed by a sequence of SLEB128-encoded integers. The first two // elements are the total number of relocations in the section and an initial // r_offset value. The remaining elements define a sequence of relocation // groups. Each relocation group starts with a header consisting of the // following elements: // // - the number of relocations in the relocation group // - flags for the relocation group // - (if RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG is set) the r_offset delta // for each relocation in the group. // - (if RELOCATION_GROUPED_BY_INFO_FLAG is set) the value of the r_info // field for each relocation in the group. // - (if RELOCATION_GROUP_HAS_ADDEND_FLAG and // RELOCATION_GROUPED_BY_ADDEND_FLAG are set) the r_addend delta for // each relocation in the group. // // Following the relocation group header are descriptions of each of the // relocations in the group. 
They consist of the following elements: // // - (if RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG is not set) the r_offset // delta for this relocation. // - (if RELOCATION_GROUPED_BY_INFO_FLAG is not set) the value of the r_info // field for this relocation. // - (if RELOCATION_GROUP_HAS_ADDEND_FLAG is set and // RELOCATION_GROUPED_BY_ADDEND_FLAG is not set) the r_addend delta for // this relocation. size_t oldSize = relocData.size(); relocData = {'A', 'P', 'S', '2'}; raw_svector_ostream os(relocData); auto add = [&](int64_t v) { encodeSLEB128(v, os); }; // The format header includes the number of relocations and the initial // offset (we set this to zero because the first relocation group will // perform the initial adjustment). add(relocs.size()); add(0); std::vector relatives, nonRelatives; for (const DynamicReloc &rel : relocs) { Elf_Rela r; encodeDynamicReloc(getPartition().dynSymTab, &r, rel); if (r.getType(config->isMips64EL) == target->relativeRel) relatives.push_back(r); else nonRelatives.push_back(r); } llvm::sort(relatives, [](const Elf_Rel &a, const Elf_Rel &b) { return a.r_offset < b.r_offset; }); // Try to find groups of relative relocations which are spaced one word // apart from one another. These generally correspond to vtable entries. The // format allows these groups to be encoded using a sort of run-length // encoding, but each group will cost 7 bytes in addition to the offset from // the previous group, so it is only profitable to do this for groups of // size 8 or larger. std::vector ungroupedRelatives; std::vector> relativeGroups; for (auto i = relatives.begin(), e = relatives.end(); i != e;) { std::vector group; do { group.push_back(*i++); } while (i != e && (i - 1)->r_offset + config->wordsize == i->r_offset); if (group.size() < 8) ungroupedRelatives.insert(ungroupedRelatives.end(), group.begin(), group.end()); else relativeGroups.emplace_back(std::move(group)); } // For non-relative relocations, we would like to: // 1. 
Have relocations with the same symbol offset to be consecutive, so // that the runtime linker can speed-up symbol lookup by implementing an // 1-entry cache. // 2. Group relocations by r_info to reduce the size of the relocation // section. // Since the symbol offset is the high bits in r_info, sorting by r_info // allows us to do both. // // For Rela, we also want to sort by r_addend when r_info is the same. This // enables us to group by r_addend as well. llvm::stable_sort(nonRelatives, [](const Elf_Rela &a, const Elf_Rela &b) { if (a.r_info != b.r_info) return a.r_info < b.r_info; if (config->isRela) return a.r_addend < b.r_addend; return false; }); // Group relocations with the same r_info. Note that each group emits a group // header and that may make the relocation section larger. It is hard to // estimate the size of a group header as the encoded size of that varies // based on r_info. However, we can approximate this trade-off by the number // of values encoded. Each group header contains 3 values, and each relocation // in a group encodes one less value, as compared to when it is not grouped. // Therefore, we only group relocations if there are 3 or more of them with // the same r_info. // // For Rela, the addend for most non-relative relocations is zero, and thus we // can usually get a smaller relocation section if we group relocations with 0 // addend as well. std::vector ungroupedNonRelatives; std::vector> nonRelativeGroups; for (auto i = nonRelatives.begin(), e = nonRelatives.end(); i != e;) { auto j = i + 1; while (j != e && i->r_info == j->r_info && (!config->isRela || i->r_addend == j->r_addend)) ++j; if (j - i < 3 || (config->isRela && i->r_addend != 0)) ungroupedNonRelatives.insert(ungroupedNonRelatives.end(), i, j); else nonRelativeGroups.emplace_back(i, j); i = j; } // Sort ungrouped relocations by offset to minimize the encoded length. 
llvm::sort(ungroupedNonRelatives, [](const Elf_Rela &a, const Elf_Rela &b) { return a.r_offset < b.r_offset; }); unsigned hasAddendIfRela = config->isRela ? RELOCATION_GROUP_HAS_ADDEND_FLAG : 0; uint64_t offset = 0; uint64_t addend = 0; // Emit the run-length encoding for the groups of adjacent relative // relocations. Each group is represented using two groups in the packed // format. The first is used to set the current offset to the start of the // group (and also encodes the first relocation), and the second encodes the // remaining relocations. for (std::vector &g : relativeGroups) { // The first relocation in the group. add(1); add(RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG | RELOCATION_GROUPED_BY_INFO_FLAG | hasAddendIfRela); add(g[0].r_offset - offset); add(target->relativeRel); if (config->isRela) { add(g[0].r_addend - addend); addend = g[0].r_addend; } // The remaining relocations. add(g.size() - 1); add(RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG | RELOCATION_GROUPED_BY_INFO_FLAG | hasAddendIfRela); add(config->wordsize); add(target->relativeRel); if (config->isRela) { for (auto i = g.begin() + 1, e = g.end(); i != e; ++i) { add(i->r_addend - addend); addend = i->r_addend; } } offset = g.back().r_offset; } // Now the ungrouped relatives. if (!ungroupedRelatives.empty()) { add(ungroupedRelatives.size()); add(RELOCATION_GROUPED_BY_INFO_FLAG | hasAddendIfRela); add(target->relativeRel); for (Elf_Rela &r : ungroupedRelatives) { add(r.r_offset - offset); offset = r.r_offset; if (config->isRela) { add(r.r_addend - addend); addend = r.r_addend; } } } // Grouped non-relatives. for (ArrayRef g : nonRelativeGroups) { add(g.size()); add(RELOCATION_GROUPED_BY_INFO_FLAG); add(g[0].r_info); for (const Elf_Rela &r : g) { add(r.r_offset - offset); offset = r.r_offset; } addend = 0; } // Finally the ungrouped non-relative relocations. 
if (!ungroupedNonRelatives.empty()) { add(ungroupedNonRelatives.size()); add(hasAddendIfRela); for (Elf_Rela &r : ungroupedNonRelatives) { add(r.r_offset - offset); offset = r.r_offset; add(r.r_info); if (config->isRela) { add(r.r_addend - addend); addend = r.r_addend; } } } // Don't allow the section to shrink; otherwise the size of the section can // oscillate infinitely. if (relocData.size() < oldSize) relocData.append(oldSize - relocData.size(), 0); // Returns whether the section size changed. We need to keep recomputing both // section layout and the contents of this section until the size converges // because changing this section's size can affect section layout, which in // turn can affect the sizes of the LEB-encoded integers stored in this // section. return relocData.size() != oldSize; } template RelrSection::RelrSection() { this->entsize = config->wordsize; } template bool RelrSection::updateAllocSize() { // This function computes the contents of an SHT_RELR packed relocation // section. // // Proposal for adding SHT_RELR sections to generic-abi is here: // https://groups.google.com/forum/#!topic/generic-abi/bX460iggiKg // // The encoded sequence of Elf64_Relr entries in a SHT_RELR section looks // like [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ] // // i.e. start with an address, followed by any number of bitmaps. The address // entry encodes 1 relocation. The subsequent bitmap entries encode up to 63 // relocations each, at subsequent offsets following the last address entry. // // The bitmap entries must have 1 in the least significant bit. The assumption // here is that an address cannot have 1 in lsb. Odd addresses are not // supported. // // Excluding the least significant bit in the bitmap, each non-zero bit in // the bitmap represents a relocation to be applied to a corresponding machine // word that follows the base address word. 
The second least significant bit // represents the machine word immediately following the initial address, and // each bit that follows represents the next word, in linear order. As such, // a single bitmap can encode up to 31 relocations in a 32-bit object, and // 63 relocations in a 64-bit object. // // This encoding has a couple of interesting properties: // 1. Looking at any entry, it is clear whether it's an address or a bitmap: // even means address, odd means bitmap. // 2. Just a simple list of addresses is a valid encoding. size_t oldSize = relrRelocs.size(); relrRelocs.clear(); // Same as Config->Wordsize but faster because this is a compile-time // constant. const size_t wordsize = sizeof(typename ELFT::uint); // Number of bits to use for the relocation offsets bitmap. // Must be either 63 or 31. const size_t nBits = wordsize * 8 - 1; // Get offsets for all relative relocations and sort them. std::vector offsets; for (const RelativeReloc &rel : relocs) offsets.push_back(rel.getOffset()); llvm::sort(offsets); // For each leading relocation, find following ones that can be folded // as a bitmap and fold them. for (size_t i = 0, e = offsets.size(); i < e;) { // Add a leading relocation. relrRelocs.push_back(Elf_Relr(offsets[i])); uint64_t base = offsets[i] + wordsize; ++i; // Find foldable relocations to construct bitmaps. while (i < e) { uint64_t bitmap = 0; while (i < e) { uint64_t delta = offsets[i] - base; // If it is too far, it cannot be folded. if (delta >= nBits * wordsize) break; // If it is not a multiple of wordsize away, it cannot be folded. if (delta % wordsize) break; // Fold it. bitmap |= 1ULL << (delta / wordsize); ++i; } if (!bitmap) break; relrRelocs.push_back(Elf_Relr((bitmap << 1) | 1)); base += nBits * wordsize; } } // Don't allow the section to shrink; otherwise the size of the section can // oscillate infinitely. Trailing 1s do not decode to more relocations. 
if (relrRelocs.size() < oldSize) { log(".relr.dyn needs " + Twine(oldSize - relrRelocs.size()) + " padding word(s)"); relrRelocs.resize(oldSize, Elf_Relr(1)); } return relrRelocs.size() != oldSize; } SymbolTableBaseSection::SymbolTableBaseSection(StringTableSection &strTabSec) : SyntheticSection(strTabSec.isDynamic() ? (uint64_t)SHF_ALLOC : 0, strTabSec.isDynamic() ? SHT_DYNSYM : SHT_SYMTAB, config->wordsize, strTabSec.isDynamic() ? ".dynsym" : ".symtab"), strTabSec(strTabSec) {} // Orders symbols according to their positions in the GOT, // in compliance with MIPS ABI rules. // See "Global Offset Table" in Chapter 5 in the following document // for detailed description: // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf static bool sortMipsSymbols(const SymbolTableEntry &l, const SymbolTableEntry &r) { // Sort entries related to non-local preemptible symbols by GOT indexes. // All other entries go to the beginning of a dynsym in arbitrary order. if (l.sym->isInGot() && r.sym->isInGot()) return l.sym->gotIndex < r.sym->gotIndex; if (!l.sym->isInGot() && !r.sym->isInGot()) return false; return !l.sym->isInGot(); } void SymbolTableBaseSection::finalizeContents() { if (OutputSection *sec = strTabSec.getParent()) getParent()->link = sec->sectionIndex; if (this->type != SHT_DYNSYM) { sortSymTabSymbols(); return; } // If it is a .dynsym, there should be no local symbols, but we need // to do a few things for the dynamic linker. // Section's Info field has the index of the first non-local symbol. // Because the first symbol entry is a null entry, 1 is the first. getParent()->info = 1; if (getPartition().gnuHashTab) { // NB: It also sorts Symbols to meet the GNU hash table requirements. getPartition().gnuHashTab->addSymbols(symbols); } else if (config->emachine == EM_MIPS) { llvm::stable_sort(symbols, sortMipsSymbols); } // Only the main partition's dynsym indexes are stored in the symbols // themselves. All other partitions use a lookup table. 
if (this == mainPart->dynSymTab) { size_t i = 0; for (const SymbolTableEntry &s : symbols) s.sym->dynsymIndex = ++i; } } // The ELF spec requires that all local symbols precede global symbols, so we // sort symbol entries in this function. (For .dynsym, we don't do that because // symbols for dynamic linking are inherently all globals.) // // Aside from above, we put local symbols in groups starting with the STT_FILE // symbol. That is convenient for purpose of identifying where are local symbols // coming from. void SymbolTableBaseSection::sortSymTabSymbols() { // Move all local symbols before global symbols. auto e = std::stable_partition( symbols.begin(), symbols.end(), [](const SymbolTableEntry &s) { return s.sym->isLocal() || s.sym->computeBinding() == STB_LOCAL; }); size_t numLocals = e - symbols.begin(); getParent()->info = numLocals + 1; // We want to group the local symbols by file. For that we rebuild the local // part of the symbols vector. We do not need to care about the STT_FILE // symbols, they are already naturally placed first in each group. That // happens because STT_FILE is always the first symbol in the object and hence // precede all other local symbols we add for a file. MapVector> arr; for (const SymbolTableEntry &s : llvm::make_range(symbols.begin(), e)) arr[s.sym->file].push_back(s); auto i = symbols.begin(); for (std::pair> &p : arr) for (SymbolTableEntry &entry : p.second) *i++ = entry; } void SymbolTableBaseSection::addSymbol(Symbol *b) { // Adding a local symbol to a .dynsym is a bug. assert(this->type != SHT_DYNSYM || !b->isLocal()); bool hashIt = b->isLocal(); symbols.push_back({b, strTabSec.addString(b->getName(), hashIt)}); } size_t SymbolTableBaseSection::getSymbolIndex(Symbol *sym) { if (this == mainPart->dynSymTab) return sym->dynsymIndex; // Initializes symbol lookup tables lazily. This is used only for -r, // -emit-relocs and dynsyms in partitions other than the main one. 
llvm::call_once(onceFlag, [&] { symbolIndexMap.reserve(symbols.size()); size_t i = 0; for (const SymbolTableEntry &e : symbols) { if (e.sym->type == STT_SECTION) sectionIndexMap[e.sym->getOutputSection()] = ++i; else symbolIndexMap[e.sym] = ++i; } }); // Section symbols are mapped based on their output sections // to maintain their semantics. if (sym->type == STT_SECTION) return sectionIndexMap.lookup(sym->getOutputSection()); return symbolIndexMap.lookup(sym); } template SymbolTableSection::SymbolTableSection(StringTableSection &strTabSec) : SymbolTableBaseSection(strTabSec) { this->entsize = sizeof(Elf_Sym); } static BssSection *getCommonSec(Symbol *sym) { if (!config->defineCommon) if (auto *d = dyn_cast(sym)) return dyn_cast_or_null(d->section); return nullptr; } static uint32_t getSymSectionIndex(Symbol *sym) { if (getCommonSec(sym)) return SHN_COMMON; if (!isa(sym) || sym->needsPltAddr) return SHN_UNDEF; if (const OutputSection *os = sym->getOutputSection()) return os->sectionIndex >= SHN_LORESERVE ? (uint32_t)SHN_XINDEX : os->sectionIndex; return SHN_ABS; } // Write the internal symbol table contents to the output symbol table. template void SymbolTableSection::writeTo(uint8_t *buf) { // The first entry is a null entry as per the ELF spec. memset(buf, 0, sizeof(Elf_Sym)); buf += sizeof(Elf_Sym); auto *eSym = reinterpret_cast(buf); for (SymbolTableEntry &ent : symbols) { Symbol *sym = ent.sym; bool isDefinedHere = type == SHT_SYMTAB || sym->partition == partition; // Set st_info and st_other. eSym->st_other = 0; if (sym->isLocal()) { eSym->setBindingAndType(STB_LOCAL, sym->type); } else { eSym->setBindingAndType(sym->computeBinding(), sym->type); eSym->setVisibility(sym->visibility); } // The 3 most significant bits of st_other are used by OpenPOWER ABI. // See getPPC64GlobalEntryToLocalEntryOffset() for more details. 
if (config->emachine == EM_PPC64) eSym->st_other |= sym->stOther & 0xe0; eSym->st_name = ent.strTabOffset; if (isDefinedHere) eSym->st_shndx = getSymSectionIndex(ent.sym); else eSym->st_shndx = 0; // Copy symbol size if it is a defined symbol. st_size is not significant // for undefined symbols, so whether copying it or not is up to us if that's // the case. We'll leave it as zero because by not setting a value, we can // get the exact same outputs for two sets of input files that differ only // in undefined symbol size in DSOs. if (eSym->st_shndx == SHN_UNDEF || !isDefinedHere) eSym->st_size = 0; else eSym->st_size = sym->getSize(); // st_value is usually an address of a symbol, but that has a // special meaining for uninstantiated common symbols (this can // occur if -r is given). if (BssSection *commonSec = getCommonSec(ent.sym)) eSym->st_value = commonSec->alignment; else if (isDefinedHere) eSym->st_value = sym->getVA(); else eSym->st_value = 0; ++eSym; } // On MIPS we need to mark symbol which has a PLT entry and requires // pointer equality by STO_MIPS_PLT flag. That is necessary to help // dynamic linker distinguish such symbols and MIPS lazy-binding stubs. // https://sourceware.org/ml/binutils/2008-07/txt00000.txt if (config->emachine == EM_MIPS) { auto *eSym = reinterpret_cast(buf); for (SymbolTableEntry &ent : symbols) { Symbol *sym = ent.sym; if (sym->isInPlt() && sym->needsPltAddr) eSym->st_other |= STO_MIPS_PLT; if (isMicroMips()) { // We already set the less-significant bit for symbols // marked by the `STO_MIPS_MICROMIPS` flag and for microMIPS PLT // records. That allows us to distinguish such symbols in // the `MIPS::relocateOne()` routine. Now we should // clear that bit for non-dynamic symbol table, so tools // like `objdump` will be able to deal with a correct // symbol position. 
if (sym->isDefined() && ((sym->stOther & STO_MIPS_MICROMIPS) || sym->needsPltAddr)) { if (!strTabSec.isDynamic()) eSym->st_value &= ~1; eSym->st_other |= STO_MIPS_MICROMIPS; } } if (config->relocatable) if (auto *d = dyn_cast(sym)) if (isMipsPIC(d)) eSym->st_other |= STO_MIPS_PIC; ++eSym; } } } SymtabShndxSection::SymtabShndxSection() : SyntheticSection(0, SHT_SYMTAB_SHNDX, 4, ".symtab_shndx") { this->entsize = 4; } void SymtabShndxSection::writeTo(uint8_t *buf) { // We write an array of 32 bit values, where each value has 1:1 association // with an entry in .symtab. If the corresponding entry contains SHN_XINDEX, // we need to write actual index, otherwise, we must write SHN_UNDEF(0). buf += 4; // Ignore .symtab[0] entry. for (const SymbolTableEntry &entry : in.symTab->getSymbols()) { if (getSymSectionIndex(entry.sym) == SHN_XINDEX) write32(buf, entry.sym->getOutputSection()->sectionIndex); buf += 4; } } bool SymtabShndxSection::isNeeded() const { // SHT_SYMTAB can hold symbols with section indices values up to // SHN_LORESERVE. If we need more, we want to use extension SHT_SYMTAB_SHNDX // section. Problem is that we reveal the final section indices a bit too // late, and we do not know them here. For simplicity, we just always create // a .symtab_shndx section when the amount of output sections is huge. size_t size = 0; for (BaseCommand *base : script->sectionCommands) if (isa(base)) ++size; return size >= SHN_LORESERVE; } void SymtabShndxSection::finalizeContents() { getParent()->link = in.symTab->getParent()->sectionIndex; } size_t SymtabShndxSection::getSize() const { return in.symTab->getNumSymbols() * 4; } // .hash and .gnu.hash sections contain on-disk hash tables that map // symbol names to their dynamic symbol table indices. Their purpose // is to help the dynamic linker resolve symbols quickly. If ELF files // don't have them, the dynamic linker has to do linear search on all // dynamic symbols, which makes programs slower. 
Therefore, a .hash // section is added to a DSO by default. A .gnu.hash is added if you // give the -hash-style=gnu or -hash-style=both option. // // The Unix semantics of resolving dynamic symbols is somewhat expensive. // Each ELF file has a list of DSOs that the ELF file depends on and a // list of dynamic symbols that need to be resolved from any of the // DSOs. That means resolving all dynamic symbols takes O(m)*O(n) // where m is the number of DSOs and n is the number of dynamic // symbols. For modern large programs, both m and n are large. So // making each step faster by using hash tables substiantially // improves time to load programs. // // (Note that this is not the only way to design the shared library. // For instance, the Windows DLL takes a different approach. On // Windows, each dynamic symbol has a name of DLL from which the symbol // has to be resolved. That makes the cost of symbol resolution O(n). // This disables some hacky techniques you can use on Unix such as // LD_PRELOAD, but this is arguably better semantics than the Unix ones.) // // Due to historical reasons, we have two different hash tables, .hash // and .gnu.hash. They are for the same purpose, and .gnu.hash is a new // and better version of .hash. .hash is just an on-disk hash table, but // .gnu.hash has a bloom filter in addition to a hash table to skip // DSOs very quickly. If you are sure that your dynamic linker knows // about .gnu.hash, you want to specify -hash-style=gnu. Otherwise, a // safe bet is to specify -hash-style=both for backward compatibility. GnuHashTableSection::GnuHashTableSection() : SyntheticSection(SHF_ALLOC, SHT_GNU_HASH, config->wordsize, ".gnu.hash") { } void GnuHashTableSection::finalizeContents() { if (OutputSection *sec = getPartition().dynSymTab->getParent()) getParent()->link = sec->sectionIndex; // Computes bloom filter size in word size. We want to allocate 12 // bits for each symbol. It must be a power of two. 
if (symbols.empty()) { maskWords = 1; } else { uint64_t numBits = symbols.size() * 12; maskWords = NextPowerOf2(numBits / (config->wordsize * 8)); } size = 16; // Header size += config->wordsize * maskWords; // Bloom filter size += nBuckets * 4; // Hash buckets size += symbols.size() * 4; // Hash values } void GnuHashTableSection::writeTo(uint8_t *buf) { // The output buffer is not guaranteed to be zero-cleared because we pre- // fill executable sections with trap instructions. This is a precaution // for that case, which happens only when -no-rosegment is given. memset(buf, 0, size); // Write a header. write32(buf, nBuckets); write32(buf + 4, getPartition().dynSymTab->getNumSymbols() - symbols.size()); write32(buf + 8, maskWords); write32(buf + 12, Shift2); buf += 16; // Write a bloom filter and a hash table. writeBloomFilter(buf); buf += config->wordsize * maskWords; writeHashTable(buf); } // This function writes a 2-bit bloom filter. This bloom filter alone // usually filters out 80% or more of all symbol lookups [1]. // The dynamic linker uses the hash table only when a symbol is not // filtered out by a bloom filter. // // [1] Ulrich Drepper (2011), "How To Write Shared Libraries" (Ver. 4.1.2), // p.9, https://www.akkadia.org/drepper/dsohowto.pdf void GnuHashTableSection::writeBloomFilter(uint8_t *buf) { unsigned c = config->is64 ? 64 : 32; for (const Entry &sym : symbols) { // When C = 64, we choose a word with bits [6:...] and set 1 to two bits in // the word using bits [0:5] and [26:31]. 
size_t i = (sym.hash / c) & (maskWords - 1); uint64_t val = readUint(buf + i * config->wordsize); val |= uint64_t(1) << (sym.hash % c); val |= uint64_t(1) << ((sym.hash >> Shift2) % c); writeUint(buf + i * config->wordsize, val); } } void GnuHashTableSection::writeHashTable(uint8_t *buf) { uint32_t *buckets = reinterpret_cast(buf); uint32_t oldBucket = -1; uint32_t *values = buckets + nBuckets; for (auto i = symbols.begin(), e = symbols.end(); i != e; ++i) { // Write a hash value. It represents a sequence of chains that share the // same hash modulo value. The last element of each chain is terminated by // LSB 1. uint32_t hash = i->hash; bool isLastInChain = (i + 1) == e || i->bucketIdx != (i + 1)->bucketIdx; hash = isLastInChain ? hash | 1 : hash & ~1; write32(values++, hash); if (i->bucketIdx == oldBucket) continue; // Write a hash bucket. Hash buckets contain indices in the following hash // value table. write32(buckets + i->bucketIdx, getPartition().dynSymTab->getSymbolIndex(i->sym)); oldBucket = i->bucketIdx; } } static uint32_t hashGnu(StringRef name) { uint32_t h = 5381; for (uint8_t c : name) h = (h << 5) + h + c; return h; } // Add symbols to this symbol hash table. Note that this function // destructively sort a given vector -- which is needed because // GNU-style hash table places some sorting requirements. void GnuHashTableSection::addSymbols(std::vector &v) { // We cannot use 'auto' for Mid because GCC 6.1 cannot deduce // its type correctly. std::vector::iterator mid = std::stable_partition(v.begin(), v.end(), [&](const SymbolTableEntry &s) { return !s.sym->isDefined() || s.sym->partition != partition; }); // We chose load factor 4 for the on-disk hash table. For each hash // collision, the dynamic linker will compare a uint32_t hash value. // Since the integer comparison is quite fast, we believe we can // make the load factor even larger. 4 is just a conservative choice. 
// // Note that we don't want to create a zero-sized hash table because // Android loader as of 2018 doesn't like a .gnu.hash containing such // table. If that's the case, we create a hash table with one unused // dummy slot. nBuckets = std::max((v.end() - mid) / 4, 1); if (mid == v.end()) return; for (SymbolTableEntry &ent : llvm::make_range(mid, v.end())) { Symbol *b = ent.sym; uint32_t hash = hashGnu(b->getName()); uint32_t bucketIdx = hash % nBuckets; symbols.push_back({b, ent.strTabOffset, hash, bucketIdx}); } llvm::stable_sort(symbols, [](const Entry &l, const Entry &r) { return l.bucketIdx < r.bucketIdx; }); v.erase(mid, v.end()); for (const Entry &ent : symbols) v.push_back({ent.sym, ent.strTabOffset}); } HashTableSection::HashTableSection() : SyntheticSection(SHF_ALLOC, SHT_HASH, 4, ".hash") { this->entsize = 4; } void HashTableSection::finalizeContents() { SymbolTableBaseSection *symTab = getPartition().dynSymTab; if (OutputSection *sec = symTab->getParent()) getParent()->link = sec->sectionIndex; unsigned numEntries = 2; // nbucket and nchain. numEntries += symTab->getNumSymbols(); // The chain entries. // Create as many buckets as there are symbols. numEntries += symTab->getNumSymbols(); this->size = numEntries * 4; } void HashTableSection::writeTo(uint8_t *buf) { SymbolTableBaseSection *symTab = getPartition().dynSymTab; // See comment in GnuHashTableSection::writeTo. 
memset(buf, 0, size); unsigned numSymbols = symTab->getNumSymbols(); uint32_t *p = reinterpret_cast(buf); write32(p++, numSymbols); // nbucket write32(p++, numSymbols); // nchain uint32_t *buckets = p; uint32_t *chains = p + numSymbols; for (const SymbolTableEntry &s : symTab->getSymbols()) { Symbol *sym = s.sym; StringRef name = sym->getName(); unsigned i = sym->dynsymIndex; uint32_t hash = hashSysV(name) % numSymbols; chains[i] = buckets[hash]; write32(buckets + hash, i); } } PltSection::PltSection() : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, ".plt"), headerSize(target->pltHeaderSize) { // On PowerPC, this section contains lazy symbol resolvers. - if (config->emachine == EM_PPC || config->emachine == EM_PPC64) { + if (config->emachine == EM_PPC64) { name = ".glink"; alignment = 4; - // PLTresolve is at the end. - if (config->emachine == EM_PPC) - footerSize = 64; } // On x86 when IBT is enabled, this section contains the second PLT (lazy // symbol resolvers). if ((config->emachine == EM_386 || config->emachine == EM_X86_64) && (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)) name = ".plt.sec"; // The PLT needs to be writable on SPARC as the dynamic linker will // modify the instructions in the PLT entries. if (config->emachine == EM_SPARCV9) this->flags |= SHF_WRITE; } void PltSection::writeTo(uint8_t *buf) { - if (config->emachine == EM_PPC) { - writePPC32GlinkSection(buf, entries.size()); - return; - } - // At beginning of PLT, we have code to call the dynamic // linker to resolve dynsyms at runtime. Write such code. 
target->writePltHeader(buf); size_t off = headerSize; for (const Symbol *sym : entries) { target->writePlt(buf + off, *sym, getVA() + off); off += target->pltEntrySize; } } void PltSection::addEntry(Symbol &sym) { sym.pltIndex = entries.size(); entries.push_back(&sym); } size_t PltSection::getSize() const { - return headerSize + entries.size() * target->pltEntrySize + footerSize; + return headerSize + entries.size() * target->pltEntrySize; } bool PltSection::isNeeded() const { // For -z retpolineplt, .iplt needs the .plt header. return !entries.empty() || (config->zRetpolineplt && in.iplt->isNeeded()); } // Used by ARM to add mapping symbols in the PLT section, which aid // disassembly. void PltSection::addSymbols() { target->addPltHeaderSymbols(*this); size_t off = headerSize; for (size_t i = 0; i < entries.size(); ++i) { target->addPltSymbols(*this, off); off += target->pltEntrySize; } } IpltSection::IpltSection() : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, ".iplt") { if (config->emachine == EM_PPC || config->emachine == EM_PPC64) { name = ".glink"; alignment = 4; } } void IpltSection::writeTo(uint8_t *buf) { uint32_t off = 0; for (const Symbol *sym : entries) { target->writeIplt(buf + off, *sym, getVA() + off); off += target->ipltEntrySize; } } size_t IpltSection::getSize() const { return entries.size() * target->ipltEntrySize; } void IpltSection::addEntry(Symbol &sym) { sym.pltIndex = entries.size(); entries.push_back(&sym); } // ARM uses mapping symbols to aid disassembly. 
void IpltSection::addSymbols() { size_t off = 0; for (size_t i = 0, e = entries.size(); i != e; ++i) { target->addPltSymbols(*this, off); off += target->pltEntrySize; } +} + +PPC32GlinkSection::PPC32GlinkSection() { + name = ".glink"; + alignment = 4; +} + +void PPC32GlinkSection::writeTo(uint8_t *buf) { + writePPC32GlinkSection(buf, entries.size()); +} + +size_t PPC32GlinkSection::getSize() const { + return headerSize + entries.size() * target->pltEntrySize + footerSize; } // This is an x86-only extra PLT section and used only when a security // enhancement feature called CET is enabled. In this comment, I'll explain what // the feature is and why we have two PLT sections if CET is enabled. // // So, what does CET do? CET introduces a new restriction to indirect jump // instructions. CET works this way. Assume that CET is enabled. Then, if you // execute an indirect jump instruction, the processor verifies that a special // "landing pad" instruction (which is actually a repurposed NOP instruction and // now called "endbr32" or "endbr64") is at the jump target. If the jump target // does not start with that instruction, the processor raises an exception // instead of continuing executing code. // // If CET is enabled, the compiler emits endbr to all locations where indirect // jumps may jump to. // // This mechanism makes it extremely hard to transfer the control to a middle of // a function that is not supporsed to be a indirect jump target, preventing // certain types of attacks such as ROP or JOP. // // Note that the processors in the market as of 2019 don't actually support the // feature. Only the spec is available at the moment. // // Now, I'll explain why we have this extra PLT section for CET. // // Since you can indirectly jump to a PLT entry, we have to make PLT entries // start with endbr. The problem is there's no extra space for endbr (which is 4 // bytes long), as the PLT entry is only 16 bytes long and all bytes are already // used. 
// // In order to deal with the issue, we split a PLT entry into two PLT entries. // Remember that each PLT entry contains code to jump to an address read from // .got.plt AND code to resolve a dynamic symbol lazily. With the 2-PLT scheme, // the former code is written to .plt.sec, and the latter code is written to // .plt. // // Lazy symbol resolution in the 2-PLT scheme works in the usual way, except // that the regular .plt is now called .plt.sec and .plt is repurposed to // contain only code for lazy symbol resolution. // // In other words, this is how the 2-PLT scheme works. Application code is // supposed to jump to .plt.sec to call an external function. Each .plt.sec // entry contains code to read an address from a corresponding .got.plt entry // and jump to that address. Addresses in .got.plt initially point to .plt, so // when an application calls an external function for the first time, the // control is transferred to a function that resolves a symbol name from // external shared object files. That function then rewrites a .got.plt entry // with a resolved address, so that the subsequent function calls directly jump // to a desired location from .plt.sec. // // There is an open question as to whether the 2-PLT scheme was desirable or // not. We could have simply extended the PLT entry size to 32-bytes to // accommodate endbr, and that scheme would have been much simpler than the // 2-PLT scheme. One reason to split PLT was, by doing that, we could keep hot // code (.plt.sec) from cold code (.plt). But as far as I know no one proved // that the optimization actually makes a difference. // // That said, the 2-PLT scheme is a part of the ABI, debuggers and other tools // depend on it, so we implement the ABI. 
IBTPltSection::IBTPltSection() : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, ".plt") {} void IBTPltSection::writeTo(uint8_t *buf) { target->writeIBTPlt(buf, in.plt->getNumEntries()); } size_t IBTPltSection::getSize() const { // 16 is the header size of .plt. return 16 + in.plt->getNumEntries() * target->pltEntrySize; } // The string hash function for .gdb_index. static uint32_t computeGdbHash(StringRef s) { uint32_t h = 0; for (uint8_t c : s) h = h * 67 + toLower(c) - 113; return h; } GdbIndexSection::GdbIndexSection() : SyntheticSection(0, SHT_PROGBITS, 1, ".gdb_index") {} // Returns the desired size of an on-disk hash table for a .gdb_index section. // There's a tradeoff between size and collision rate. We aim 75% utilization. size_t GdbIndexSection::computeSymtabSize() const { return std::max(NextPowerOf2(symbols.size() * 4 / 3), 1024); } // Compute the output section size. void GdbIndexSection::initOutputSize() { size = sizeof(GdbIndexHeader) + computeSymtabSize() * 8; for (GdbChunk &chunk : chunks) size += chunk.compilationUnits.size() * 16 + chunk.addressAreas.size() * 20; // Add the constant pool size if exists. 
if (!symbols.empty()) { GdbSymbol &sym = symbols.back(); size += sym.nameOff + sym.name.size() + 1; } } static std::vector getDebugInfoSections() { std::vector ret; for (InputSectionBase *s : inputSections) if (InputSection *isec = dyn_cast(s)) if (isec->name == ".debug_info") ret.push_back(isec); return ret; } static std::vector readCuList(DWARFContext &dwarf) { std::vector ret; for (std::unique_ptr &cu : dwarf.compile_units()) ret.push_back({cu->getOffset(), cu->getLength() + 4}); return ret; } static std::vector readAddressAreas(DWARFContext &dwarf, InputSection *sec) { std::vector ret; uint32_t cuIdx = 0; for (std::unique_ptr &cu : dwarf.compile_units()) { if (Error e = cu->tryExtractDIEsIfNeeded(false)) { error(toString(sec) + ": " + toString(std::move(e))); return {}; } Expected ranges = cu->collectAddressRanges(); if (!ranges) { error(toString(sec) + ": " + toString(ranges.takeError())); return {}; } ArrayRef sections = sec->file->getSections(); for (DWARFAddressRange &r : *ranges) { if (r.SectionIndex == -1ULL) continue; InputSectionBase *s = sections[r.SectionIndex]; if (!s || s == &InputSection::discarded || !s->isLive()) continue; // Range list with zero size has no effect. if (r.LowPC == r.HighPC) continue; auto *isec = cast(s); uint64_t offset = isec->getOffsetInFile(); ret.push_back({isec, r.LowPC - offset, r.HighPC - offset, cuIdx}); } ++cuIdx; } return ret; } template static std::vector readPubNamesAndTypes(const LLDDwarfObj &obj, const std::vector &cus) { const DWARFSection &pubNames = obj.getGnuPubnamesSection(); const DWARFSection &pubTypes = obj.getGnuPubtypesSection(); std::vector ret; for (const DWARFSection *pub : {&pubNames, &pubTypes}) { DWARFDebugPubTable table(obj, *pub, config->isLE, true); for (const DWARFDebugPubTable::Set &set : table.getData()) { // The value written into the constant pool is kind << 24 | cuIndex. 
As we // don't know how many compilation units precede this object to compute // cuIndex, we compute (kind << 24 | cuIndexInThisObject) instead, and add // the number of preceding compilation units later. uint32_t i = llvm::partition_point(cus, [&](GdbIndexSection::CuEntry cu) { return cu.cuOffset < set.Offset; }) - cus.begin(); for (const DWARFDebugPubTable::Entry &ent : set.Entries) ret.push_back({{ent.Name, computeGdbHash(ent.Name)}, (ent.Descriptor.toBits() << 24) | i}); } } return ret; } // Create a list of symbols from a given list of symbol names and types // by uniquifying them by name. static std::vector createSymbols(ArrayRef> nameAttrs, const std::vector &chunks) { using GdbSymbol = GdbIndexSection::GdbSymbol; using NameAttrEntry = GdbIndexSection::NameAttrEntry; // For each chunk, compute the number of compilation units preceding it. uint32_t cuIdx = 0; std::vector cuIdxs(chunks.size()); for (uint32_t i = 0, e = chunks.size(); i != e; ++i) { cuIdxs[i] = cuIdx; cuIdx += chunks[i].compilationUnits.size(); } // The number of symbols we will handle in this function is of the order // of millions for very large executables, so we use multi-threading to // speed it up. size_t numShards = 32; size_t concurrency = 1; if (threadsEnabled) concurrency = std::min(PowerOf2Floor(hardware_concurrency()), numShards); // A sharded map to uniquify symbols by name. std::vector> map(numShards); size_t shift = 32 - countTrailingZeros(numShards); // Instantiate GdbSymbols while uniqufying them by name. 
std::vector> symbols(numShards); parallelForEachN(0, concurrency, [&](size_t threadId) { uint32_t i = 0; for (ArrayRef entries : nameAttrs) { for (const NameAttrEntry &ent : entries) { size_t shardId = ent.name.hash() >> shift; if ((shardId & (concurrency - 1)) != threadId) continue; uint32_t v = ent.cuIndexAndAttrs + cuIdxs[i]; size_t &idx = map[shardId][ent.name]; if (idx) { symbols[shardId][idx - 1].cuVector.push_back(v); continue; } idx = symbols[shardId].size() + 1; symbols[shardId].push_back({ent.name, {v}, 0, 0}); } ++i; } }); size_t numSymbols = 0; for (ArrayRef v : symbols) numSymbols += v.size(); // The return type is a flattened vector, so we'll copy each vector // contents to Ret. std::vector ret; ret.reserve(numSymbols); for (std::vector &vec : symbols) for (GdbSymbol &sym : vec) ret.push_back(std::move(sym)); // CU vectors and symbol names are adjacent in the output file. // We can compute their offsets in the output file now. size_t off = 0; for (GdbSymbol &sym : ret) { sym.cuVectorOff = off; off += (sym.cuVector.size() + 1) * 4; } for (GdbSymbol &sym : ret) { sym.nameOff = off; off += sym.name.size() + 1; } return ret; } // Returns a newly-created .gdb_index section. template GdbIndexSection *GdbIndexSection::create() { std::vector sections = getDebugInfoSections(); // .debug_gnu_pub{names,types} are useless in executables. // They are present in input object files solely for creating // a .gdb_index. So we can remove them from the output. 
for (InputSectionBase *s : inputSections) if (s->name == ".debug_gnu_pubnames" || s->name == ".debug_gnu_pubtypes") s->markDead(); std::vector chunks(sections.size()); std::vector> nameAttrs(sections.size()); parallelForEachN(0, sections.size(), [&](size_t i) { ObjFile *file = sections[i]->getFile(); DWARFContext dwarf(std::make_unique>(file)); chunks[i].sec = sections[i]; chunks[i].compilationUnits = readCuList(dwarf); chunks[i].addressAreas = readAddressAreas(dwarf, sections[i]); nameAttrs[i] = readPubNamesAndTypes( static_cast &>(dwarf.getDWARFObj()), chunks[i].compilationUnits); }); auto *ret = make(); ret->chunks = std::move(chunks); ret->symbols = createSymbols(nameAttrs, ret->chunks); ret->initOutputSize(); return ret; } void GdbIndexSection::writeTo(uint8_t *buf) { // Write the header. auto *hdr = reinterpret_cast(buf); uint8_t *start = buf; hdr->version = 7; buf += sizeof(*hdr); // Write the CU list. hdr->cuListOff = buf - start; for (GdbChunk &chunk : chunks) { for (CuEntry &cu : chunk.compilationUnits) { write64le(buf, chunk.sec->outSecOff + cu.cuOffset); write64le(buf + 8, cu.cuLength); buf += 16; } } // Write the address area. hdr->cuTypesOff = buf - start; hdr->addressAreaOff = buf - start; uint32_t cuOff = 0; for (GdbChunk &chunk : chunks) { for (AddressEntry &e : chunk.addressAreas) { uint64_t baseAddr = e.section->getVA(0); write64le(buf, baseAddr + e.lowAddress); write64le(buf + 8, baseAddr + e.highAddress); write32le(buf + 16, e.cuIndex + cuOff); buf += 20; } cuOff += chunk.compilationUnits.size(); } // Write the on-disk open-addressing hash table containing symbols. 
hdr->symtabOff = buf - start; size_t symtabSize = computeSymtabSize(); uint32_t mask = symtabSize - 1; for (GdbSymbol &sym : symbols) { uint32_t h = sym.name.hash(); uint32_t i = h & mask; uint32_t step = ((h * 17) & mask) | 1; while (read32le(buf + i * 8)) i = (i + step) & mask; write32le(buf + i * 8, sym.nameOff); write32le(buf + i * 8 + 4, sym.cuVectorOff); } buf += symtabSize * 8; // Write the string pool. hdr->constantPoolOff = buf - start; parallelForEach(symbols, [&](GdbSymbol &sym) { memcpy(buf + sym.nameOff, sym.name.data(), sym.name.size()); }); // Write the CU vectors. for (GdbSymbol &sym : symbols) { write32le(buf, sym.cuVector.size()); buf += 4; for (uint32_t val : sym.cuVector) { write32le(buf, val); buf += 4; } } } bool GdbIndexSection::isNeeded() const { return !chunks.empty(); } EhFrameHeader::EhFrameHeader() : SyntheticSection(SHF_ALLOC, SHT_PROGBITS, 4, ".eh_frame_hdr") {} void EhFrameHeader::writeTo(uint8_t *buf) { // Unlike most sections, the EhFrameHeader section is written while writing // another section, namely EhFrameSection, which calls the write() function // below from its writeTo() function. This is necessary because the contents // of EhFrameHeader depend on the relocated contents of EhFrameSection and we // don't know which order the sections will be written in. } // .eh_frame_hdr contains a binary search table of pointers to FDEs. // Each entry of the search table consists of two values, // the starting PC from where FDEs covers, and the FDE's address. // It is sorted by PC. 
void EhFrameHeader::write() { uint8_t *buf = Out::bufferStart + getParent()->offset + outSecOff; using FdeData = EhFrameSection::FdeData; std::vector fdes = getPartition().ehFrame->getFdeData(); buf[0] = 1; buf[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4; buf[2] = DW_EH_PE_udata4; buf[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4; write32(buf + 4, getPartition().ehFrame->getParent()->addr - this->getVA() - 4); write32(buf + 8, fdes.size()); buf += 12; for (FdeData &fde : fdes) { write32(buf, fde.pcRel); write32(buf + 4, fde.fdeVARel); buf += 8; } } size_t EhFrameHeader::getSize() const { // .eh_frame_hdr has a 12 bytes header followed by an array of FDEs. return 12 + getPartition().ehFrame->numFdes * 8; } bool EhFrameHeader::isNeeded() const { return isLive() && getPartition().ehFrame->isNeeded(); } VersionDefinitionSection::VersionDefinitionSection() : SyntheticSection(SHF_ALLOC, SHT_GNU_verdef, sizeof(uint32_t), ".gnu.version_d") {} StringRef VersionDefinitionSection::getFileDefName() { if (!getPartition().name.empty()) return getPartition().name; if (!config->soName.empty()) return config->soName; return config->outputFile; } void VersionDefinitionSection::finalizeContents() { fileDefNameOff = getPartition().dynStrTab->addString(getFileDefName()); for (const VersionDefinition &v : namedVersionDefs()) verDefNameOffs.push_back(getPartition().dynStrTab->addString(v.name)); if (OutputSection *sec = getPartition().dynStrTab->getParent()) getParent()->link = sec->sectionIndex; // sh_info should be set to the number of definitions. This fact is missed in // documentation, but confirmed by binutils community: // https://sourceware.org/ml/binutils/2014-11/msg00355.html getParent()->info = getVerDefNum(); } void VersionDefinitionSection::writeOne(uint8_t *buf, uint32_t index, StringRef name, size_t nameOff) { uint16_t flags = index == 1 ? VER_FLG_BASE : 0; // Write a verdef. 
write16(buf, 1); // vd_version write16(buf + 2, flags); // vd_flags write16(buf + 4, index); // vd_ndx write16(buf + 6, 1); // vd_cnt write32(buf + 8, hashSysV(name)); // vd_hash write32(buf + 12, 20); // vd_aux write32(buf + 16, 28); // vd_next // Write a veraux. write32(buf + 20, nameOff); // vda_name write32(buf + 24, 0); // vda_next } void VersionDefinitionSection::writeTo(uint8_t *buf) { writeOne(buf, 1, getFileDefName(), fileDefNameOff); auto nameOffIt = verDefNameOffs.begin(); for (const VersionDefinition &v : namedVersionDefs()) { buf += EntrySize; writeOne(buf, v.id, v.name, *nameOffIt++); } // Need to terminate the last version definition. write32(buf + 16, 0); // vd_next } size_t VersionDefinitionSection::getSize() const { return EntrySize * getVerDefNum(); } // .gnu.version is a table where each entry is 2 byte long. VersionTableSection::VersionTableSection() : SyntheticSection(SHF_ALLOC, SHT_GNU_versym, sizeof(uint16_t), ".gnu.version") { this->entsize = 2; } void VersionTableSection::finalizeContents() { // At the moment of june 2016 GNU docs does not mention that sh_link field // should be set, but Sun docs do. Also readelf relies on this field. getParent()->link = getPartition().dynSymTab->getParent()->sectionIndex; } size_t VersionTableSection::getSize() const { return (getPartition().dynSymTab->getSymbols().size() + 1) * 2; } void VersionTableSection::writeTo(uint8_t *buf) { buf += 2; for (const SymbolTableEntry &s : getPartition().dynSymTab->getSymbols()) { write16(buf, s.sym->versionId); buf += 2; } } bool VersionTableSection::isNeeded() const { return isLive() && (getPartition().verDef || getPartition().verNeed->isNeeded()); } void addVerneed(Symbol *ss) { auto &file = cast(*ss->file); if (ss->verdefIndex == VER_NDX_GLOBAL) { ss->versionId = VER_NDX_GLOBAL; return; } if (file.vernauxs.empty()) file.vernauxs.resize(file.verdefs.size()); // Select a version identifier for the vernaux data structure, if we haven't // already allocated one. 
The verdef identifiers cover the range // [1..getVerDefNum()]; this causes the vernaux identifiers to start from // getVerDefNum()+1. if (file.vernauxs[ss->verdefIndex] == 0) file.vernauxs[ss->verdefIndex] = ++SharedFile::vernauxNum + getVerDefNum(); ss->versionId = file.vernauxs[ss->verdefIndex]; } template VersionNeedSection::VersionNeedSection() : SyntheticSection(SHF_ALLOC, SHT_GNU_verneed, sizeof(uint32_t), ".gnu.version_r") {} template void VersionNeedSection::finalizeContents() { for (SharedFile *f : sharedFiles) { if (f->vernauxs.empty()) continue; verneeds.emplace_back(); Verneed &vn = verneeds.back(); vn.nameStrTab = getPartition().dynStrTab->addString(f->soName); for (unsigned i = 0; i != f->vernauxs.size(); ++i) { if (f->vernauxs[i] == 0) continue; auto *verdef = reinterpret_cast(f->verdefs[i]); vn.vernauxs.push_back( {verdef->vd_hash, f->vernauxs[i], getPartition().dynStrTab->addString(f->getStringTable().data() + verdef->getAux()->vda_name)}); } } if (OutputSection *sec = getPartition().dynStrTab->getParent()) getParent()->link = sec->sectionIndex; getParent()->info = verneeds.size(); } template void VersionNeedSection::writeTo(uint8_t *buf) { // The Elf_Verneeds need to appear first, followed by the Elf_Vernauxs. auto *verneed = reinterpret_cast(buf); auto *vernaux = reinterpret_cast(verneed + verneeds.size()); for (auto &vn : verneeds) { // Create an Elf_Verneed for this DSO. verneed->vn_version = 1; verneed->vn_cnt = vn.vernauxs.size(); verneed->vn_file = vn.nameStrTab; verneed->vn_aux = reinterpret_cast(vernaux) - reinterpret_cast(verneed); verneed->vn_next = sizeof(Elf_Verneed); ++verneed; // Create the Elf_Vernauxs for this Elf_Verneed. 
for (auto &vna : vn.vernauxs) { vernaux->vna_hash = vna.hash; vernaux->vna_flags = 0; vernaux->vna_other = vna.verneedIndex; vernaux->vna_name = vna.nameStrTab; vernaux->vna_next = sizeof(Elf_Vernaux); ++vernaux; } vernaux[-1].vna_next = 0; } verneed[-1].vn_next = 0; } template size_t VersionNeedSection::getSize() const { return verneeds.size() * sizeof(Elf_Verneed) + SharedFile::vernauxNum * sizeof(Elf_Vernaux); } template bool VersionNeedSection::isNeeded() const { return isLive() && SharedFile::vernauxNum != 0; } void MergeSyntheticSection::addSection(MergeInputSection *ms) { ms->parent = this; sections.push_back(ms); assert(alignment == ms->alignment || !(ms->flags & SHF_STRINGS)); alignment = std::max(alignment, ms->alignment); } MergeTailSection::MergeTailSection(StringRef name, uint32_t type, uint64_t flags, uint32_t alignment) : MergeSyntheticSection(name, type, flags, alignment), builder(StringTableBuilder::RAW, alignment) {} size_t MergeTailSection::getSize() const { return builder.getSize(); } void MergeTailSection::writeTo(uint8_t *buf) { builder.write(buf); } void MergeTailSection::finalizeContents() { // Add all string pieces to the string table builder to create section // contents. for (MergeInputSection *sec : sections) for (size_t i = 0, e = sec->pieces.size(); i != e; ++i) if (sec->pieces[i].live) builder.add(sec->getData(i)); // Fix the string table content. After this, the contents will never change. builder.finalize(); // finalize() fixed tail-optimized strings, so we can now get // offsets of strings. Get an offset for each string and save it // to a corresponding SectionPiece for easy access. for (MergeInputSection *sec : sections) for (size_t i = 0, e = sec->pieces.size(); i != e; ++i) if (sec->pieces[i].live) sec->pieces[i].outputOff = builder.getOffset(sec->getData(i)); } void MergeNoTailSection::writeTo(uint8_t *buf) { for (size_t i = 0; i < numShards; ++i) shards[i].write(buf + shardOffsets[i]); } // This function is very hot (i.e. 
it can take several seconds to finish) // because sometimes the number of inputs is in an order of magnitude of // millions. So, we use multi-threading. // // For any strings S and T, we know S is not mergeable with T if S's hash // value is different from T's. If that's the case, we can safely put S and // T into different string builders without worrying about merge misses. // We do it in parallel. void MergeNoTailSection::finalizeContents() { // Initializes string table builders. for (size_t i = 0; i < numShards; ++i) shards.emplace_back(StringTableBuilder::RAW, alignment); // Concurrency level. Must be a power of 2 to avoid expensive modulo // operations in the following tight loop. size_t concurrency = 1; if (threadsEnabled) concurrency = std::min(PowerOf2Floor(hardware_concurrency()), numShards); // Add section pieces to the builders. parallelForEachN(0, concurrency, [&](size_t threadId) { for (MergeInputSection *sec : sections) { for (size_t i = 0, e = sec->pieces.size(); i != e; ++i) { if (!sec->pieces[i].live) continue; size_t shardId = getShardId(sec->pieces[i].hash); if ((shardId & (concurrency - 1)) == threadId) sec->pieces[i].outputOff = shards[shardId].add(sec->getData(i)); } } }); // Compute an in-section offset for each shard. size_t off = 0; for (size_t i = 0; i < numShards; ++i) { shards[i].finalizeInOrder(); if (shards[i].getSize() > 0) off = alignTo(off, alignment); shardOffsets[i] = off; off += shards[i].getSize(); } size = off; // So far, section pieces have offsets from beginning of shards, but // we want offsets from beginning of the whole section. Fix them. 
parallelForEach(sections, [&](MergeInputSection *sec) { for (size_t i = 0, e = sec->pieces.size(); i != e; ++i) if (sec->pieces[i].live) sec->pieces[i].outputOff += shardOffsets[getShardId(sec->pieces[i].hash)]; }); } MergeSyntheticSection *createMergeSynthetic(StringRef name, uint32_t type, uint64_t flags, uint32_t alignment) { bool shouldTailMerge = (flags & SHF_STRINGS) && config->optimize >= 2; if (shouldTailMerge) return make(name, type, flags, alignment); return make(name, type, flags, alignment); } template void splitSections() { // splitIntoPieces needs to be called on each MergeInputSection // before calling finalizeContents(). parallelForEach(inputSections, [](InputSectionBase *sec) { if (auto *s = dyn_cast(sec)) s->splitIntoPieces(); else if (auto *eh = dyn_cast(sec)) eh->split(); }); } MipsRldMapSection::MipsRldMapSection() : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, config->wordsize, ".rld_map") {} ARMExidxSyntheticSection::ARMExidxSyntheticSection() : SyntheticSection(SHF_ALLOC | SHF_LINK_ORDER, SHT_ARM_EXIDX, config->wordsize, ".ARM.exidx") {} static InputSection *findExidxSection(InputSection *isec) { for (InputSection *d : isec->dependentSections) if (d->type == SHT_ARM_EXIDX) return d; return nullptr; } static bool isValidExidxSectionDep(InputSection *isec) { return (isec->flags & SHF_ALLOC) && (isec->flags & SHF_EXECINSTR) && isec->getSize() > 0; } bool ARMExidxSyntheticSection::addSection(InputSection *isec) { if (isec->type == SHT_ARM_EXIDX) { if (InputSection *dep = isec->getLinkOrderDep()) if (isValidExidxSectionDep(dep)) exidxSections.push_back(isec); return true; } if (isValidExidxSectionDep(isec)) { executableSections.push_back(isec); return false; } // FIXME: we do not output a relocation section when --emit-relocs is used // as we do not have relocation sections for linker generated table entries // and we would have to erase at a late stage relocations from merged entries. 
// Given that exception tables are already position independent and a binary // analyzer could derive the relocations we choose to erase the relocations. if (config->emitRelocs && isec->type == SHT_REL) if (InputSectionBase *ex = isec->getRelocatedSection()) if (isa(ex) && ex->type == SHT_ARM_EXIDX) return true; return false; } // References to .ARM.Extab Sections have bit 31 clear and are not the // special EXIDX_CANTUNWIND bit-pattern. static bool isExtabRef(uint32_t unwind) { return (unwind & 0x80000000) == 0 && unwind != 0x1; } // Return true if the .ARM.exidx section Cur can be merged into the .ARM.exidx // section Prev, where Cur follows Prev in the table. This can be done if the // unwinding instructions in Cur are identical to Prev. Linker generated // EXIDX_CANTUNWIND entries are represented by nullptr as they do not have an // InputSection. static bool isDuplicateArmExidxSec(InputSection *prev, InputSection *cur) { struct ExidxEntry { ulittle32_t fn; ulittle32_t unwind; }; // Get the last table Entry from the previous .ARM.exidx section. If Prev is // nullptr then it will be a synthesized EXIDX_CANTUNWIND entry. ExidxEntry prevEntry = {ulittle32_t(0), ulittle32_t(1)}; if (prev) prevEntry = prev->getDataAs().back(); if (isExtabRef(prevEntry.unwind)) return false; // We consider the unwind instructions of an .ARM.exidx table entry // a duplicate if the previous unwind instructions if: // - Both are the special EXIDX_CANTUNWIND. // - Both are the same inline unwind instructions. // We do not attempt to follow and check links into .ARM.extab tables as // consecutive identical entries are rare and the effort to check that they // are identical is high. // If Cur is nullptr then this is synthesized EXIDX_CANTUNWIND entry. 
if (cur == nullptr) return prevEntry.unwind == 1; for (const ExidxEntry entry : cur->getDataAs()) if (isExtabRef(entry.unwind) || entry.unwind != prevEntry.unwind) return false; // All table entries in this .ARM.exidx Section can be merged into the // previous Section. return true; } // The .ARM.exidx table must be sorted in ascending order of the address of the // functions the table describes. Optionally duplicate adjacent table entries // can be removed. At the end of the function the executableSections must be // sorted in ascending order of address, Sentinel is set to the InputSection // with the highest address and any InputSections that have mergeable // .ARM.exidx table entries are removed from it. void ARMExidxSyntheticSection::finalizeContents() { // The executableSections and exidxSections that we use to derive the final // contents of this SyntheticSection are populated before // processSectionCommands() and ICF. A /DISCARD/ entry in SECTIONS command or // ICF may remove executable InputSections and their dependent .ARM.exidx // section that we recorded earlier. auto isDiscarded = [](const InputSection *isec) { return !isec->isLive(); }; llvm::erase_if(executableSections, isDiscarded); llvm::erase_if(exidxSections, isDiscarded); // Sort the executable sections that may or may not have associated // .ARM.exidx sections by order of ascending address. This requires the // relative positions of InputSections to be known. auto compareByFilePosition = [](const InputSection *a, const InputSection *b) { OutputSection *aOut = a->getParent(); OutputSection *bOut = b->getParent(); if (aOut != bOut) return aOut->sectionIndex < bOut->sectionIndex; return a->outSecOff < b->outSecOff; }; llvm::stable_sort(executableSections, compareByFilePosition); sentinel = executableSections.back(); // Optionally merge adjacent duplicate entries. 
if (config->mergeArmExidx) { std::vector selectedSections; selectedSections.reserve(executableSections.size()); selectedSections.push_back(executableSections[0]); size_t prev = 0; for (size_t i = 1; i < executableSections.size(); ++i) { InputSection *ex1 = findExidxSection(executableSections[prev]); InputSection *ex2 = findExidxSection(executableSections[i]); if (!isDuplicateArmExidxSec(ex1, ex2)) { selectedSections.push_back(executableSections[i]); prev = i; } } executableSections = std::move(selectedSections); } size_t offset = 0; size = 0; for (InputSection *isec : executableSections) { if (InputSection *d = findExidxSection(isec)) { d->outSecOff = offset; d->parent = getParent(); offset += d->getSize(); } else { offset += 8; } } // Size includes Sentinel. size = offset + 8; } InputSection *ARMExidxSyntheticSection::getLinkOrderDep() const { return executableSections.front(); } // To write the .ARM.exidx table from the ExecutableSections we have three cases // 1.) The InputSection has a .ARM.exidx InputSection in its dependent sections. // We write the .ARM.exidx section contents and apply its relocations. // 2.) The InputSection does not have a dependent .ARM.exidx InputSection. We // must write the contents of an EXIDX_CANTUNWIND directly. We use the // start of the InputSection as the purpose of the linker generated // section is to terminate the address range of the previous entry. // 3.) A trailing EXIDX_CANTUNWIND sentinel section is required at the end of // the table to terminate the address range of the final entry. 
void ARMExidxSyntheticSection::writeTo(uint8_t *buf) { const uint8_t cantUnwindData[8] = {0, 0, 0, 0, // PREL31 to target 1, 0, 0, 0}; // EXIDX_CANTUNWIND uint64_t offset = 0; for (InputSection *isec : executableSections) { assert(isec->getParent() != nullptr); if (InputSection *d = findExidxSection(isec)) { memcpy(buf + offset, d->data().data(), d->data().size()); d->relocateAlloc(buf, buf + d->getSize()); offset += d->getSize(); } else { // A Linker generated CANTUNWIND section. memcpy(buf + offset, cantUnwindData, sizeof(cantUnwindData)); uint64_t s = isec->getVA(); uint64_t p = getVA() + offset; target->relocateOne(buf + offset, R_ARM_PREL31, s - p); offset += 8; } } // Write Sentinel. memcpy(buf + offset, cantUnwindData, sizeof(cantUnwindData)); uint64_t s = sentinel->getVA(sentinel->getSize()); uint64_t p = getVA() + offset; target->relocateOne(buf + offset, R_ARM_PREL31, s - p); assert(size == offset + 8); } bool ARMExidxSyntheticSection::isNeeded() const { return llvm::find_if(exidxSections, [](InputSection *isec) { return isec->isLive(); }) != exidxSections.end(); } bool ARMExidxSyntheticSection::classof(const SectionBase *d) { return d->kind() == InputSectionBase::Synthetic && d->type == SHT_ARM_EXIDX; } ThunkSection::ThunkSection(OutputSection *os, uint64_t off) : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 4, ".text.thunk") { this->parent = os; this->outSecOff = off; } size_t ThunkSection::getSize() const { if (roundUpSizeForErrata) return alignTo(size, 4096); return size; } void ThunkSection::addThunk(Thunk *t) { thunks.push_back(t); t->addSymbols(*this); } void ThunkSection::writeTo(uint8_t *buf) { for (Thunk *t : thunks) t->writeTo(buf + t->offset); } InputSection *ThunkSection::getTargetInputSection() const { if (thunks.empty()) return nullptr; const Thunk *t = thunks.front(); return t->getTargetInputSection(); } bool ThunkSection::assignOffsets() { uint64_t off = 0; for (Thunk *t : thunks) { off = alignTo(off, t->alignment); 
t->setOffset(off); uint32_t size = t->size(); t->getThunkTargetSym()->size = size; off += size; } bool changed = off != size; size = off; return changed; } PPC32Got2Section::PPC32Got2Section() : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, 4, ".got2") {} bool PPC32Got2Section::isNeeded() const { // See the comment below. This is not needed if there is no other // InputSection. for (BaseCommand *base : getParent()->sectionCommands) if (auto *isd = dyn_cast(base)) for (InputSection *isec : isd->sections) if (isec != this) return true; return false; } void PPC32Got2Section::finalizeContents() { // PPC32 may create multiple GOT sections for -fPIC/-fPIE, one per file in // .got2 . This function computes outSecOff of each .got2 to be used in // PPC32PltCallStub::writeTo(). The purpose of this empty synthetic section is // to collect input sections named ".got2". uint32_t offset = 0; for (BaseCommand *base : getParent()->sectionCommands) if (auto *isd = dyn_cast(base)) { for (InputSection *isec : isd->sections) { if (isec == this) continue; isec->file->ppc32Got2OutSecOff = offset; offset += (uint32_t)isec->getSize(); } } } // If linking position-dependent code then the table will store the addresses // directly in the binary so the section has type SHT_PROGBITS. If linking // position-independent code the section has type SHT_NOBITS since it will be // allocated and filled in by the dynamic linker. PPC64LongBranchTargetSection::PPC64LongBranchTargetSection() : SyntheticSection(SHF_ALLOC | SHF_WRITE, config->isPic ? 
SHT_NOBITS : SHT_PROGBITS, 8, ".branch_lt") {} uint64_t PPC64LongBranchTargetSection::getEntryVA(const Symbol *sym, int64_t addend) { return getVA() + entry_index.find({sym, addend})->second * 8; } Optional PPC64LongBranchTargetSection::addEntry(const Symbol *sym, int64_t addend) { auto res = entry_index.try_emplace(std::make_pair(sym, addend), entries.size()); if (!res.second) return None; entries.emplace_back(sym, addend); return res.first->second; } size_t PPC64LongBranchTargetSection::getSize() const { return entries.size() * 8; } void PPC64LongBranchTargetSection::writeTo(uint8_t *buf) { // If linking non-pic we have the final addresses of the targets and they get // written to the table directly. For pic the dynamic linker will allocate // the section and fill it it. if (config->isPic) return; for (auto entry : entries) { const Symbol *sym = entry.first; int64_t addend = entry.second; assert(sym->getVA()); // Need calls to branch to the local entry-point since a long-branch // must be a local-call. write64(buf, sym->getVA(addend) + getPPC64GlobalEntryToLocalEntryOffset(sym->stOther)); buf += 8; } } bool PPC64LongBranchTargetSection::isNeeded() const { // `removeUnusedSyntheticSections()` is called before thunk allocation which // is too early to determine if this section will be empty or not. We need // Finalized to keep the section alive until after thunk creation. Finalized // only gets set to true once `finalizeSections()` is called after thunk // creation. Because of this, if we don't create any long-branch thunks we end // up with an empty .branch_lt section in the binary. return !finalized || !entries.empty(); } static uint8_t getAbiVersion() { // MIPS non-PIC executable gets ABI version 1. 
if (config->emachine == EM_MIPS) { if (!config->isPic && !config->relocatable && (config->eflags & (EF_MIPS_PIC | EF_MIPS_CPIC)) == EF_MIPS_CPIC) return 1; return 0; } if (config->emachine == EM_AMDGPU) { uint8_t ver = objectFiles[0]->abiVersion; for (InputFile *file : makeArrayRef(objectFiles).slice(1)) if (file->abiVersion != ver) error("incompatible ABI version: " + toString(file)); return ver; } return 0; } template void writeEhdr(uint8_t *buf, Partition &part) { // For executable segments, the trap instructions are written before writing // the header. Setting Elf header bytes to zero ensures that any unused bytes // in header are zero-cleared, instead of having trap instructions. memset(buf, 0, sizeof(typename ELFT::Ehdr)); memcpy(buf, "\177ELF", 4); auto *eHdr = reinterpret_cast(buf); eHdr->e_ident[EI_CLASS] = config->is64 ? ELFCLASS64 : ELFCLASS32; eHdr->e_ident[EI_DATA] = config->isLE ? ELFDATA2LSB : ELFDATA2MSB; eHdr->e_ident[EI_VERSION] = EV_CURRENT; eHdr->e_ident[EI_OSABI] = config->osabi; eHdr->e_ident[EI_ABIVERSION] = getAbiVersion(); eHdr->e_machine = config->emachine; eHdr->e_version = EV_CURRENT; eHdr->e_flags = config->eflags; eHdr->e_ehsize = sizeof(typename ELFT::Ehdr); eHdr->e_phnum = part.phdrs.size(); eHdr->e_shentsize = sizeof(typename ELFT::Shdr); if (!config->relocatable) { eHdr->e_phoff = sizeof(typename ELFT::Ehdr); eHdr->e_phentsize = sizeof(typename ELFT::Phdr); } } template void writePhdrs(uint8_t *buf, Partition &part) { // Write the program header table. 
auto *hBuf = reinterpret_cast(buf); for (PhdrEntry *p : part.phdrs) { hBuf->p_type = p->p_type; hBuf->p_flags = p->p_flags; hBuf->p_offset = p->p_offset; hBuf->p_vaddr = p->p_vaddr; hBuf->p_paddr = p->p_paddr; hBuf->p_filesz = p->p_filesz; hBuf->p_memsz = p->p_memsz; hBuf->p_align = p->p_align; ++hBuf; } } template PartitionElfHeaderSection::PartitionElfHeaderSection() : SyntheticSection(SHF_ALLOC, SHT_LLVM_PART_EHDR, 1, "") {} template size_t PartitionElfHeaderSection::getSize() const { return sizeof(typename ELFT::Ehdr); } template void PartitionElfHeaderSection::writeTo(uint8_t *buf) { writeEhdr(buf, getPartition()); // Loadable partitions are always ET_DYN. auto *eHdr = reinterpret_cast(buf); eHdr->e_type = ET_DYN; } template PartitionProgramHeadersSection::PartitionProgramHeadersSection() : SyntheticSection(SHF_ALLOC, SHT_LLVM_PART_PHDR, 1, ".phdrs") {} template size_t PartitionProgramHeadersSection::getSize() const { return sizeof(typename ELFT::Phdr) * getPartition().phdrs.size(); } template void PartitionProgramHeadersSection::writeTo(uint8_t *buf) { writePhdrs(buf, getPartition()); } PartitionIndexSection::PartitionIndexSection() : SyntheticSection(SHF_ALLOC, SHT_PROGBITS, 4, ".rodata") {} size_t PartitionIndexSection::getSize() const { return 12 * (partitions.size() - 1); } void PartitionIndexSection::finalizeContents() { for (size_t i = 1; i != partitions.size(); ++i) partitions[i].nameStrTab = mainPart->dynStrTab->addString(partitions[i].name); } void PartitionIndexSection::writeTo(uint8_t *buf) { uint64_t va = getVA(); for (size_t i = 1; i != partitions.size(); ++i) { write32(buf, mainPart->dynStrTab->getVA() + partitions[i].nameStrTab - va); write32(buf + 4, partitions[i].elfHeader->getVA() - (va + 4)); SyntheticSection *next = i == partitions.size() - 1 ? 
in.partEnd : partitions[i + 1].elfHeader; write32(buf + 8, next->getVA() - partitions[i].elfHeader->getVA()); va += 12; buf += 12; } } InStruct in; std::vector partitions; Partition *mainPart; template GdbIndexSection *GdbIndexSection::create(); template GdbIndexSection *GdbIndexSection::create(); template GdbIndexSection *GdbIndexSection::create(); template GdbIndexSection *GdbIndexSection::create(); template void splitSections(); template void splitSections(); template void splitSections(); template void splitSections(); template class MipsAbiFlagsSection; template class MipsAbiFlagsSection; template class MipsAbiFlagsSection; template class MipsAbiFlagsSection; template class MipsOptionsSection; template class MipsOptionsSection; template class MipsOptionsSection; template class MipsOptionsSection; template class MipsReginfoSection; template class MipsReginfoSection; template class MipsReginfoSection; template class MipsReginfoSection; template class DynamicSection; template class DynamicSection; template class DynamicSection; template class DynamicSection; template class RelocationSection; template class RelocationSection; template class RelocationSection; template class RelocationSection; template class AndroidPackedRelocationSection; template class AndroidPackedRelocationSection; template class AndroidPackedRelocationSection; template class AndroidPackedRelocationSection; template class RelrSection; template class RelrSection; template class RelrSection; template class RelrSection; template class SymbolTableSection; template class SymbolTableSection; template class SymbolTableSection; template class SymbolTableSection; template class VersionNeedSection; template class VersionNeedSection; template class VersionNeedSection; template class VersionNeedSection; template void writeEhdr(uint8_t *Buf, Partition &Part); template void writeEhdr(uint8_t *Buf, Partition &Part); template void writeEhdr(uint8_t *Buf, Partition &Part); template void writeEhdr(uint8_t *Buf, 
Partition &Part); template void writePhdrs(uint8_t *Buf, Partition &Part); template void writePhdrs(uint8_t *Buf, Partition &Part); template void writePhdrs(uint8_t *Buf, Partition &Part); template void writePhdrs(uint8_t *Buf, Partition &Part); template class PartitionElfHeaderSection; template class PartitionElfHeaderSection; template class PartitionElfHeaderSection; template class PartitionElfHeaderSection; template class PartitionProgramHeadersSection; template class PartitionProgramHeadersSection; template class PartitionProgramHeadersSection; template class PartitionProgramHeadersSection; } // namespace elf } // namespace lld Index: head/contrib/llvm-project/lld/ELF/SyntheticSections.h =================================================================== --- head/contrib/llvm-project/lld/ELF/SyntheticSections.h (revision 359083) +++ head/contrib/llvm-project/lld/ELF/SyntheticSections.h (revision 359084) @@ -1,1216 +1,1225 @@ //===- SyntheticSection.h ---------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Synthetic sections represent chunks of linker-created data. If you // need to create a chunk of data that to be included in some section // in the result, you probably want to create that as a synthetic section. // // Synthetic sections are designed as input sections as opposed to // output sections because we want to allow them to be manipulated // using linker scripts just like other input sections from regular // files. 
// //===----------------------------------------------------------------------===// #ifndef LLD_ELF_SYNTHETIC_SECTIONS_H #define LLD_ELF_SYNTHETIC_SECTIONS_H #include "DWARF.h" #include "EhFrame.h" #include "InputSection.h" #include "llvm/ADT/MapVector.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/Endian.h" #include namespace lld { namespace elf { class Defined; struct PhdrEntry; class SymbolTableBaseSection; class VersionNeedBaseSection; class SyntheticSection : public InputSection { public: SyntheticSection(uint64_t flags, uint32_t type, uint32_t alignment, StringRef name) : InputSection(nullptr, flags, type, alignment, {}, name, InputSectionBase::Synthetic) { markLive(); } virtual ~SyntheticSection() = default; virtual void writeTo(uint8_t *buf) = 0; virtual size_t getSize() const = 0; virtual void finalizeContents() {} // If the section has the SHF_ALLOC flag and the size may be changed if // thunks are added, update the section size. virtual bool updateAllocSize() { return false; } virtual bool isNeeded() const { return true; } static bool classof(const SectionBase *d) { return d->kind() == InputSectionBase::Synthetic; } }; struct CieRecord { EhSectionPiece *cie = nullptr; std::vector fdes; }; // Section for .eh_frame. class EhFrameSection final : public SyntheticSection { public: EhFrameSection(); void writeTo(uint8_t *buf) override; void finalizeContents() override; bool isNeeded() const override { return !sections.empty(); } size_t getSize() const override { return size; } static bool classof(const SectionBase *d) { return SyntheticSection::classof(d) && d->name == ".eh_frame"; } void addSection(EhInputSection *sec); std::vector sections; size_t numFdes = 0; struct FdeData { uint32_t pcRel; uint32_t fdeVARel; }; std::vector getFdeData() const; ArrayRef getCieRecords() const { return cieRecords; } private: // This is used only when parsing EhInputSection. We keep it here to avoid // allocating one for each EhInputSection. 
llvm::DenseMap offsetToCie; uint64_t size = 0; template void addRecords(EhInputSection *s, llvm::ArrayRef rels); template void addSectionAux(EhInputSection *s); template CieRecord *addCie(EhSectionPiece &piece, ArrayRef rels); template bool isFdeLive(EhSectionPiece &piece, ArrayRef rels); uint64_t getFdePc(uint8_t *buf, size_t off, uint8_t enc) const; std::vector cieRecords; // CIE records are uniquified by their contents and personality functions. llvm::DenseMap, Symbol *>, CieRecord *> cieMap; }; class GotSection : public SyntheticSection { public: GotSection(); size_t getSize() const override { return size; } void finalizeContents() override; bool isNeeded() const override; void writeTo(uint8_t *buf) override; void addEntry(Symbol &sym); bool addDynTlsEntry(Symbol &sym); bool addTlsIndex(); uint64_t getGlobalDynAddr(const Symbol &b) const; uint64_t getGlobalDynOffset(const Symbol &b) const; uint64_t getTlsIndexVA() { return this->getVA() + tlsIndexOff; } uint32_t getTlsIndexOff() const { return tlsIndexOff; } // Flag to force GOT to be in output if we have relocations // that relies on its address. bool hasGotOffRel = false; protected: size_t numEntries = 0; uint32_t tlsIndexOff = -1; uint64_t size = 0; }; // .note.GNU-stack section. class GnuStackSection : public SyntheticSection { public: GnuStackSection() : SyntheticSection(0, llvm::ELF::SHT_PROGBITS, 1, ".note.GNU-stack") {} void writeTo(uint8_t *buf) override {} size_t getSize() const override { return 0; } }; class GnuPropertySection : public SyntheticSection { public: GnuPropertySection(); void writeTo(uint8_t *buf) override; size_t getSize() const override; }; // .note.gnu.build-id section. class BuildIdSection : public SyntheticSection { // First 16 bytes are a header. 
static const unsigned headerSize = 16; public: const size_t hashSize; BuildIdSection(); void writeTo(uint8_t *buf) override; size_t getSize() const override { return headerSize + hashSize; } void writeBuildId(llvm::ArrayRef buf); private: uint8_t *hashBuf; }; // BssSection is used to reserve space for copy relocations and common symbols. // We create three instances of this class for .bss, .bss.rel.ro and "COMMON", // that are used for writable symbols, read-only symbols and common symbols, // respectively. class BssSection final : public SyntheticSection { public: BssSection(StringRef name, uint64_t size, uint32_t alignment); void writeTo(uint8_t *) override { llvm_unreachable("unexpected writeTo() call for SHT_NOBITS section"); } bool isNeeded() const override { return size != 0; } size_t getSize() const override { return size; } static bool classof(const SectionBase *s) { return s->bss; } uint64_t size; }; class MipsGotSection final : public SyntheticSection { public: MipsGotSection(); void writeTo(uint8_t *buf) override; size_t getSize() const override { return size; } bool updateAllocSize() override; void finalizeContents() override; bool isNeeded() const override; // Join separate GOTs built for each input file to generate // primary and optional multiple secondary GOTs. void build(); void addEntry(InputFile &file, Symbol &sym, int64_t addend, RelExpr expr); void addDynTlsEntry(InputFile &file, Symbol &sym); void addTlsIndex(InputFile &file); uint64_t getPageEntryOffset(const InputFile *f, const Symbol &s, int64_t addend) const; uint64_t getSymEntryOffset(const InputFile *f, const Symbol &s, int64_t addend) const; uint64_t getGlobalDynOffset(const InputFile *f, const Symbol &s) const; uint64_t getTlsIndexOffset(const InputFile *f) const; // Returns the symbol which corresponds to the first entry of the global part // of GOT on MIPS platform. It is required to fill up MIPS-specific dynamic // table properties. // Returns nullptr if the global part is empty. 
const Symbol *getFirstGlobalEntry() const; // Returns the number of entries in the local part of GOT including // the number of reserved entries. unsigned getLocalEntriesNum() const; // Return _gp value for primary GOT (nullptr) or particular input file. uint64_t getGp(const InputFile *f = nullptr) const; private: // MIPS GOT consists of three parts: local, global and tls. Each part // contains different types of entries. Here is a layout of GOT: // - Header entries | // - Page entries | Local part // - Local entries (16-bit access) | // - Local entries (32-bit access) | // - Normal global entries || Global part // - Reloc-only global entries || // - TLS entries ||| TLS part // // Header: // Two entries hold predefined value 0x0 and 0x80000000. // Page entries: // These entries created by R_MIPS_GOT_PAGE relocation and R_MIPS_GOT16 // relocation against local symbols. They are initialized by higher 16-bit // of the corresponding symbol's value. So each 64kb of address space // requires a single GOT entry. // Local entries (16-bit access): // These entries created by GOT relocations against global non-preemptible // symbols so dynamic linker is not necessary to resolve the symbol's // values. "16-bit access" means that corresponding relocations address // GOT using 16-bit index. Each unique Symbol-Addend pair has its own // GOT entry. // Local entries (32-bit access): // These entries are the same as above but created by relocations which // address GOT using 32-bit index (R_MIPS_GOT_HI16/LO16 etc). // Normal global entries: // These entries created by GOT relocations against preemptible global // symbols. They need to be initialized by dynamic linker and they ordered // exactly as the corresponding entries in the dynamic symbols table. // Reloc-only global entries: // These entries created for symbols that are referenced by dynamic // relocations R_MIPS_REL32. 
These entries are not accessed with gp-relative // addressing, but MIPS ABI requires that these entries be present in GOT. // TLS entries: // Entries created by TLS relocations. // // If the sum of local, global and tls entries is less than 64K only single // got is enough. Otherwise, multi-got is created. Series of primary and // multiple secondary GOTs have the following layout: // - Primary GOT // Header // Local entries // Global entries // Relocation only entries // TLS entries // // - Secondary GOT // Local entries // Global entries // TLS entries // ... // // All GOT entries required by relocations from a single input file entirely // belong to either primary or one of secondary GOTs. To reference GOT entries // each GOT has its own _gp value points to the "middle" of the GOT. // In the code this value loaded to the register which is used for GOT access. // // MIPS 32 function's prologue: // lui v0,0x0 // 0: R_MIPS_HI16 _gp_disp // addiu v0,v0,0 // 4: R_MIPS_LO16 _gp_disp // // MIPS 64: // lui at,0x0 // 14: R_MIPS_GPREL16 main // // Dynamic linker does not know anything about secondary GOTs and cannot // use a regular MIPS mechanism for GOT entries initialization. So we have // to use an approach accepted by other architectures and create dynamic // relocations R_MIPS_REL32 to initialize global entries (and local in case // of PIC code) in secondary GOTs. But ironically MIPS dynamic linker // requires GOT entries and correspondingly ordered dynamic symbol table // entries to deal with dynamic relocations. To handle this problem // relocation-only section in the primary GOT contains entries for all // symbols referenced in global parts of secondary GOTs. 
Although the sum // of local and normal global entries of the primary got should be less // than 64K, the size of the primary got (including relocation-only entries // can be greater than 64K, because parts of the primary got that overflow // the 64K limit are used only by the dynamic linker at dynamic link-time // and not by 16-bit gp-relative addressing at run-time. // // For complete multi-GOT description see the following link // https://dmz-portal.mips.com/wiki/MIPS_Multi_GOT // Number of "Header" entries. static const unsigned headerEntriesNum = 2; uint64_t size = 0; // Symbol and addend. using GotEntry = std::pair; struct FileGot { InputFile *file = nullptr; size_t startIndex = 0; struct PageBlock { size_t firstIndex; size_t count; PageBlock() : firstIndex(0), count(0) {} }; // Map output sections referenced by MIPS GOT relocations // to the description (index/count) "page" entries allocated // for this section. llvm::SmallMapVector pagesMap; // Maps from Symbol+Addend pair or just Symbol to the GOT entry index. llvm::MapVector local16; llvm::MapVector local32; llvm::MapVector global; llvm::MapVector relocs; llvm::MapVector tls; // Set of symbols referenced by dynamic TLS relocations. llvm::MapVector dynTlsSymbols; // Total number of all entries. size_t getEntriesNum() const; // Number of "page" entries. size_t getPageEntriesNum() const; // Number of entries require 16-bit index to access. size_t getIndexedEntriesNum() const; }; // Container of GOT created for each input file. // After building a final series of GOTs this container // holds primary and secondary GOT's. std::vector gots; // Return (and create if necessary) `FileGot`. FileGot &getGot(InputFile &f); // Try to merge two GOTs. In case of success the `Dst` contains // result of merging and the function returns true. In case of // ovwerflow the `Dst` is unchanged and the function returns false. 
bool tryMergeGots(FileGot & dst, FileGot & src, bool isPrimary); }; class GotPltSection final : public SyntheticSection { public: GotPltSection(); void addEntry(Symbol &sym); size_t getSize() const override; void writeTo(uint8_t *buf) override; bool isNeeded() const override; // Flag to force GotPlt to be in output if we have relocations // that relies on its address. bool hasGotPltOffRel = false; private: std::vector entries; }; // The IgotPltSection is a Got associated with the PltSection for GNU Ifunc // Symbols that will be relocated by Target->IRelativeRel. // On most Targets the IgotPltSection will immediately follow the GotPltSection // on ARM the IgotPltSection will immediately follow the GotSection. class IgotPltSection final : public SyntheticSection { public: IgotPltSection(); void addEntry(Symbol &sym); size_t getSize() const override; void writeTo(uint8_t *buf) override; bool isNeeded() const override { return !entries.empty(); } private: std::vector entries; }; class StringTableSection final : public SyntheticSection { public: StringTableSection(StringRef name, bool dynamic); unsigned addString(StringRef s, bool hashIt = true); void writeTo(uint8_t *buf) override; size_t getSize() const override { return size; } bool isDynamic() const { return dynamic; } private: const bool dynamic; uint64_t size = 0; llvm::DenseMap stringMap; std::vector strings; }; class DynamicReloc { public: DynamicReloc(RelType type, const InputSectionBase *inputSec, uint64_t offsetInSec, bool useSymVA, Symbol *sym, int64_t addend) : type(type), sym(sym), inputSec(inputSec), offsetInSec(offsetInSec), useSymVA(useSymVA), addend(addend), outputSec(nullptr) {} // This constructor records dynamic relocation settings used by MIPS // multi-GOT implementation. It's to relocate addresses of 64kb pages // lie inside the output section. 
DynamicReloc(RelType type, const InputSectionBase *inputSec, uint64_t offsetInSec, const OutputSection *outputSec, int64_t addend) : type(type), sym(nullptr), inputSec(inputSec), offsetInSec(offsetInSec), useSymVA(false), addend(addend), outputSec(outputSec) {} uint64_t getOffset() const; uint32_t getSymIndex(SymbolTableBaseSection *symTab) const; // Computes the addend of the dynamic relocation. Note that this is not the // same as the addend member variable as it also includes the symbol address // if useSymVA is true. int64_t computeAddend() const; RelType type; Symbol *sym; const InputSectionBase *inputSec = nullptr; uint64_t offsetInSec; // If this member is true, the dynamic relocation will not be against the // symbol but will instead be a relative relocation that simply adds the // load address. This means we need to write the symbol virtual address // plus the original addend as the final relocation addend. bool useSymVA; int64_t addend; const OutputSection *outputSec; }; template class DynamicSection final : public SyntheticSection { using Elf_Dyn = typename ELFT::Dyn; using Elf_Rel = typename ELFT::Rel; using Elf_Rela = typename ELFT::Rela; using Elf_Relr = typename ELFT::Relr; using Elf_Shdr = typename ELFT::Shdr; using Elf_Sym = typename ELFT::Sym; // finalizeContents() fills this vector with the section contents. 
std::vector>> entries; public: DynamicSection(); void finalizeContents() override; void writeTo(uint8_t *buf) override; size_t getSize() const override { return size; } private: void add(int32_t tag, std::function fn); void addInt(int32_t tag, uint64_t val); void addInSec(int32_t tag, InputSection *sec); void addInSecRelative(int32_t tag, InputSection *sec); void addOutSec(int32_t tag, OutputSection *sec); void addSize(int32_t tag, OutputSection *sec); void addSym(int32_t tag, Symbol *sym); uint64_t size = 0; }; class RelocationBaseSection : public SyntheticSection { public: RelocationBaseSection(StringRef name, uint32_t type, int32_t dynamicTag, int32_t sizeDynamicTag); void addReloc(RelType dynType, InputSectionBase *isec, uint64_t offsetInSec, Symbol *sym); // Add a dynamic relocation that might need an addend. This takes care of // writing the addend to the output section if needed. void addReloc(RelType dynType, InputSectionBase *inputSec, uint64_t offsetInSec, Symbol *sym, int64_t addend, RelExpr expr, RelType type); void addReloc(const DynamicReloc &reloc); bool isNeeded() const override { return !relocs.empty(); } size_t getSize() const override { return relocs.size() * this->entsize; } size_t getRelativeRelocCount() const { return numRelativeRelocs; } void finalizeContents() override; int32_t dynamicTag, sizeDynamicTag; std::vector relocs; protected: size_t numRelativeRelocs = 0; }; template class RelocationSection final : public RelocationBaseSection { using Elf_Rel = typename ELFT::Rel; using Elf_Rela = typename ELFT::Rela; public: RelocationSection(StringRef name, bool sort); void writeTo(uint8_t *buf) override; private: bool sort; }; template class AndroidPackedRelocationSection final : public RelocationBaseSection { using Elf_Rel = typename ELFT::Rel; using Elf_Rela = typename ELFT::Rela; public: AndroidPackedRelocationSection(StringRef name); bool updateAllocSize() override; size_t getSize() const override { return relocData.size(); } void 
writeTo(uint8_t *buf) override { memcpy(buf, relocData.data(), relocData.size()); } private: SmallVector relocData; }; struct RelativeReloc { uint64_t getOffset() const { return inputSec->getVA(offsetInSec); } const InputSectionBase *inputSec; uint64_t offsetInSec; }; class RelrBaseSection : public SyntheticSection { public: RelrBaseSection(); bool isNeeded() const override { return !relocs.empty(); } std::vector relocs; }; // RelrSection is used to encode offsets for relative relocations. // Proposal for adding SHT_RELR sections to generic-abi is here: // https://groups.google.com/forum/#!topic/generic-abi/bX460iggiKg // For more details, see the comment in RelrSection::updateAllocSize(). template class RelrSection final : public RelrBaseSection { using Elf_Relr = typename ELFT::Relr; public: RelrSection(); bool updateAllocSize() override; size_t getSize() const override { return relrRelocs.size() * this->entsize; } void writeTo(uint8_t *buf) override { memcpy(buf, relrRelocs.data(), getSize()); } private: std::vector relrRelocs; }; struct SymbolTableEntry { Symbol *sym; size_t strTabOffset; }; class SymbolTableBaseSection : public SyntheticSection { public: SymbolTableBaseSection(StringTableSection &strTabSec); void finalizeContents() override; size_t getSize() const override { return getNumSymbols() * entsize; } void addSymbol(Symbol *sym); unsigned getNumSymbols() const { return symbols.size() + 1; } size_t getSymbolIndex(Symbol *sym); ArrayRef getSymbols() const { return symbols; } protected: void sortSymTabSymbols(); // A vector of symbols and their string table offsets. 
std::vector symbols; StringTableSection &strTabSec; llvm::once_flag onceFlag; llvm::DenseMap symbolIndexMap; llvm::DenseMap sectionIndexMap; }; template class SymbolTableSection final : public SymbolTableBaseSection { using Elf_Sym = typename ELFT::Sym; public: SymbolTableSection(StringTableSection &strTabSec); void writeTo(uint8_t *buf) override; }; class SymtabShndxSection final : public SyntheticSection { public: SymtabShndxSection(); void writeTo(uint8_t *buf) override; size_t getSize() const override; bool isNeeded() const override; void finalizeContents() override; }; // Outputs GNU Hash section. For detailed explanation see: // https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections class GnuHashTableSection final : public SyntheticSection { public: GnuHashTableSection(); void finalizeContents() override; void writeTo(uint8_t *buf) override; size_t getSize() const override { return size; } // Adds symbols to the hash table. // Sorts the input to satisfy GNU hash section requirements. void addSymbols(std::vector &symbols); private: // See the comment in writeBloomFilter. enum { Shift2 = 26 }; void writeBloomFilter(uint8_t *buf); void writeHashTable(uint8_t *buf); struct Entry { Symbol *sym; size_t strTabOffset; uint32_t hash; uint32_t bucketIdx; }; std::vector symbols; size_t maskWords; size_t nBuckets = 0; size_t size = 0; }; class HashTableSection final : public SyntheticSection { public: HashTableSection(); void finalizeContents() override; void writeTo(uint8_t *buf) override; size_t getSize() const override { return size; } private: size_t size = 0; }; // Used for PLT entries. It usually has a PLT header for lazy binding. Each PLT // entry is associated with a JUMP_SLOT relocation, which may be resolved lazily // at runtime. // // On PowerPC, this section contains lazy symbol resolvers. A branch instruction // jumps to a PLT call stub, which will then jump to the target (BIND_NOW) or a // lazy symbol resolver. 
// // On x86 when IBT is enabled, this section (.plt.sec) contains PLT call stubs. // A call instruction jumps to a .plt.sec entry, which will then jump to the // target (BIND_NOW) or a .plt entry. class PltSection : public SyntheticSection { public: PltSection(); void writeTo(uint8_t *buf) override; size_t getSize() const override; bool isNeeded() const override; void addSymbols(); void addEntry(Symbol &sym); size_t getNumEntries() const { return entries.size(); } size_t headerSize; - size_t footerSize = 0; std::vector entries; }; // Used for non-preemptible ifuncs. It does not have a header. Each entry is // associated with an IRELATIVE relocation, which will be resolved eagerly at // runtime. PltSection can only contain entries associated with JUMP_SLOT // relocations, so IPLT entries are in a separate section. class IpltSection final : public SyntheticSection { std::vector entries; public: IpltSection(); void writeTo(uint8_t *buf) override; size_t getSize() const override; bool isNeeded() const override { return !entries.empty(); } void addSymbols(); void addEntry(Symbol &sym); +}; + +class PPC32GlinkSection : public PltSection { +public: + PPC32GlinkSection(); + void writeTo(uint8_t *buf) override; + size_t getSize() const override; + + std::vector canonical_plts; + static constexpr size_t footerSize = 64; }; // This is x86-only. 
class IBTPltSection : public SyntheticSection { public: IBTPltSection(); void writeTo(uint8_t *Buf) override; size_t getSize() const override; }; class GdbIndexSection final : public SyntheticSection { public: struct AddressEntry { InputSection *section; uint64_t lowAddress; uint64_t highAddress; uint32_t cuIndex; }; struct CuEntry { uint64_t cuOffset; uint64_t cuLength; }; struct NameAttrEntry { llvm::CachedHashStringRef name; uint32_t cuIndexAndAttrs; }; struct GdbChunk { InputSection *sec; std::vector addressAreas; std::vector compilationUnits; }; struct GdbSymbol { llvm::CachedHashStringRef name; std::vector cuVector; uint32_t nameOff; uint32_t cuVectorOff; }; GdbIndexSection(); template static GdbIndexSection *create(); void writeTo(uint8_t *buf) override; size_t getSize() const override { return size; } bool isNeeded() const override; private: struct GdbIndexHeader { llvm::support::ulittle32_t version; llvm::support::ulittle32_t cuListOff; llvm::support::ulittle32_t cuTypesOff; llvm::support::ulittle32_t addressAreaOff; llvm::support::ulittle32_t symtabOff; llvm::support::ulittle32_t constantPoolOff; }; void initOutputSize(); size_t computeSymtabSize() const; // Each chunk contains information gathered from debug sections of a // single object file. std::vector chunks; // A symbol table for this .gdb_index section. std::vector symbols; size_t size; }; // --eh-frame-hdr option tells linker to construct a header for all the // .eh_frame sections. This header is placed to a section named .eh_frame_hdr // and also to a PT_GNU_EH_FRAME segment. // At runtime the unwinder then can find all the PT_GNU_EH_FRAME segments by // calling dl_iterate_phdr. // This section contains a lookup table for quick binary search of FDEs. 
// Detailed info about internals can be found in Ian Lance Taylor's blog: // http://www.airs.com/blog/archives/460 (".eh_frame") // http://www.airs.com/blog/archives/462 (".eh_frame_hdr") class EhFrameHeader final : public SyntheticSection { public: EhFrameHeader(); void write(); void writeTo(uint8_t *buf) override; size_t getSize() const override; bool isNeeded() const override; }; // For more information about .gnu.version and .gnu.version_r see: // https://www.akkadia.org/drepper/symbol-versioning // The .gnu.version_d section which has a section type of SHT_GNU_verdef shall // contain symbol version definitions. The number of entries in this section // shall be contained in the DT_VERDEFNUM entry of the .dynamic section. // The section shall contain an array of Elf_Verdef structures, optionally // followed by an array of Elf_Verdaux structures. class VersionDefinitionSection final : public SyntheticSection { public: VersionDefinitionSection(); void finalizeContents() override; size_t getSize() const override; void writeTo(uint8_t *buf) override; private: enum { EntrySize = 28 }; void writeOne(uint8_t *buf, uint32_t index, StringRef name, size_t nameOff); StringRef getFileDefName(); unsigned fileDefNameOff; std::vector verDefNameOffs; }; // The .gnu.version section specifies the required version of each symbol in the // dynamic symbol table. It contains one Elf_Versym for each dynamic symbol // table entry. An Elf_Versym is just a 16-bit integer that refers to a version // identifier defined in the either .gnu.version_r or .gnu.version_d section. // The values 0 and 1 are reserved. All other values are used for versions in // the own object or in any of the dependencies. 
class VersionTableSection final : public SyntheticSection { public: VersionTableSection(); void finalizeContents() override; size_t getSize() const override; void writeTo(uint8_t *buf) override; bool isNeeded() const override; }; // The .gnu.version_r section defines the version identifiers used by // .gnu.version. It contains a linked list of Elf_Verneed data structures. Each // Elf_Verneed specifies the version requirements for a single DSO, and contains // a reference to a linked list of Elf_Vernaux data structures which define the // mapping from version identifiers to version names. template class VersionNeedSection final : public SyntheticSection { using Elf_Verneed = typename ELFT::Verneed; using Elf_Vernaux = typename ELFT::Vernaux; struct Vernaux { uint64_t hash; uint32_t verneedIndex; uint64_t nameStrTab; }; struct Verneed { uint64_t nameStrTab; std::vector vernauxs; }; std::vector verneeds; public: VersionNeedSection(); void finalizeContents() override; void writeTo(uint8_t *buf) override; size_t getSize() const override; bool isNeeded() const override; }; // MergeSyntheticSection is a class that allows us to put mergeable sections // with different attributes in a single output sections. To do that // we put them into MergeSyntheticSection synthetic input sections which are // attached to regular output sections. 
class MergeSyntheticSection : public SyntheticSection { public: void addSection(MergeInputSection *ms); std::vector sections; protected: MergeSyntheticSection(StringRef name, uint32_t type, uint64_t flags, uint32_t alignment) : SyntheticSection(flags, type, alignment, name) {} }; class MergeTailSection final : public MergeSyntheticSection { public: MergeTailSection(StringRef name, uint32_t type, uint64_t flags, uint32_t alignment); size_t getSize() const override; void writeTo(uint8_t *buf) override; void finalizeContents() override; private: llvm::StringTableBuilder builder; }; class MergeNoTailSection final : public MergeSyntheticSection { public: MergeNoTailSection(StringRef name, uint32_t type, uint64_t flags, uint32_t alignment) : MergeSyntheticSection(name, type, flags, alignment) {} size_t getSize() const override { return size; } void writeTo(uint8_t *buf) override; void finalizeContents() override; private: // We use the most significant bits of a hash as a shard ID. // The reason why we don't want to use the least significant bits is // because DenseMap also uses lower bits to determine a bucket ID. // If we use lower bits, it significantly increases the probability of // hash collisons. size_t getShardId(uint32_t hash) { assert((hash >> 31) == 0); return hash >> (31 - llvm::countTrailingZeros(numShards)); } // Section size size_t size; // String table contents constexpr static size_t numShards = 32; std::vector shards; size_t shardOffsets[numShards]; }; // .MIPS.abiflags section. template class MipsAbiFlagsSection final : public SyntheticSection { using Elf_Mips_ABIFlags = llvm::object::Elf_Mips_ABIFlags; public: static MipsAbiFlagsSection *create(); MipsAbiFlagsSection(Elf_Mips_ABIFlags flags); size_t getSize() const override { return sizeof(Elf_Mips_ABIFlags); } void writeTo(uint8_t *buf) override; private: Elf_Mips_ABIFlags flags; }; // .MIPS.options section. 
template class MipsOptionsSection final : public SyntheticSection { using Elf_Mips_Options = llvm::object::Elf_Mips_Options; using Elf_Mips_RegInfo = llvm::object::Elf_Mips_RegInfo; public: static MipsOptionsSection *create(); MipsOptionsSection(Elf_Mips_RegInfo reginfo); void writeTo(uint8_t *buf) override; size_t getSize() const override { return sizeof(Elf_Mips_Options) + sizeof(Elf_Mips_RegInfo); } private: Elf_Mips_RegInfo reginfo; }; // MIPS .reginfo section. template class MipsReginfoSection final : public SyntheticSection { using Elf_Mips_RegInfo = llvm::object::Elf_Mips_RegInfo; public: static MipsReginfoSection *create(); MipsReginfoSection(Elf_Mips_RegInfo reginfo); size_t getSize() const override { return sizeof(Elf_Mips_RegInfo); } void writeTo(uint8_t *buf) override; private: Elf_Mips_RegInfo reginfo; }; // This is a MIPS specific section to hold a space within the data segment // of executable file which is pointed to by the DT_MIPS_RLD_MAP entry. // See "Dynamic section" in Chapter 5 in the following document: // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf class MipsRldMapSection : public SyntheticSection { public: MipsRldMapSection(); size_t getSize() const override { return config->wordsize; } void writeTo(uint8_t *buf) override {} }; // Representation of the combined .ARM.Exidx input sections. We process these // as a SyntheticSection like .eh_frame as we need to merge duplicate entries // and add terminating sentinel entries. // // The .ARM.exidx input sections after SHF_LINK_ORDER processing is done form // a table that the unwinder can derive (Addresses are encoded as offsets from // table): // | Address of function | Unwind instructions for function | // where the unwind instructions are either a small number of unwind or the // special EXIDX_CANTUNWIND entry representing no unwinding information. 
// When an exception is thrown from an address A, the unwinder searches the // table for the closest table entry with Address of function <= A. This means // that for two consecutive table entries: // | A1 | U1 | // | A2 | U2 | // The range of addresses described by U1 is [A1, A2) // // There are two cases where we need a linker generated table entry to fixup // the address ranges in the table // Case 1: // - A sentinel entry added with an address higher than all // executable sections. This was needed to work around libunwind bug pr31091. // - After address assignment we need to find the highest addressed executable // section and use the limit of that section so that the unwinder never // matches it. // Case 2: // - InputSections without a .ARM.exidx section (usually from Assembly) // need a table entry so that they terminate the range of the previous // function. This is pr40277. // // Instead of storing pointers to the .ARM.exidx InputSections from // InputObjects, we store pointers to the executable sections that need // .ARM.exidx sections. We can then use the dependentSections of these to // either find the .ARM.exidx section or know that we need to generate one. class ARMExidxSyntheticSection : public SyntheticSection { public: ARMExidxSyntheticSection(); // Add an input section to the ARMExidxSyntheticSection. Returns whether the // section needs to be removed from the main input section list. bool addSection(InputSection *isec); size_t getSize() const override { return size; } void writeTo(uint8_t *buf) override; bool isNeeded() const override; // Sort and remove duplicate entries. void finalizeContents() override; InputSection *getLinkOrderDep() const; static bool classof(const SectionBase *d); // Links to the ARMExidxSections so we can transfer the relocations once the // layout is known.
std::vector exidxSections; private: size_t size; // Instead of storing pointers to the .ARM.exidx InputSections from // InputObjects, we store pointers to the executable sections that need // .ARM.exidx sections. We can then use the dependentSections of these to // either find the .ARM.exidx section or know that we need to generate one. std::vector executableSections; // The executable InputSection with the highest address to use for the // sentinel. We store separately from ExecutableSections as merging of // duplicate entries may mean this InputSection is removed from // ExecutableSections. InputSection *sentinel = nullptr; }; // A container for one or more linker generated thunks. Instances of these // thunks include ARM interworking and Mips LA25 PI to non-PI thunks. class ThunkSection : public SyntheticSection { public: // ThunkSection in OS, with desired outSecOff of Off ThunkSection(OutputSection *os, uint64_t off); // Add a newly created Thunk to this container: // Thunk is given offset from start of this InputSection // Thunk defines a symbol in this InputSection that can be used as target // of a relocation void addThunk(Thunk *t); size_t getSize() const override; void writeTo(uint8_t *buf) override; InputSection *getTargetInputSection() const; bool assignOffsets(); // When true, round up reported size of section to 4 KiB. See comment // in addThunkSection() for more details. bool roundUpSizeForErrata = false; private: std::vector thunks; size_t size = 0; }; // Used to compute outSecOff of .got2 in each object file. This is needed to // synthesize PLT entries for PPC32 Secure PLT ABI. class PPC32Got2Section final : public SyntheticSection { public: PPC32Got2Section(); size_t getSize() const override { return 0; } bool isNeeded() const override; void finalizeContents() override; void writeTo(uint8_t *buf) override {} }; // This section is used to store the addresses of functions that are called // in range-extending thunks on PowerPC64.
When producing position dependent // code the addresses are link-time constants and the table is written out to // the binary. When producing position-independent code the table is allocated and // filled in by the dynamic linker. class PPC64LongBranchTargetSection final : public SyntheticSection { public: PPC64LongBranchTargetSection(); uint64_t getEntryVA(const Symbol *sym, int64_t addend); llvm::Optional addEntry(const Symbol *sym, int64_t addend); size_t getSize() const override; void writeTo(uint8_t *buf) override; bool isNeeded() const override; void finalizeContents() override { finalized = true; } private: std::vector> entries; llvm::DenseMap, uint32_t> entry_index; bool finalized = false; }; template class PartitionElfHeaderSection : public SyntheticSection { public: PartitionElfHeaderSection(); size_t getSize() const override; void writeTo(uint8_t *buf) override; }; template class PartitionProgramHeadersSection : public SyntheticSection { public: PartitionProgramHeadersSection(); size_t getSize() const override; void writeTo(uint8_t *buf) override; }; class PartitionIndexSection : public SyntheticSection { public: PartitionIndexSection(); size_t getSize() const override; void finalizeContents() override; void writeTo(uint8_t *buf) override; }; InputSection *createInterpSection(); MergeInputSection *createCommentSection(); MergeSyntheticSection *createMergeSynthetic(StringRef name, uint32_t type, uint64_t flags, uint32_t alignment); template void splitSections(); template void writeEhdr(uint8_t *buf, Partition &part); template void writePhdrs(uint8_t *buf, Partition &part); Defined *addSyntheticLocal(StringRef name, uint8_t type, uint64_t value, uint64_t size, InputSectionBase §ion); void addVerneed(Symbol *ss); // Linker generated per-partition sections.
struct Partition { StringRef name; uint64_t nameStrTab; SyntheticSection *elfHeader; SyntheticSection *programHeaders; std::vector phdrs; ARMExidxSyntheticSection *armExidx; BuildIdSection *buildId; SyntheticSection *dynamic; StringTableSection *dynStrTab; SymbolTableBaseSection *dynSymTab; EhFrameHeader *ehFrameHdr; EhFrameSection *ehFrame; GnuHashTableSection *gnuHashTab; HashTableSection *hashTab; RelocationBaseSection *relaDyn; RelrBaseSection *relrDyn; VersionDefinitionSection *verDef; SyntheticSection *verNeed; VersionTableSection *verSym; unsigned getNumber() const { return this - &partitions[0] + 1; } }; extern Partition *mainPart; inline Partition &SectionBase::getPartition() const { assert(isLive()); return partitions[partition - 1]; } // Linker generated sections which can be used as inputs and are not specific to // a partition. struct InStruct { InputSection *armAttributes; BssSection *bss; BssSection *bssRelRo; GotSection *got; GotPltSection *gotPlt; IgotPltSection *igotPlt; PPC64LongBranchTargetSection *ppc64LongBranchTarget; MipsGotSection *mipsGot; MipsRldMapSection *mipsRldMap; SyntheticSection *partEnd; SyntheticSection *partIndex; PltSection *plt; IpltSection *iplt; PPC32Got2Section *ppc32Got2; IBTPltSection *ibtPlt; RelocationBaseSection *relaPlt; RelocationBaseSection *relaIplt; StringTableSection *shStrTab; StringTableSection *strTab; SymbolTableBaseSection *symTab; SymtabShndxSection *symTabShndx; }; extern InStruct in; } // namespace elf } // namespace lld #endif Index: head/contrib/llvm-project/lld/ELF/Writer.cpp =================================================================== --- head/contrib/llvm-project/lld/ELF/Writer.cpp (revision 359083) +++ head/contrib/llvm-project/lld/ELF/Writer.cpp (revision 359084) @@ -1,2730 +1,2731 @@ //===- Writer.cpp ---------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "Writer.h" #include "AArch64ErrataFix.h" #include "ARMErrataFix.h" #include "CallGraphSort.h" #include "Config.h" #include "LinkerScript.h" #include "MapFile.h" #include "OutputSections.h" #include "Relocations.h" #include "SymbolTable.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" #include "lld/Common/Filesystem.h" #include "lld/Common/Memory.h" #include "lld/Common/Strings.h" #include "lld/Common/Threads.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/RandomNumberGenerator.h" #include "llvm/Support/SHA1.h" #include "llvm/Support/xxhash.h" #include using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::support; using namespace llvm::support::endian; namespace lld { namespace elf { namespace { // The writer writes a SymbolTable result to a file. 
template class Writer { public: Writer() : buffer(errorHandler().outputBuffer) {} using Elf_Shdr = typename ELFT::Shdr; using Elf_Ehdr = typename ELFT::Ehdr; using Elf_Phdr = typename ELFT::Phdr; void run(); private: void copyLocalSymbols(); void addSectionSymbols(); void forEachRelSec(llvm::function_ref fn); void sortSections(); void resolveShfLinkOrder(); void finalizeAddressDependentContent(); void sortInputSections(); void finalizeSections(); void checkExecuteOnly(); void setReservedSymbolSections(); std::vector createPhdrs(Partition &part); void addPhdrForSection(Partition &part, unsigned shType, unsigned pType, unsigned pFlags); void assignFileOffsets(); void assignFileOffsetsBinary(); void setPhdrs(Partition &part); void checkSections(); void fixSectionAlignments(); void openFile(); void writeTrapInstr(); void writeHeader(); void writeSections(); void writeSectionsBinary(); void writeBuildId(); std::unique_ptr &buffer; void addRelIpltSymbols(); void addStartEndSymbols(); void addStartStopSymbols(OutputSection *sec); uint64_t fileSize; uint64_t sectionHeaderOff; }; } // anonymous namespace static bool isSectionPrefix(StringRef prefix, StringRef name) { return name.startswith(prefix) || name == prefix.drop_back(); } StringRef getOutputSectionName(const InputSectionBase *s) { if (config->relocatable) return s->name; // This is for --emit-relocs. If .text.foo is emitted as .text.bar, we want // to emit .rela.text.foo as .rela.text.bar for consistency (this is not // technically required, but not doing it is odd). This code guarantees that. if (auto *isec = dyn_cast(s)) { if (InputSectionBase *rel = isec->getRelocatedSection()) { OutputSection *out = rel->getOutputSection(); if (s->type == SHT_RELA) return saver.save(".rela" + out->name); return saver.save(".rel" + out->name); } } // This check is for -z keep-text-section-prefix. This option separates text // sections with prefix ".text.hot", ".text.unlikely", ".text.startup" or // ".text.exit". 
// When enabled, this allows identifying the hot code region (.text.hot) in // the final binary which can be selectively mapped to huge pages or mlocked, // for instance. if (config->zKeepTextSectionPrefix) for (StringRef v : {".text.hot.", ".text.unlikely.", ".text.startup.", ".text.exit."}) if (isSectionPrefix(v, s->name)) return v.drop_back(); for (StringRef v : {".text.", ".rodata.", ".data.rel.ro.", ".data.", ".bss.rel.ro.", ".bss.", ".init_array.", ".fini_array.", ".ctors.", ".dtors.", ".tbss.", ".gcc_except_table.", ".tdata.", ".ARM.exidx.", ".ARM.extab."}) if (isSectionPrefix(v, s->name)) return v.drop_back(); // CommonSection is identified as "COMMON" in linker scripts. // By default, it should go to .bss section. if (s->name == "COMMON") return ".bss"; return s->name; } static bool needsInterpSection() { return !config->relocatable && !config->shared && !config->dynamicLinker.empty() && script->needsInterpSection(); } template void writeResult() { Writer().run(); } static void removeEmptyPTLoad(std::vector &phdrs) { llvm::erase_if(phdrs, [&](const PhdrEntry *p) { if (p->p_type != PT_LOAD) return false; if (!p->firstSec) return true; uint64_t size = p->lastSec->addr + p->lastSec->size - p->firstSec->addr; return size == 0; }); } void copySectionsIntoPartitions() { std::vector newSections; for (unsigned part = 2; part != partitions.size() + 1; ++part) { for (InputSectionBase *s : inputSections) { if (!(s->flags & SHF_ALLOC) || !s->isLive()) continue; InputSectionBase *copy; if (s->type == SHT_NOTE) copy = make(cast(*s)); else if (auto *es = dyn_cast(s)) copy = make(*es); else continue; copy->partition = part; newSections.push_back(copy); } } inputSections.insert(inputSections.end(), newSections.begin(), newSections.end()); } void combineEhSections() { for (InputSectionBase *&s : inputSections) { // Ignore dead sections and the partition end marker (.part.end), // whose partition number is out of bounds. 
if (!s->isLive() || s->partition == 255) continue; Partition &part = s->getPartition(); if (auto *es = dyn_cast(s)) { part.ehFrame->addSection(es); s = nullptr; } else if (s->kind() == SectionBase::Regular && part.armExidx && part.armExidx->addSection(cast(s))) { s = nullptr; } } std::vector &v = inputSections; v.erase(std::remove(v.begin(), v.end(), nullptr), v.end()); } static Defined *addOptionalRegular(StringRef name, SectionBase *sec, uint64_t val, uint8_t stOther = STV_HIDDEN, uint8_t binding = STB_GLOBAL) { Symbol *s = symtab->find(name); if (!s || s->isDefined()) return nullptr; s->resolve(Defined{/*file=*/nullptr, name, binding, stOther, STT_NOTYPE, val, /*size=*/0, sec}); return cast(s); } static Defined *addAbsolute(StringRef name) { Symbol *sym = symtab->addSymbol(Defined{nullptr, name, STB_GLOBAL, STV_HIDDEN, STT_NOTYPE, 0, 0, nullptr}); return cast(sym); } // The linker is expected to define some symbols depending on // the linking result. This function defines such symbols. void addReservedSymbols() { if (config->emachine == EM_MIPS) { // Define _gp for MIPS. st_value of _gp symbol will be updated by Writer // so that it points to an absolute address which by default is relative // to GOT. Default offset is 0x7ff0. // See "Global Data Symbols" in Chapter 6 in the following document: // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf ElfSym::mipsGp = addAbsolute("_gp"); // On MIPS O32 ABI, _gp_disp is a magic symbol designates offset between // start of function and 'gp' pointer into GOT. if (symtab->find("_gp_disp")) ElfSym::mipsGpDisp = addAbsolute("_gp_disp"); // The __gnu_local_gp is a magic symbol equal to the current value of 'gp' // pointer. This symbol is used in the code generated by .cpload pseudo-op // in case of using -mno-shared option. 
// https://sourceware.org/ml/binutils/2004-12/msg00094.html if (symtab->find("__gnu_local_gp")) ElfSym::mipsLocalGp = addAbsolute("__gnu_local_gp"); } else if (config->emachine == EM_PPC) { // glibc *crt1.o has an undefined reference to _SDA_BASE_. Since we don't // support Small Data Area, define it arbitrarily as 0. addOptionalRegular("_SDA_BASE_", nullptr, 0, STV_HIDDEN); } // The Power Architecture 64-bit v2 ABI defines a TableOfContents (TOC) which // combines the typical ELF GOT with the small data sections. It commonly // includes .got .toc .sdata .sbss. The .TOC. symbol replaces both // _GLOBAL_OFFSET_TABLE_ and _SDA_BASE_ from the 32-bit ABI. It is used to // represent the TOC base which is offset by 0x8000 bytes from the start of // the .got section. // We do not allow _GLOBAL_OFFSET_TABLE_ to be defined by input objects as the // correctness of some relocations depends on its value. StringRef gotSymName = (config->emachine == EM_PPC64) ? ".TOC." : "_GLOBAL_OFFSET_TABLE_"; if (Symbol *s = symtab->find(gotSymName)) { if (s->isDefined()) { error(toString(s->file) + " cannot redefine linker defined symbol '" + gotSymName + "'"); return; } uint64_t gotOff = 0; if (config->emachine == EM_PPC64) gotOff = 0x8000; s->resolve(Defined{/*file=*/nullptr, gotSymName, STB_GLOBAL, STV_HIDDEN, STT_NOTYPE, gotOff, /*size=*/0, Out::elfHeader}); ElfSym::globalOffsetTable = cast(s); } // __ehdr_start is the location of ELF file headers. Note that we define // this symbol unconditionally even when using a linker script, which // differs from the behavior implemented by the GNU linker, which only defines // this symbol if ELF headers are in the memory mapped segment. addOptionalRegular("__ehdr_start", Out::elfHeader, 0, STV_HIDDEN); // __executable_start is not documented, but the expectation of at // least the Android libc is that it points to the ELF header.
addOptionalRegular("__executable_start", Out::elfHeader, 0, STV_HIDDEN); // __dso_handle symbol is passed to cxa_finalize as a marker to identify // each DSO. The address of the symbol doesn't matter as long as they are // different in different DSOs, so we chose the start address of the DSO. addOptionalRegular("__dso_handle", Out::elfHeader, 0, STV_HIDDEN); // If the linker script does the layout, we do not need to create any standard symbols. if (script->hasSectionsCommand) return; auto add = [](StringRef s, int64_t pos) { return addOptionalRegular(s, Out::elfHeader, pos, STV_DEFAULT); }; ElfSym::bss = add("__bss_start", 0); ElfSym::end1 = add("end", -1); ElfSym::end2 = add("_end", -1); ElfSym::etext1 = add("etext", -1); ElfSym::etext2 = add("_etext", -1); ElfSym::edata1 = add("edata", -1); ElfSym::edata2 = add("_edata", -1); } static OutputSection *findSection(StringRef name, unsigned partition = 1) { for (BaseCommand *base : script->sectionCommands) if (auto *sec = dyn_cast(base)) if (sec->name == name && sec->partition == partition) return sec; return nullptr; } template void createSyntheticSections() { // Initialize all pointers with NULL. This is needed because // you can call lld::elf::main more than once as a library. memset(&Out::first, 0, sizeof(Out)); // Add the .interp section first because it is not a SyntheticSection. // The removeUnusedSyntheticSections() function relies on the // SyntheticSections coming last.
if (needsInterpSection()) { for (size_t i = 1; i <= partitions.size(); ++i) { InputSection *sec = createInterpSection(); sec->partition = i; inputSections.push_back(sec); } } auto add = [](SyntheticSection *sec) { inputSections.push_back(sec); }; in.shStrTab = make(".shstrtab", false); Out::programHeaders = make("", 0, SHF_ALLOC); Out::programHeaders->alignment = config->wordsize; if (config->strip != StripPolicy::All) { in.strTab = make(".strtab", false); in.symTab = make>(*in.strTab); in.symTabShndx = make(); } in.bss = make(".bss", 0, 1); add(in.bss); // If there is a SECTIONS command and a .data.rel.ro section name use name // .data.rel.ro.bss so that we match in the .data.rel.ro output section. // This makes sure our relro is contiguous. bool hasDataRelRo = script->hasSectionsCommand && findSection(".data.rel.ro", 0); in.bssRelRo = make(hasDataRelRo ? ".data.rel.ro.bss" : ".bss.rel.ro", 0, 1); add(in.bssRelRo); // Add MIPS-specific sections. if (config->emachine == EM_MIPS) { if (!config->shared && config->hasDynSymTab) { in.mipsRldMap = make(); add(in.mipsRldMap); } if (auto *sec = MipsAbiFlagsSection::create()) add(sec); if (auto *sec = MipsOptionsSection::create()) add(sec); if (auto *sec = MipsReginfoSection::create()) add(sec); } StringRef relaDynName = config->isRela ? 
".rela.dyn" : ".rel.dyn"; for (Partition &part : partitions) { auto add = [&](SyntheticSection *sec) { sec->partition = part.getNumber(); inputSections.push_back(sec); }; if (!part.name.empty()) { part.elfHeader = make>(); part.elfHeader->name = part.name; add(part.elfHeader); part.programHeaders = make>(); add(part.programHeaders); } if (config->buildId != BuildIdKind::None) { part.buildId = make(); add(part.buildId); } part.dynStrTab = make(".dynstr", true); part.dynSymTab = make>(*part.dynStrTab); part.dynamic = make>(); if (config->androidPackDynRelocs) part.relaDyn = make>(relaDynName); else part.relaDyn = make>(relaDynName, config->zCombreloc); if (config->hasDynSymTab) { part.dynSymTab = make>(*part.dynStrTab); add(part.dynSymTab); part.verSym = make(); add(part.verSym); if (!namedVersionDefs().empty()) { part.verDef = make(); add(part.verDef); } part.verNeed = make>(); add(part.verNeed); if (config->gnuHash) { part.gnuHashTab = make(); add(part.gnuHashTab); } if (config->sysvHash) { part.hashTab = make(); add(part.hashTab); } add(part.dynamic); add(part.dynStrTab); add(part.relaDyn); } if (config->relrPackDynRelocs) { part.relrDyn = make>(); add(part.relrDyn); } if (!config->relocatable) { if (config->ehFrameHdr) { part.ehFrameHdr = make(); add(part.ehFrameHdr); } part.ehFrame = make(); add(part.ehFrame); } if (config->emachine == EM_ARM && !config->relocatable) { // The ARMExidxsyntheticsection replaces all the individual .ARM.exidx // InputSections. part.armExidx = make(); add(part.armExidx); } } if (partitions.size() != 1) { // Create the partition end marker. This needs to be in partition number 255 // so that it is sorted after all other partitions. It also has other // special handling (see createPhdrs() and combineEhSections()). 
in.partEnd = make(".part.end", config->maxPageSize, 1); in.partEnd->partition = 255; add(in.partEnd); in.partIndex = make(); addOptionalRegular("__part_index_begin", in.partIndex, 0); addOptionalRegular("__part_index_end", in.partIndex, in.partIndex->getSize()); add(in.partIndex); } // Add .got. MIPS' .got is so different from the other archs, // it has its own class. if (config->emachine == EM_MIPS) { in.mipsGot = make(); add(in.mipsGot); } else { in.got = make(); add(in.got); } if (config->emachine == EM_PPC) { in.ppc32Got2 = make(); add(in.ppc32Got2); } if (config->emachine == EM_PPC64) { in.ppc64LongBranchTarget = make(); add(in.ppc64LongBranchTarget); } in.gotPlt = make(); add(in.gotPlt); in.igotPlt = make(); add(in.igotPlt); // _GLOBAL_OFFSET_TABLE_ is defined relative to either .got.plt or .got. Treat // it as a relocation and ensure the referenced section is created. if (ElfSym::globalOffsetTable && config->emachine != EM_MIPS) { if (target->gotBaseSymInGotPlt) in.gotPlt->hasGotPltOffRel = true; else in.got->hasGotOffRel = true; } if (config->gdbIndex) add(GdbIndexSection::create()); // We always need to add rel[a].plt to output if it has entries. // Even for static linking it can contain R_[*]_IRELATIVE relocations. in.relaPlt = make>( config->isRela ? ".rela.plt" : ".rel.plt", /*sort=*/false); add(in.relaPlt); // The relaIplt immediately follows .rel[a].dyn to ensure that the IRelative // relocations are processed last by the dynamic loader. We cannot place the // iplt section in .rel.dyn when Android relocation packing is enabled because // that would cause a section type mismatch. However, because the Android // dynamic loader reads .rel.plt after .rel.dyn, we can get the desired // behaviour by placing the iplt section in .rel.plt. in.relaIplt = make>( config->androidPackDynRelocs ? 
in.relaPlt->name : relaDynName, /*sort=*/false); add(in.relaIplt); if ((config->emachine == EM_386 || config->emachine == EM_X86_64) && (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)) { in.ibtPlt = make(); add(in.ibtPlt); } - in.plt = make(); + in.plt = config->emachine == EM_PPC ? make() + : make(); add(in.plt); in.iplt = make(); add(in.iplt); if (config->andFeatures) add(make()); // .note.GNU-stack is always added when we are creating a re-linkable // object file. Other linkers are using the presence of this marker // section to control the executable-ness of the stack area, but that // is irrelevant these days. Stack area should always be non-executable // by default. So we emit this section unconditionally. if (config->relocatable) add(make()); if (in.symTab) add(in.symTab); if (in.symTabShndx) add(in.symTabShndx); add(in.shStrTab); if (in.strTab) add(in.strTab); } // The main function of the writer. template void Writer::run() { if (config->discard != DiscardPolicy::All) copyLocalSymbols(); if (config->copyRelocs) addSectionSymbols(); // Now that we have a complete set of output sections. This function // completes section contents. For example, we need to add strings // to the string table, and add entries to .got and .plt. // finalizeSections does that. finalizeSections(); checkExecuteOnly(); if (errorCount()) return; // If -compressed-debug-sections is specified, we need to compress // .debug_* sections. Do it right now because it changes the size of // output sections. for (OutputSection *sec : outputSections) sec->maybeCompress(); if (script->hasSectionsCommand) script->allocateHeaders(mainPart->phdrs); // Remove empty PT_LOAD to avoid causing the dynamic linker to try to mmap a // 0 sized region. This has to be done late since only after assignAddresses // we know the size of the sections. 
for (Partition &part : partitions) removeEmptyPTLoad(part.phdrs); if (!config->oFormatBinary) assignFileOffsets(); else assignFileOffsetsBinary(); for (Partition &part : partitions) setPhdrs(part); if (config->relocatable) for (OutputSection *sec : outputSections) sec->addr = 0; if (config->checkSections) checkSections(); // It does not make sense try to open the file if we have error already. if (errorCount()) return; // Write the result down to a file. openFile(); if (errorCount()) return; if (!config->oFormatBinary) { if (config->zSeparate != SeparateSegmentKind::None) writeTrapInstr(); writeHeader(); writeSections(); } else { writeSectionsBinary(); } // Backfill .note.gnu.build-id section content. This is done at last // because the content is usually a hash value of the entire output file. writeBuildId(); if (errorCount()) return; // Handle -Map and -cref options. writeMapFile(); writeCrossReferenceTable(); if (errorCount()) return; if (auto e = buffer->commit()) error("failed to write to the output file: " + toString(std::move(e))); } static bool shouldKeepInSymtab(const Defined &sym) { if (sym.isSection()) return false; if (config->discard == DiscardPolicy::None) return true; // If -emit-reloc is given, all symbols including local ones need to be // copied because they may be referenced by relocations. if (config->emitRelocs) return true; // In ELF assembly .L symbols are normally discarded by the assembler. // If the assembler fails to do so, the linker discards them if // * --discard-locals is used. // * The symbol is in a SHF_MERGE section, which is normally the reason for // the assembler keeping the .L symbol. 
StringRef name = sym.getName(); bool isLocal = name.startswith(".L") || name.empty(); if (!isLocal) return true; if (config->discard == DiscardPolicy::Locals) return false; SectionBase *sec = sym.section; return !sec || !(sec->flags & SHF_MERGE); } static bool includeInSymtab(const Symbol &b) { if (!b.isLocal() && !b.isUsedInRegularObj) return false; if (auto *d = dyn_cast(&b)) { // Always include absolute symbols. SectionBase *sec = d->section; if (!sec) return true; sec = sec->repl; // Exclude symbols pointing to garbage-collected sections. if (isa(sec) && !sec->isLive()) return false; if (auto *s = dyn_cast(sec)) if (!s->getSectionPiece(d->value)->live) return false; return true; } return b.used; } // Local symbols are not in the linker's symbol table. This function scans // each object file's symbol table to copy local symbols to the output. template void Writer::copyLocalSymbols() { if (!in.symTab) return; for (InputFile *file : objectFiles) { ObjFile *f = cast>(file); for (Symbol *b : f->getLocalSymbols()) { if (!b->isLocal()) fatal(toString(f) + ": broken object: getLocalSymbols returns a non-local symbol"); auto *dr = dyn_cast(b); // No reason to keep local undefined symbol in symtab. if (!dr) continue; if (!includeInSymtab(*b)) continue; if (!shouldKeepInSymtab(*dr)) continue; in.symTab->addSymbol(b); } } } // Create a section symbol for each output section so that we can represent // relocations that point to the section. If we know that no relocation is // referring to a section (that happens if the section is a synthetic one), we // don't create a section symbol for that section. 
template void Writer::addSectionSymbols() { for (BaseCommand *base : script->sectionCommands) { auto *sec = dyn_cast(base); if (!sec) continue; auto i = llvm::find_if(sec->sectionCommands, [](BaseCommand *base) { if (auto *isd = dyn_cast(base)) return !isd->sections.empty(); return false; }); if (i == sec->sectionCommands.end()) continue; InputSectionBase *isec = cast(*i)->sections[0]; // Relocations are not using REL[A] section symbols. if (isec->type == SHT_REL || isec->type == SHT_RELA) continue; // Unlike other synthetic sections, mergeable output sections contain data // copied from input sections, and there may be a relocation pointing to its // contents if -r or -emit-reloc are given. if (isa(isec) && !(isec->flags & SHF_MERGE)) continue; auto *sym = make(isec->file, "", STB_LOCAL, /*stOther=*/0, STT_SECTION, /*value=*/0, /*size=*/0, isec); in.symTab->addSymbol(sym); } } // Today's loaders have a feature to make segments read-only after // processing dynamic relocations to enhance security. PT_GNU_RELRO // is defined for that. // // This function returns true if a section needs to be put into a // PT_GNU_RELRO segment. static bool isRelroSection(const OutputSection *sec) { if (!config->zRelro) return false; uint64_t flags = sec->flags; // Non-allocatable or non-writable sections don't need RELRO because // they are not writable or not even mapped to memory in the first place. // RELRO is for sections that are essentially read-only but need to // be writable only at process startup to allow dynamic linker to // apply relocations. if (!(flags & SHF_ALLOC) || !(flags & SHF_WRITE)) return false; // Once initialized, TLS data segments are used as data templates // for a thread-local storage. For each new thread, runtime // allocates memory for a TLS and copy templates there. No thread // are supposed to use templates directly. Thus, it can be in RELRO. 
if (flags & SHF_TLS) return true; // .init_array, .preinit_array and .fini_array contain pointers to // functions that are executed on process startup or exit. These // pointers are set by the static linker, and they are not expected // to change at runtime. But if you are an attacker, you could do // interesting things by manipulating pointers in .fini_array, for // example. So they are put into RELRO. uint32_t type = sec->type; if (type == SHT_INIT_ARRAY || type == SHT_FINI_ARRAY || type == SHT_PREINIT_ARRAY) return true; // .got contains pointers to external symbols. They are resolved by // the dynamic linker when a module is loaded into memory, and after // that they are not expected to change. So, it can be in RELRO. if (in.got && sec == in.got->getParent()) return true; // .toc is a GOT-ish section for PowerPC64. Their contents are accessed // through r2 register, which is reserved for that purpose. Since r2 is used // for accessing .got as well, .got and .toc need to be close enough in the // virtual address space. Usually, .toc comes just after .got. Since we place // .got into RELRO, .toc needs to be placed into RELRO too. if (sec->name.equals(".toc")) return true; // .got.plt contains pointers to external function symbols. They are // by default resolved lazily, so we usually cannot put it into RELRO. // However, if "-z now" is given, the lazy symbol resolution is // disabled, which enables us to put it into RELRO. if (sec == in.gotPlt->getParent()) return config->zNow; // .dynamic section contains data for the dynamic linker, and // there's no need to write to it at runtime, so it's better to put // it into RELRO. if (sec->name == ".dynamic") return true; // Sections with some special names are put into RELRO. This is a // bit unfortunate because section names shouldn't be significant in // ELF in spirit. But in reality many linker features depend on // magic section names. 
StringRef s = sec->name; return s == ".data.rel.ro" || s == ".bss.rel.ro" || s == ".ctors" || s == ".dtors" || s == ".jcr" || s == ".eh_frame" || s == ".openbsd.randomdata"; } // We compute a rank for each section. The rank indicates where the // section should be placed in the file. Instead of using simple // numbers (0,1,2...), we use a series of flags. One for each decision // point when placing the section. // Using flags has two key properties: // * It is easy to check if a give branch was taken. // * It is easy two see how similar two ranks are (see getRankProximity). enum RankFlags { RF_NOT_ADDR_SET = 1 << 27, RF_NOT_ALLOC = 1 << 26, RF_PARTITION = 1 << 18, // Partition number (8 bits) RF_NOT_PART_EHDR = 1 << 17, RF_NOT_PART_PHDR = 1 << 16, RF_NOT_INTERP = 1 << 15, RF_NOT_NOTE = 1 << 14, RF_WRITE = 1 << 13, RF_EXEC_WRITE = 1 << 12, RF_EXEC = 1 << 11, RF_RODATA = 1 << 10, RF_NOT_RELRO = 1 << 9, RF_NOT_TLS = 1 << 8, RF_BSS = 1 << 7, RF_PPC_NOT_TOCBSS = 1 << 6, RF_PPC_TOCL = 1 << 5, RF_PPC_TOC = 1 << 4, RF_PPC_GOT = 1 << 3, RF_PPC_BRANCH_LT = 1 << 2, RF_MIPS_GPREL = 1 << 1, RF_MIPS_NOT_GOT = 1 << 0 }; static unsigned getSectionRank(const OutputSection *sec) { unsigned rank = sec->partition * RF_PARTITION; // We want to put section specified by -T option first, so we // can start assigning VA starting from them later. if (config->sectionStartMap.count(sec->name)) return rank; rank |= RF_NOT_ADDR_SET; // Allocatable sections go first to reduce the total PT_LOAD size and // so debug info doesn't change addresses in actual code. if (!(sec->flags & SHF_ALLOC)) return rank | RF_NOT_ALLOC; if (sec->type == SHT_LLVM_PART_EHDR) return rank; rank |= RF_NOT_PART_EHDR; if (sec->type == SHT_LLVM_PART_PHDR) return rank; rank |= RF_NOT_PART_PHDR; // Put .interp first because some loaders want to see that section // on the first page of the executable file when loaded into memory. 
if (sec->name == ".interp") return rank; rank |= RF_NOT_INTERP; // Put .note sections (which make up one PT_NOTE) at the beginning so that // they are likely to be included in a core file even if core file size is // limited. In particular, we want a .note.gnu.build-id and a .note.tag to be // included in a core to match core files with executables. if (sec->type == SHT_NOTE) return rank; rank |= RF_NOT_NOTE; // Sort sections based on their access permission in the following // order: R, RX, RWX, RW. This order is based on the following // considerations: // * Read-only sections come first such that they go in the // PT_LOAD covering the program headers at the start of the file. // * Read-only, executable sections come next. // * Writable, executable sections follow such that .plt on // architectures where it needs to be writable will be placed // between .text and .data. // * Writable sections come last, such that .bss lands at the very // end of the last PT_LOAD. bool isExec = sec->flags & SHF_EXECINSTR; bool isWrite = sec->flags & SHF_WRITE; if (isExec) { if (isWrite) rank |= RF_EXEC_WRITE; else rank |= RF_EXEC; } else if (isWrite) { rank |= RF_WRITE; } else if (sec->type == SHT_PROGBITS) { // Make non-executable and non-writable PROGBITS sections (e.g .rodata // .eh_frame) closer to .text. They likely contain PC or GOT relative // relocations and there could be relocation overflow if other huge sections // (.dynstr .dynsym) were placed in between. rank |= RF_RODATA; } // Place RelRo sections first. After considering SHT_NOBITS below, the // ordering is PT_LOAD(PT_GNU_RELRO(.data.rel.ro .bss.rel.ro) | .data .bss), // where | marks where page alignment happens. An alternative ordering is // PT_LOAD(.data | PT_GNU_RELRO( .data.rel.ro .bss.rel.ro) | .bss), but it may // waste more bytes due to 2 alignment places. if (!isRelroSection(sec)) rank |= RF_NOT_RELRO; // If we got here we know that both A and B are in the same PT_LOAD. 
// The TLS initialization block needs to be a single contiguous block in a R/W // PT_LOAD, so stick TLS sections directly before the other RelRo R/W // sections. Since p_filesz can be less than p_memsz, place NOBITS sections // after PROGBITS. if (!(sec->flags & SHF_TLS)) rank |= RF_NOT_TLS; // Within TLS sections, or within other RelRo sections, or within non-RelRo // sections, place non-NOBITS sections first. if (sec->type == SHT_NOBITS) rank |= RF_BSS; // Some architectures have additional ordering restrictions for sections // within the same PT_LOAD. if (config->emachine == EM_PPC64) { // PPC64 has a number of special SHT_PROGBITS+SHF_ALLOC+SHF_WRITE sections // that we would like to make sure appear is a specific order to maximize // their coverage by a single signed 16-bit offset from the TOC base // pointer. Conversely, the special .tocbss section should be first among // all SHT_NOBITS sections. This will put it next to the loaded special // PPC64 sections (and, thus, within reach of the TOC base pointer). StringRef name = sec->name; if (name != ".tocbss") rank |= RF_PPC_NOT_TOCBSS; if (name == ".toc1") rank |= RF_PPC_TOCL; if (name == ".toc") rank |= RF_PPC_TOC; if (name == ".got") rank |= RF_PPC_GOT; if (name == ".branch_lt") rank |= RF_PPC_BRANCH_LT; } if (config->emachine == EM_MIPS) { // All sections with SHF_MIPS_GPREL flag should be grouped together // because data in these sections is addressable with a gp relative address. 
if (sec->flags & SHF_MIPS_GPREL) rank |= RF_MIPS_GPREL; if (sec->name != ".got") rank |= RF_MIPS_NOT_GOT; } return rank; } static bool compareSections(const BaseCommand *aCmd, const BaseCommand *bCmd) { const OutputSection *a = cast(aCmd); const OutputSection *b = cast(bCmd); if (a->sortRank != b->sortRank) return a->sortRank < b->sortRank; if (!(a->sortRank & RF_NOT_ADDR_SET)) return config->sectionStartMap.lookup(a->name) < config->sectionStartMap.lookup(b->name); return false; } void PhdrEntry::add(OutputSection *sec) { lastSec = sec; if (!firstSec) firstSec = sec; p_align = std::max(p_align, sec->alignment); if (p_type == PT_LOAD) sec->ptLoad = this; } // The beginning and the ending of .rel[a].plt section are marked // with __rel[a]_iplt_{start,end} symbols if it is a statically linked // executable. The runtime needs these symbols in order to resolve // all IRELATIVE relocs on startup. For dynamic executables, we don't // need these symbols, since IRELATIVE relocs are resolved through GOT // and PLT. For details, see http://www.airs.com/blog/archives/403. template void Writer::addRelIpltSymbols() { if (config->relocatable || needsInterpSection()) return; // By default, __rela_iplt_{start,end} belong to a dummy section 0 // because .rela.plt might be empty and thus removed from output. // We'll override Out::elfHeader with In.relaIplt later when we are // sure that .rela.plt exists in output. ElfSym::relaIpltStart = addOptionalRegular( config->isRela ? "__rela_iplt_start" : "__rel_iplt_start", Out::elfHeader, 0, STV_HIDDEN, STB_WEAK); ElfSym::relaIpltEnd = addOptionalRegular( config->isRela ? "__rela_iplt_end" : "__rel_iplt_end", Out::elfHeader, 0, STV_HIDDEN, STB_WEAK); } template void Writer::forEachRelSec( llvm::function_ref fn) { // Scan all relocations. Each relocation goes through a series // of tests to determine if it needs special treatment, such as // creating GOT, PLT, copy relocations, etc. 
// Note that relocations for non-alloc sections are directly // processed by InputSection::relocateNonAlloc. for (InputSectionBase *isec : inputSections) if (isec->isLive() && isa(isec) && (isec->flags & SHF_ALLOC)) fn(*isec); for (Partition &part : partitions) { for (EhInputSection *es : part.ehFrame->sections) fn(*es); if (part.armExidx && part.armExidx->isLive()) for (InputSection *ex : part.armExidx->exidxSections) fn(*ex); } } // This function generates assignments for predefined symbols (e.g. _end or // _etext) and inserts them into the commands sequence to be processed at the // appropriate time. This ensures that the value is going to be correct by the // time any references to these symbols are processed and is equivalent to // defining these symbols explicitly in the linker script. template void Writer::setReservedSymbolSections() { if (ElfSym::globalOffsetTable) { // The _GLOBAL_OFFSET_TABLE_ symbol is defined by target convention usually // to the start of the .got or .got.plt section. InputSection *gotSection = in.gotPlt; if (!target->gotBaseSymInGotPlt) gotSection = in.mipsGot ? cast(in.mipsGot) : cast(in.got); ElfSym::globalOffsetTable->section = gotSection; } // .rela_iplt_{start,end} mark the start and the end of in.relaIplt. if (ElfSym::relaIpltStart && in.relaIplt->isNeeded()) { ElfSym::relaIpltStart->section = in.relaIplt; ElfSym::relaIpltEnd->section = in.relaIplt; ElfSym::relaIpltEnd->value = in.relaIplt->getSize(); } PhdrEntry *last = nullptr; PhdrEntry *lastRO = nullptr; for (Partition &part : partitions) { for (PhdrEntry *p : part.phdrs) { if (p->p_type != PT_LOAD) continue; last = p; if (!(p->p_flags & PF_W)) lastRO = p; } } if (lastRO) { // _etext is the first location after the last read-only loadable segment. if (ElfSym::etext1) ElfSym::etext1->section = lastRO->lastSec; if (ElfSym::etext2) ElfSym::etext2->section = lastRO->lastSec; } if (last) { // _edata points to the end of the last mapped initialized section. 
OutputSection *edata = nullptr; for (OutputSection *os : outputSections) { if (os->type != SHT_NOBITS) edata = os; if (os == last->lastSec) break; } if (ElfSym::edata1) ElfSym::edata1->section = edata; if (ElfSym::edata2) ElfSym::edata2->section = edata; // _end is the first location after the uninitialized data region. if (ElfSym::end1) ElfSym::end1->section = last->lastSec; if (ElfSym::end2) ElfSym::end2->section = last->lastSec; } if (ElfSym::bss) ElfSym::bss->section = findSection(".bss"); // Setup MIPS _gp_disp/__gnu_local_gp symbols which should // be equal to the _gp symbol's value. if (ElfSym::mipsGp) { // Find GP-relative section with the lowest address // and use this address to calculate default _gp value. for (OutputSection *os : outputSections) { if (os->flags & SHF_MIPS_GPREL) { ElfSym::mipsGp->section = os; ElfSym::mipsGp->value = 0x7ff0; break; } } } } // We want to find how similar two ranks are. // The more branches in getSectionRank that match, the more similar they are. // Since each branch corresponds to a bit flag, we can just use // countLeadingZeros. static int getRankProximityAux(OutputSection *a, OutputSection *b) { return countLeadingZeros(a->sortRank ^ b->sortRank); } static int getRankProximity(OutputSection *a, BaseCommand *b) { auto *sec = dyn_cast(b); return (sec && sec->hasInputSections) ? getRankProximityAux(a, sec) : -1; } // When placing orphan sections, we want to place them after symbol assignments // so that an orphan after // begin_foo = .; // foo : { *(foo) } // end_foo = .; // doesn't break the intended meaning of the begin/end symbols. // We don't want to go over sections since findOrphanPos is the // one in charge of deciding the order of the sections. // We don't want to go over changes to '.', since doing so in // rx_sec : { *(rx_sec) } // . = ALIGN(0x1000); // /* The RW PT_LOAD starts here*/ // rw_sec : { *(rw_sec) } // would mean that the RW PT_LOAD would become unaligned. 
static bool shouldSkip(BaseCommand *cmd) { if (auto *assign = dyn_cast(cmd)) return assign->name != "."; return false; } // We want to place orphan sections so that they share as much // characteristics with their neighbors as possible. For example, if // both are rw, or both are tls. static std::vector::iterator findOrphanPos(std::vector::iterator b, std::vector::iterator e) { OutputSection *sec = cast(*e); // Find the first element that has as close a rank as possible. auto i = std::max_element(b, e, [=](BaseCommand *a, BaseCommand *b) { return getRankProximity(sec, a) < getRankProximity(sec, b); }); if (i == e) return e; // Consider all existing sections with the same proximity. int proximity = getRankProximity(sec, *i); for (; i != e; ++i) { auto *curSec = dyn_cast(*i); if (!curSec || !curSec->hasInputSections) continue; if (getRankProximity(sec, curSec) != proximity || sec->sortRank < curSec->sortRank) break; } auto isOutputSecWithInputSections = [](BaseCommand *cmd) { auto *os = dyn_cast(cmd); return os && os->hasInputSections; }; auto j = std::find_if(llvm::make_reverse_iterator(i), llvm::make_reverse_iterator(b), isOutputSecWithInputSections); i = j.base(); // As a special case, if the orphan section is the last section, put // it at the very end, past any other commands. // This matches bfd's behavior and is convenient when the linker script fully // specifies the start of the file, but doesn't care about the end (the non // alloc sections for example). auto nextSec = std::find_if(i, e, isOutputSecWithInputSections); if (nextSec == e) return e; while (i != e && shouldSkip(*i)) ++i; return i; } // Builds section order for handling --symbol-ordering-file. static DenseMap buildSectionOrder() { DenseMap sectionOrder; // Use the rarely used option -call-graph-ordering-file to sort sections. 
if (!config->callGraphProfile.empty()) return computeCallGraphProfileOrder(); if (config->symbolOrderingFile.empty()) return sectionOrder; struct SymbolOrderEntry { int priority; bool present; }; // Build a map from symbols to their priorities. Symbols that didn't // appear in the symbol ordering file have the lowest priority 0. // All explicitly mentioned symbols have negative (higher) priorities. DenseMap symbolOrder; int priority = -config->symbolOrderingFile.size(); for (StringRef s : config->symbolOrderingFile) symbolOrder.insert({s, {priority++, false}}); // Build a map from sections to their priorities. auto addSym = [&](Symbol &sym) { auto it = symbolOrder.find(sym.getName()); if (it == symbolOrder.end()) return; SymbolOrderEntry &ent = it->second; ent.present = true; maybeWarnUnorderableSymbol(&sym); if (auto *d = dyn_cast(&sym)) { if (auto *sec = dyn_cast_or_null(d->section)) { int &priority = sectionOrder[cast(sec->repl)]; priority = std::min(priority, ent.priority); } } }; // We want both global and local symbols. We get the global ones from the // symbol table and iterate the object files for the local ones. for (Symbol *sym : symtab->symbols()) if (!sym->isLazy()) addSym(*sym); for (InputFile *file : objectFiles) for (Symbol *sym : file->getSymbols()) if (sym->isLocal()) addSym(*sym); if (config->warnSymbolOrdering) for (auto orderEntry : symbolOrder) if (!orderEntry.second.present) warn("symbol ordering file: no such symbol: " + orderEntry.first); return sectionOrder; } // Sorts the sections in ISD according to the provided section order. 
static void sortISDBySectionOrder(InputSectionDescription *isd, const DenseMap &order) { std::vector unorderedSections; std::vector> orderedSections; uint64_t unorderedSize = 0; for (InputSection *isec : isd->sections) { auto i = order.find(isec); if (i == order.end()) { unorderedSections.push_back(isec); unorderedSize += isec->getSize(); continue; } orderedSections.push_back({isec, i->second}); } llvm::sort(orderedSections, llvm::less_second()); // Find an insertion point for the ordered section list in the unordered // section list. On targets with limited-range branches, this is the mid-point // of the unordered section list. This decreases the likelihood that a range // extension thunk will be needed to enter or exit the ordered region. If the // ordered section list is a list of hot functions, we can generally expect // the ordered functions to be called more often than the unordered functions, // making it more likely that any particular call will be within range, and // therefore reducing the number of thunks required. // // For example, imagine that you have 8MB of hot code and 32MB of cold code. // If the layout is: // // 8MB hot // 32MB cold // // only the first 8-16MB of the cold code (depending on which hot function it // is actually calling) can call the hot code without a range extension thunk. // However, if we use this layout: // // 16MB cold // 8MB hot // 16MB cold // // both the last 8-16MB of the first block of cold code and the first 8-16MB // of the second block of cold code can call the hot code without a thunk. So // we effectively double the amount of code that could potentially call into // the hot code without a thunk. 
size_t insPt = 0; if (target->getThunkSectionSpacing() && !orderedSections.empty()) { uint64_t unorderedPos = 0; for (; insPt != unorderedSections.size(); ++insPt) { unorderedPos += unorderedSections[insPt]->getSize(); if (unorderedPos > unorderedSize / 2) break; } } isd->sections.clear(); for (InputSection *isec : makeArrayRef(unorderedSections).slice(0, insPt)) isd->sections.push_back(isec); for (std::pair p : orderedSections) isd->sections.push_back(p.first); for (InputSection *isec : makeArrayRef(unorderedSections).slice(insPt)) isd->sections.push_back(isec); } static void sortSection(OutputSection *sec, const DenseMap &order) { StringRef name = sec->name; // Sort input sections by section name suffixes for // __attribute__((init_priority(N))). if (name == ".init_array" || name == ".fini_array") { if (!script->hasSectionsCommand) sec->sortInitFini(); return; } // Sort input sections by the special rule for .ctors and .dtors. if (name == ".ctors" || name == ".dtors") { if (!script->hasSectionsCommand) sec->sortCtorsDtors(); return; } // Never sort these. if (name == ".init" || name == ".fini") return; // .toc is allocated just after .got and is accessed using GOT-relative // relocations. Object files compiled with small code model have an // addressable range of [.got, .got + 0xFFFC] for GOT-relative relocations. // To reduce the risk of relocation overflow, .toc contents are sorted so that // sections having smaller relocation offsets are at beginning of .toc if (config->emachine == EM_PPC64 && name == ".toc") { if (script->hasSectionsCommand) return; assert(sec->sectionCommands.size() == 1); auto *isd = cast(sec->sectionCommands[0]); llvm::stable_sort(isd->sections, [](const InputSection *a, const InputSection *b) -> bool { return a->file->ppc64SmallCodeModelTocRelocs && !b->file->ppc64SmallCodeModelTocRelocs; }); return; } // Sort input sections by priority using the list provided // by --symbol-ordering-file. 
if (!order.empty()) for (BaseCommand *b : sec->sectionCommands) if (auto *isd = dyn_cast(b)) sortISDBySectionOrder(isd, order); } // If no layout was provided by linker script, we want to apply default // sorting for special input sections. This also handles --symbol-ordering-file. template void Writer::sortInputSections() { // Build the order once since it is expensive. DenseMap order = buildSectionOrder(); for (BaseCommand *base : script->sectionCommands) if (auto *sec = dyn_cast(base)) sortSection(sec, order); } template void Writer::sortSections() { script->adjustSectionsBeforeSorting(); // Don't sort if using -r. It is not necessary and we want to preserve the // relative order for SHF_LINK_ORDER sections. if (config->relocatable) return; sortInputSections(); for (BaseCommand *base : script->sectionCommands) { auto *os = dyn_cast(base); if (!os) continue; os->sortRank = getSectionRank(os); // We want to assign rude approximation values to outSecOff fields // to know the relative order of the input sections. We use it for // sorting SHF_LINK_ORDER sections. See resolveShfLinkOrder(). uint64_t i = 0; for (InputSection *sec : getInputSections(os)) sec->outSecOff = i++; } if (!script->hasSectionsCommand) { // We know that all the OutputSections are contiguous in this case. auto isSection = [](BaseCommand *base) { return isa(base); }; std::stable_sort( llvm::find_if(script->sectionCommands, isSection), llvm::find_if(llvm::reverse(script->sectionCommands), isSection).base(), compareSections); return; } // Orphan sections are sections present in the input files which are // not explicitly placed into the output file by the linker script. // // The sections in the linker script are already in the correct // order. We have to figuere out where to insert the orphan // sections. // // The order of the sections in the script is arbitrary and may not agree with // compareSections. This means that we cannot easily define a strict weak // ordering. 
To see why, consider a comparison of a section in the script and // one not in the script. We have a two simple options: // * Make them equivalent (a is not less than b, and b is not less than a). // The problem is then that equivalence has to be transitive and we can // have sections a, b and c with only b in a script and a less than c // which breaks this property. // * Use compareSectionsNonScript. Given that the script order doesn't have // to match, we can end up with sections a, b, c, d where b and c are in the // script and c is compareSectionsNonScript less than b. In which case d // can be equivalent to c, a to b and d < a. As a concrete example: // .a (rx) # not in script // .b (rx) # in script // .c (ro) # in script // .d (ro) # not in script // // The way we define an order then is: // * Sort only the orphan sections. They are in the end right now. // * Move each orphan section to its preferred position. We try // to put each section in the last position where it can share // a PT_LOAD. // // There is some ambiguity as to where exactly a new entry should be // inserted, because Commands contains not only output section // commands but also other types of commands such as symbol assignment // expressions. There's no correct answer here due to the lack of the // formal specification of the linker script. We use heuristics to // determine whether a new output command should be added before or // after another commands. For the details, look at shouldSkip // function. auto i = script->sectionCommands.begin(); auto e = script->sectionCommands.end(); auto nonScriptI = std::find_if(i, e, [](BaseCommand *base) { if (auto *sec = dyn_cast(base)) return sec->sectionIndex == UINT32_MAX; return false; }); // Sort the orphan sections. std::stable_sort(nonScriptI, e, compareSections); // As a horrible special case, skip the first . assignment if it is before any // section. We do this because it is common to set a load address by starting // the script with ". 
= 0xabcd" and the expectation is that every section is // after that. auto firstSectionOrDotAssignment = std::find_if(i, e, [](BaseCommand *cmd) { return !shouldSkip(cmd); }); if (firstSectionOrDotAssignment != e && isa(**firstSectionOrDotAssignment)) ++firstSectionOrDotAssignment; i = firstSectionOrDotAssignment; while (nonScriptI != e) { auto pos = findOrphanPos(i, nonScriptI); OutputSection *orphan = cast(*nonScriptI); // As an optimization, find all sections with the same sort rank // and insert them with one rotate. unsigned rank = orphan->sortRank; auto end = std::find_if(nonScriptI + 1, e, [=](BaseCommand *cmd) { return cast(cmd)->sortRank != rank; }); std::rotate(pos, nonScriptI, end); nonScriptI = end; } script->adjustSectionsAfterSorting(); } static bool compareByFilePosition(InputSection *a, InputSection *b) { InputSection *la = a->getLinkOrderDep(); InputSection *lb = b->getLinkOrderDep(); OutputSection *aOut = la->getParent(); OutputSection *bOut = lb->getParent(); if (aOut != bOut) return aOut->sectionIndex < bOut->sectionIndex; return la->outSecOff < lb->outSecOff; } template void Writer::resolveShfLinkOrder() { for (OutputSection *sec : outputSections) { if (!(sec->flags & SHF_LINK_ORDER)) continue; // The ARM.exidx section use SHF_LINK_ORDER, but we have consolidated // this processing inside the ARMExidxsyntheticsection::finalizeContents(). if (!config->relocatable && config->emachine == EM_ARM && sec->type == SHT_ARM_EXIDX) continue; // Link order may be distributed across several InputSectionDescriptions // but sort must consider them all at once. 
std::vector scriptSections; std::vector sections; for (BaseCommand *base : sec->sectionCommands) { if (auto *isd = dyn_cast(base)) { for (InputSection *&isec : isd->sections) { scriptSections.push_back(&isec); sections.push_back(isec); InputSection *link = isec->getLinkOrderDep(); if (!link->getParent()) error(toString(isec) + ": sh_link points to discarded section " + toString(link)); } } } if (errorCount()) continue; llvm::stable_sort(sections, compareByFilePosition); for (int i = 0, n = sections.size(); i < n; ++i) *scriptSections[i] = sections[i]; } } // We need to generate and finalize the content that depends on the address of // InputSections. As the generation of the content may also alter InputSection // addresses we must converge to a fixed point. We do that here. See the comment // in Writer::finalizeSections(). template void Writer::finalizeAddressDependentContent() { ThunkCreator tc; AArch64Err843419Patcher a64p; ARMErr657417Patcher a32p; script->assignAddresses(); int assignPasses = 0; for (;;) { bool changed = target->needsThunks && tc.createThunks(outputSections); // With Thunk Size much smaller than branch range we expect to // converge quickly; if we get to 10 something has gone wrong. if (changed && tc.pass >= 10) { error("thunk creation not converged"); break; } if (config->fixCortexA53Errata843419) { if (changed) script->assignAddresses(); changed |= a64p.createFixes(); } if (config->fixCortexA8) { if (changed) script->assignAddresses(); changed |= a32p.createFixes(); } if (in.mipsGot) in.mipsGot->updateAllocSize(); for (Partition &part : partitions) { changed |= part.relaDyn->updateAllocSize(); if (part.relrDyn) changed |= part.relrDyn->updateAllocSize(); } const Defined *changedSym = script->assignAddresses(); if (!changed) { // Some symbols may be dependent on section addresses. When we break the // loop, the symbol values are finalized because a previous // assignAddresses() finalized section addresses. 
if (!changedSym) break; if (++assignPasses == 5) { errorOrWarn("assignment to symbol " + toString(*changedSym) + " does not converge"); break; } } } } static void finalizeSynthetic(SyntheticSection *sec) { if (sec && sec->isNeeded() && sec->getParent()) sec->finalizeContents(); } // In order to allow users to manipulate linker-synthesized sections, // we had to add synthetic sections to the input section list early, // even before we make decisions whether they are needed. This allows // users to write scripts like this: ".mygot : { .got }". // // Doing it has an unintended side effects. If it turns out that we // don't need a .got (for example) at all because there's no // relocation that needs a .got, we don't want to emit .got. // // To deal with the above problem, this function is called after // scanRelocations is called to remove synthetic sections that turn // out to be empty. static void removeUnusedSyntheticSections() { // All input synthetic sections that can be empty are placed after // all regular ones. We iterate over them all and exit at first // non-synthetic. for (InputSectionBase *s : llvm::reverse(inputSections)) { SyntheticSection *ss = dyn_cast(s); if (!ss) return; OutputSection *os = ss->getParent(); if (!os || ss->isNeeded()) continue; // If we reach here, then SS is an unused synthetic section and we want to // remove it from corresponding input section description of output section. for (BaseCommand *b : os->sectionCommands) if (auto *isd = dyn_cast(b)) llvm::erase_if(isd->sections, [=](InputSection *isec) { return isec == ss; }); } } // Create output section objects and add them to OutputSections. 
// NOTE(review): `template void Writer::...`, `dyn_cast(...)`, `cast(...)`,
// `DenseSet syms` and `MutableArrayRef(...)` below carry no template argument
// lists; they look stripped from this copy of the file -- confirm against
// upstream.
template void Writer::finalizeSections() {
  Out::preinitArray = findSection(".preinit_array");
  Out::initArray = findSection(".init_array");
  Out::finiArray = findSection(".fini_array");

  // The linker needs to define SECNAME_start, SECNAME_end and SECNAME_stop
  // symbols for sections, so that the runtime can get the start and end
  // addresses of each section by section name. Add such symbols.
  if (!config->relocatable) {
    addStartEndSymbols();
    for (BaseCommand *base : script->sectionCommands)
      if (auto *sec = dyn_cast(base))
        addStartStopSymbols(sec);
  }

  // Add _DYNAMIC symbol. Unlike GNU gold, our _DYNAMIC symbol has no type.
  // It should be okay as no one seems to care about the type.
  // Even the author of gold doesn't remember why gold behaves that way.
  // https://sourceware.org/ml/binutils/2002-03/msg00360.html
  if (mainPart->dynamic->parent)
    symtab->addSymbol(Defined{/*file=*/nullptr, "_DYNAMIC", STB_WEAK,
                              STV_HIDDEN, STT_NOTYPE,
                              /*value=*/0, /*size=*/0, mainPart->dynamic});

  // Define __rel[a]_iplt_{start,end} symbols if needed.
  addRelIpltSymbols();

  // RISC-V's gp can address +/- 2 KiB, set it to .sdata + 0x800. This symbol
  // should only be defined in an executable. If .sdata does not exist, its
  // value/section does not matter but it has to be relative, so set its
  // st_shndx arbitrarily to 1 (Out::elfHeader).
  if (config->emachine == EM_RISCV && !config->shared) {
    OutputSection *sec = findSection(".sdata");
    ElfSym::riscvGlobalPointer =
        addOptionalRegular("__global_pointer$", sec ? sec : Out::elfHeader,
                           0x800, STV_DEFAULT, STB_GLOBAL);
  }

  if (config->emachine == EM_X86_64) {
    // On targets that support TLSDESC, _TLS_MODULE_BASE_ is defined in such a
    // way that:
    //
    // 1) Without relaxation: it produces a dynamic TLSDESC relocation that
    // computes 0.
    // 2) With LD->LE relaxation: _TLS_MODULE_BASE_@tpoff = 0 (lowest address in
    // the TLS block).
    //
    // 2) is special cased in @tpoff computation. To satisfy 1), we define it as
    // an absolute symbol of zero. This is different from GNU linkers which
    // define _TLS_MODULE_BASE_ relative to the first TLS section.
    Symbol *s = symtab->find("_TLS_MODULE_BASE_");
    if (s && s->isUndefined()) {
      s->resolve(Defined{/*file=*/nullptr, s->getName(), STB_GLOBAL,
                         STV_HIDDEN, STT_TLS, /*value=*/0, 0,
                         /*section=*/nullptr});
      ElfSym::tlsModuleBase = cast(s);
    }
  }

  // This is responsible for splitting up .eh_frame section into
  // pieces. The relocation scan uses those pieces, so this has to be
  // earlier.
  for (Partition &part : partitions)
    finalizeSynthetic(part.ehFrame);

  for (Symbol *sym : symtab->symbols())
    sym->isPreemptible = computeIsPreemptible(*sym);

  // Change values of linker-script-defined symbols from placeholders (assigned
  // by declareSymbols) to actual definitions.
  script->processSymbolAssignments();

  // Scan relocations. This must be done after every symbol is declared so that
  // we can correctly decide if a dynamic relocation is needed. This is called
  // after processSymbolAssignments() because it needs to know whether a
  // linker-script-defined symbol is absolute.
  if (!config->relocatable) {
    forEachRelSec(scanRelocations);
    reportUndefinedSymbols();
  }

  if (in.plt && in.plt->isNeeded())
    in.plt->addSymbols();
  if (in.iplt && in.iplt->isNeeded())
    in.iplt->addSymbols();

  if (!config->allowShlibUndefined) {
    // Error on undefined symbols in a shared object, if all of its DT_NEEDED
    // entries are seen. These cases would otherwise lead to runtime errors
    // reported by the dynamic linker.
    //
    // ld.bfd traces all DT_NEEDED to emulate the logic of the dynamic linker to
    // catch more cases. That is too much for us. Our approach resembles the one
    // used in ld.gold, achieves a good balance to be useful but not too smart.
    for (SharedFile *file : sharedFiles)
      file->allNeededIsKnown =
          llvm::all_of(file->dtNeeded, [&](StringRef needed) {
            return symtab->soNames.count(needed);
          });

    for (Symbol *sym : symtab->symbols())
      if (sym->isUndefined() && !sym->isWeak())
        if (auto *f = dyn_cast_or_null(sym->file))
          if (f->allNeededIsKnown)
            error(toString(f) + ": undefined reference to " + toString(*sym));
  }

  // Now that we have defined all possible global symbols including linker-
  // synthesized ones. Visit all symbols to give the finishing touches.
  for (Symbol *sym : symtab->symbols()) {
    if (!includeInSymtab(*sym))
      continue;
    if (in.symTab)
      in.symTab->addSymbol(sym);

    if (sym->includeInDynsym()) {
      // sym->partition is 1-based; partitions[] is 0-based.
      partitions[sym->partition - 1].dynSymTab->addSymbol(sym);
      if (auto *file = dyn_cast_or_null(sym->file))
        if (file->isNeeded && !sym->isUndefined())
          addVerneed(sym);
    }
  }

  // We also need to scan the dynamic relocation tables of the other partitions
  // and add any referenced symbols to the partition's dynsym.
  for (Partition &part : MutableArrayRef(partitions).slice(1)) {
    // Symbols already in this partition's dynsym; used to avoid duplicates.
    DenseSet syms;
    for (const SymbolTableEntry &e : part.dynSymTab->getSymbols())
      syms.insert(e.sym);
    for (DynamicReloc &reloc : part.relaDyn->relocs)
      if (reloc.sym && !reloc.useSymVA && syms.insert(reloc.sym).second)
        part.dynSymTab->addSymbol(reloc.sym);
  }

  // Do not proceed if there was an undefined symbol.
  if (errorCount())
    return;

  if (in.mipsGot)
    in.mipsGot->build();

  removeUnusedSyntheticSections();

  sortSections();

  // Now that we have the final list, create a list of all the
  // OutputSections for convenience.
  for (BaseCommand *base : script->sectionCommands)
    if (auto *sec = dyn_cast(base))
      outputSections.push_back(sec);

  // Prefer command line supplied address over other constraints.
  for (OutputSection *sec : outputSections) {
    auto i = config->sectionStartMap.find(sec->name);
    if (i != config->sectionStartMap.end())
      sec->addrExpr = [=] { return i->second; };
  }

  // This is a bit of a hack. A value of 0 means undef, so we set it
  // to 1 to make __ehdr_start defined. The section number is not
  // particularly relevant.
  Out::elfHeader->sectionIndex = 1;

  // Section indices are 1-based, in outputSections order.
  for (size_t i = 0, e = outputSections.size(); i != e; ++i) {
    OutputSection *sec = outputSections[i];
    sec->sectionIndex = i + 1;
    sec->shName = in.shStrTab->addString(sec->name);
  }

  // Binary and relocatable output does not have PHDRS.
  // The headers have to be created before finalize as that can influence the
  // image base and the dynamic section on mips includes the image base.
  if (!config->relocatable && !config->oFormatBinary) {
    for (Partition &part : partitions) {
      part.phdrs = script->hasPhdrsCommands() ? script->createPhdrs()
                                              : createPhdrs(part);
      if (config->emachine == EM_ARM) {
        // PT_ARM_EXIDX is the ARM EHABI equivalent of PT_GNU_EH_FRAME
        addPhdrForSection(part, SHT_ARM_EXIDX, PT_ARM_EXIDX, PF_R);
      }
      if (config->emachine == EM_MIPS) {
        // Add separate segments for MIPS-specific sections.
        addPhdrForSection(part, SHT_MIPS_REGINFO, PT_MIPS_REGINFO, PF_R);
        addPhdrForSection(part, SHT_MIPS_OPTIONS, PT_MIPS_OPTIONS, PF_R);
        addPhdrForSection(part, SHT_MIPS_ABIFLAGS, PT_MIPS_ABIFLAGS, PF_R);
      }
    }
    Out::programHeaders->size = sizeof(Elf_Phdr) * mainPart->phdrs.size();

    // Find the TLS segment. This happens before the section layout loop so that
    // Android relocation packing can look up TLS symbol addresses. We only need
    // to care about the main partition here because all TLS symbols were moved
    // to the main partition (see MarkLive.cpp).
    for (PhdrEntry *p : mainPart->phdrs)
      if (p->p_type == PT_TLS)
        Out::tlsPhdr = p;
  }

  // Some symbols are defined in terms of program headers. Now that we
  // have the headers, we can find out which sections they point to.
  setReservedSymbolSections();

  finalizeSynthetic(in.bss);
  finalizeSynthetic(in.bssRelRo);
  finalizeSynthetic(in.symTabShndx);
  finalizeSynthetic(in.shStrTab);
  finalizeSynthetic(in.strTab);
  finalizeSynthetic(in.got);
  finalizeSynthetic(in.mipsGot);
  finalizeSynthetic(in.igotPlt);
  finalizeSynthetic(in.gotPlt);
  finalizeSynthetic(in.relaIplt);
  finalizeSynthetic(in.relaPlt);
  finalizeSynthetic(in.plt);
  finalizeSynthetic(in.iplt);
  finalizeSynthetic(in.ppc32Got2);
  finalizeSynthetic(in.partIndex);

  // Dynamic section must be the last one in this list and dynamic
  // symbol table section (dynSymTab) must be the first one.
  for (Partition &part : partitions) {
    finalizeSynthetic(part.armExidx);
    finalizeSynthetic(part.dynSymTab);
    finalizeSynthetic(part.gnuHashTab);
    finalizeSynthetic(part.hashTab);
    finalizeSynthetic(part.verDef);
    finalizeSynthetic(part.relaDyn);
    finalizeSynthetic(part.relrDyn);
    finalizeSynthetic(part.ehFrameHdr);
    finalizeSynthetic(part.verSym);
    finalizeSynthetic(part.verNeed);
    finalizeSynthetic(part.dynamic);
  }

  if (!script->hasSectionsCommand && !config->relocatable)
    fixSectionAlignments();

  // SHFLinkOrder processing must be processed after relative section placements are
  // known but before addresses are allocated.
  resolveShfLinkOrder();
  if (errorCount())
    return;

  // This is used to:
  // 1) Create "thunks":
  //    Jump instructions in many ISAs have small displacements, and therefore
  //    they cannot jump to arbitrary addresses in memory. For example, RISC-V
  //    JAL instruction can target only +-1 MiB from PC. It is a linker's
  //    responsibility to create and insert small pieces of code between
  //    sections to extend the ranges if jump targets are out of range. Such
  //    code pieces are called "thunks".
  //
  //    We add thunks at this stage. We couldn't do this before this point
  //    because this is the earliest point where we know sizes of sections and
  //    their layouts (that are needed to determine if jump targets are in
  //    range).
  //
  // 2) Update the sections. We need to generate content that depends on the
  //    address of InputSections. For example, MIPS GOT section content or
  //    android packed relocations sections content.
  //
  // 3) Assign the final values for the linker script symbols. Linker scripts
  //    sometimes use forward symbol declarations. We want to set the correct
  //    values. They also might change after adding the thunks.
  finalizeAddressDependentContent();

  // finalizeAddressDependentContent may have added local symbols to the static symbol table.
  finalizeSynthetic(in.symTab);
  finalizeSynthetic(in.ppc64LongBranchTarget);

  // Fill other section headers. The dynamic table is finalized
  // at the end because some tags like RELSZ depend on result
  // of finalizing other sections.
  for (OutputSection *sec : outputSections)
    sec->finalize();
}

// Ensure data sections are not mixed with executable sections when
// -execute-only is used. -execute-only is a feature to make pages executable
// but not readable, and the feature is currently supported only on AArch64.
//
// Reports an error for every input section without SHF_EXECINSTR that was
// placed into an output section with SHF_EXECINSTR. Does nothing unless
// config->executeOnly is set.
template void Writer::checkExecuteOnly() {
  if (!config->executeOnly)
    return;

  for (OutputSection *os : outputSections)
    if (os->flags & SHF_EXECINSTR)
      for (InputSection *isec : getInputSections(os))
        if (!(isec->flags & SHF_EXECINSTR))
          error("cannot place " + toString(isec) + " into " +
                toString(os->name) +
                ": -execute-only does not support intermingling data and code");
}

// The linker is expected to define SECNAME_start and SECNAME_end
// symbols for a few sections. This function defines them.
template void Writer::addStartEndSymbols() {
  // If a section does not exist, there's ambiguity as to how we
  // define _start and _end symbols for an init/fini section. Since
  // the loader assumes that the symbols are always defined, we need to
  // always define them. But what value? The loader iterates over all
  // pointers between _start and _end to run global ctors/dtors, so if
  // the section is empty, their symbol values don't actually matter
  // as long as _start and _end point to the same location.
  //
  // That said, we don't want to set the symbols to 0 (which is
  // probably the simplest value) because that could cause some
  // program to fail to link due to relocation overflow, if their
  // program text is above 2 GiB. We use the address of the .text
  // section instead to prevent that failure.
  //
  // In rare situations, the .text section may not exist. If that's the
  // case, use the image base address as a last resort.
  OutputSection *Default = findSection(".text");
  if (!Default)
    Default = Out::elfHeader;

  // With a section present, `start` gets value 0 and `end` gets value -1 in
  // that section (presumably -1 stands for "end of section" -- confirm in
  // addOptionalRegular). Otherwise both get value 0 in `Default`.
  auto define = [=](StringRef start, StringRef end, OutputSection *os) {
    if (os) {
      addOptionalRegular(start, os, 0);
      addOptionalRegular(end, os, -1);
    } else {
      addOptionalRegular(start, Default, 0);
      addOptionalRegular(end, Default, 0);
    }
  };

  define("__preinit_array_start", "__preinit_array_end", Out::preinitArray);
  define("__init_array_start", "__init_array_end", Out::initArray);
  define("__fini_array_start", "__fini_array_end", Out::finiArray);

  // The exidx symbols are only defined when .ARM.exidx actually exists.
  if (OutputSection *sec = findSection(".ARM.exidx"))
    define("__exidx_start", "__exidx_end", sec);
}

// If a section name is valid as a C identifier (which is rare because of
// the leading '.'), linkers are expected to define __start_SECNAME and
// __stop_SECNAME symbols. They are at beginning and end of the section,
// respectively. This is not requested by the ELF standard, but GNU ld and
// gold provide the feature, and it is used by many programs.
// NOTE(review): `template void Writer::...`, `std::vector ret`, `make(...)`
// and `template std::vector Writer::createPhdrs` below carry no template
// argument lists; they look stripped from this copy of the file -- confirm
// against upstream.
template void Writer::addStartStopSymbols(OutputSection *sec) {
  StringRef s = sec->name;
  if (!isValidCIdentifier(s))
    return;
  // Value 0 for the start symbol and -1 for the stop symbol, both
  // STV_PROTECTED and attached to `sec`.
  addOptionalRegular(saver.save("__start_" + s), sec, 0, STV_PROTECTED);
  addOptionalRegular(saver.save("__stop_" + s), sec, -1, STV_PROTECTED);
}

// Returns true if `sec` should be placed in a PT_LOAD: it must be SHF_ALLOC,
// not marked noload, and not a TLS NOBITS section.
static bool needsPtLoad(OutputSection *sec) {
  if (!(sec->flags & SHF_ALLOC) || sec->noload)
    return false;

  // Don't allocate VA space for TLS NOBITS sections. The PT_TLS PHDR is
  // responsible for allocating space for them, not the PT_LOAD that
  // contains the TLS initialization image.
  if ((sec->flags & SHF_TLS) && sec->type == SHT_NOBITS)
    return false;
  return true;
}

// Linker scripts are responsible for aligning addresses. Unfortunately, most
// linker scripts are designed for creating two PT_LOADs only, one RX and one
// RW. This means that there is no alignment in the RO to RX transition and we
// cannot create a PT_LOAD there.
//
// Maps section-derived segment flags to the flags actually used, checking the
// options in this order: omagic forces RWX; executeOnly drops PF_R from
// executable segments; singleRoRx adds PF_X to non-writable segments.
static uint64_t computeFlags(uint64_t flags) {
  if (config->omagic)
    return PF_R | PF_W | PF_X;
  if (config->executeOnly && (flags & PF_X))
    return flags & ~PF_R;
  if (config->singleRoRx && !(flags & PF_W))
    return flags | PF_X;
  return flags;
}

// Decide which program headers to create and which sections to include in each
// one.
template std::vector Writer::createPhdrs(Partition &part) {
  std::vector ret;
  // Creates a new header, appends it to `ret` and returns it.
  auto addHdr = [&](unsigned type, unsigned flags) -> PhdrEntry * {
    ret.push_back(make(type, flags));
    return ret.back();
  };

  unsigned partNo = part.getNumber();
  bool isMain = partNo == 1;

  // Add the first PT_LOAD segment for regular output sections.
  uint64_t flags = computeFlags(PF_R);
  PhdrEntry *load = nullptr;

  // nmagic or omagic output does not have PT_PHDR, PT_INTERP, or the readonly
  // PT_LOAD.
  if (!config->nmagic && !config->omagic) {
    // The first phdr entry is PT_PHDR which describes the program header
    // itself.
    if (isMain)
      addHdr(PT_PHDR, PF_R)->add(Out::programHeaders);
    else
      addHdr(PT_PHDR, PF_R)->add(part.programHeaders->getParent());

    // PT_INTERP must be the second entry if it exists.
    if (OutputSection *cmd = findSection(".interp", partNo))
      addHdr(PT_INTERP, cmd->getPhdrFlags())->add(cmd);

    // Add the headers. We will remove them if they don't fit.
    // In the other partitions the headers are ordinary sections, so they don't
    // need to be added here.
    if (isMain) {
      load = addHdr(PT_LOAD, flags);
      load->add(Out::elfHeader);
      load->add(Out::programHeaders);
    }
  }

  // PT_GNU_RELRO includes all sections that should be marked as
  // read-only by dynamic linker after processing relocations.
  // Current dynamic loaders only support one PT_GNU_RELRO PHDR, give
  // an error message if more than one PT_GNU_RELRO PHDR is required.
  PhdrEntry *relRo = make(PT_GNU_RELRO, PF_R);
  bool inRelroPhdr = false;
  // First non-relro section seen after a run of relro sections. Once set, any
  // further relro section is reported as non-contiguous.
  OutputSection *relroEnd = nullptr;
  for (OutputSection *sec : outputSections) {
    if (sec->partition != partNo || !needsPtLoad(sec))
      continue;
    if (isRelroSection(sec)) {
      inRelroPhdr = true;
      if (!relroEnd)
        relRo->add(sec);
      else
        error("section: " + sec->name + " is not contiguous with other relro" +
              " sections");
    } else if (inRelroPhdr) {
      inRelroPhdr = false;
      relroEnd = sec;
    }
  }

  for (OutputSection *sec : outputSections) {
    // Stops at the first non-SHF_ALLOC section; presumably allocatable
    // sections are sorted first -- confirm against sortSections().
    if (!(sec->flags & SHF_ALLOC))
      break;
    if (!needsPtLoad(sec))
      continue;

    // Normally, sections in partitions other than the current partition are
    // ignored. But partition number 255 is a special case: it contains the
    // partition end marker (.part.end). It needs to be added to the main
    // partition so that a segment is created for it in the main partition,
    // which will cause the dynamic loader to reserve space for the other
    // partitions.
    if (sec->partition != partNo) {
      if (isMain && sec->partition == 255)
        addHdr(PT_LOAD, computeFlags(sec->getPhdrFlags()))->add(sec);
      continue;
    }

    // Segments are contiguous memory regions that have the same attributes
    // (e.g. executable or writable). There is one phdr for each segment.
    // Therefore, we need to create a new phdr when the next section has
    // different flags or is loaded at a discontiguous address or memory
    // region using AT or AT> linker script command, respectively. At the same
    // time, we don't want to create a separate load segment for the headers,
    // even if the first output section has an AT or AT> attribute.
    uint64_t newFlags = computeFlags(sec->getPhdrFlags());
    if (!load ||
        ((sec->lmaExpr ||
          (sec->lmaRegion && (sec->lmaRegion != load->firstSec->lmaRegion))) &&
         load->lastSec != Out::programHeaders) ||
        sec->memRegion != load->firstSec->memRegion || flags != newFlags ||
        sec == relroEnd) {
      load = addHdr(PT_LOAD, newFlags);
      flags = newFlags;
    }

    load->add(sec);
  }

  // Add a TLS segment if any.
  PhdrEntry *tlsHdr = make(PT_TLS, PF_R);
  for (OutputSection *sec : outputSections)
    if (sec->partition == partNo && sec->flags & SHF_TLS)
      tlsHdr->add(sec);
  if (tlsHdr->firstSec)
    ret.push_back(tlsHdr);

  // Add an entry for .dynamic.
  if (OutputSection *sec = part.dynamic->getParent())
    addHdr(PT_DYNAMIC, sec->getPhdrFlags())->add(sec);

  // PT_GNU_RELRO is emitted only if at least one section was added to it.
  if (relRo->firstSec)
    ret.push_back(relRo);

  // PT_GNU_EH_FRAME is a special section pointing on .eh_frame_hdr.
  if (part.ehFrame->isNeeded() && part.ehFrameHdr &&
      part.ehFrame->getParent() && part.ehFrameHdr->getParent())
    addHdr(PT_GNU_EH_FRAME, part.ehFrameHdr->getParent()->getPhdrFlags())
        ->add(part.ehFrameHdr->getParent());

  // PT_OPENBSD_RANDOMIZE is an OpenBSD-specific feature. That makes
  // the dynamic linker fill the segment with random data.
  if (OutputSection *cmd = findSection(".openbsd.randomdata", partNo))
    addHdr(PT_OPENBSD_RANDOMIZE, cmd->getPhdrFlags())->add(cmd);

  if (config->zGnustack != GnuStackKind::None) {
    // PT_GNU_STACK is a special section to tell the loader to make the
    // pages for the stack non-executable. If you really want an executable
    // stack, you can pass -z execstack, but that's not recommended for
    // security reasons.
    unsigned perm = PF_R | PF_W;
    if (config->zGnustack == GnuStackKind::Exec)
      perm |= PF_X;
    addHdr(PT_GNU_STACK, perm)->p_memsz = config->zStackSize;
  }

  // PT_OPENBSD_WXNEEDED is an OpenBSD-specific header to mark the executable
  // is expected to perform W^X violations, such as calling mprotect(2) or
  // mmap(2) with PROT_WRITE | PROT_EXEC, which is prohibited by default on
  // OpenBSD.
  if (config->zWxneeded)
    addHdr(PT_OPENBSD_WXNEEDED, PF_X);

  if (OutputSection *cmd = findSection(".note.gnu.property", partNo))
    addHdr(PT_GNU_PROPERTY, PF_R)->add(cmd);

  // Create one PT_NOTE per a group of contiguous SHT_NOTE sections with the
  // same alignment.
  PhdrEntry *note = nullptr;
  for (OutputSection *sec : outputSections) {
    if (sec->partition != partNo)
      continue;
    if (sec->type == SHT_NOTE && (sec->flags & SHF_ALLOC)) {
      // A section with an explicit LMA expression also starts a new PT_NOTE.
      if (!note || sec->lmaExpr || note->lastSec->alignment != sec->alignment)
        note = addHdr(PT_NOTE, PF_R);
      note->add(sec);
    } else {
      note = nullptr;
    }
  }
  return ret;
}

// Appends to part.phdrs a header of type `pType` with flags `pFlags` covering
// the first output section in this partition whose type is `shType`. Does
// nothing if no such section exists.
template void Writer::addPhdrForSection(Partition &part, unsigned shType,
                                        unsigned pType, unsigned pFlags) {
  unsigned partNo = part.getNumber();
  auto i = llvm::find_if(outputSections, [=](OutputSection *cmd) {
    return cmd->partition == partNo && cmd->type == shType;
  });
  if (i == outputSections.end())
    return;

  PhdrEntry *entry = make(pType, pFlags);
  entry->add(*i);
  part.phdrs.push_back(entry);
}

// Place the first section of each PT_LOAD to a different page (of maxPageSize).
// This is achieved by assigning an alignment expression to addrExpr of each
// such section.
template void Writer::fixSectionAlignments() {
  // Previous PT_LOAD within the current partition. It is reset to nullptr at
  // the start of each partition in the loop at the bottom, before pageAlign
  // reads it.
  const PhdrEntry *prev;
  auto pageAlign = [&](const PhdrEntry *p) {
    OutputSection *cmd = p->firstSec;
    // Sections that already have an address expression are left untouched.
    if (cmd && !cmd->addrExpr) {
      // Prefer advancing to align(dot, maxPageSize) + dot%maxPageSize to avoid
      // padding in the file contents.
      //
      // When -z separate-code is used we must not have any overlap in pages
      // between an executable segment and a non-executable segment. We align to
      // the next maximum page size boundary on transitions between executable
      // and non-executable segments.
      //
      // SHT_LLVM_PART_EHDR marks the start of a partition. The partition
      // sections will be extracted to a separate file. Align to the next
      // maximum page size boundary so that we can find the ELF header at the
      // start. We cannot benefit from overlapping p_offset ranges with the
      // previous segment anyway.
      if (config->zSeparate == SeparateSegmentKind::Loadable ||
          (config->zSeparate == SeparateSegmentKind::Code && prev &&
           (prev->p_flags & PF_X) != (p->p_flags & PF_X)) ||
          cmd->type == SHT_LLVM_PART_EHDR)
        cmd->addrExpr = [] {
          return alignTo(script->getDot(), config->maxPageSize);
        };
      // PT_TLS is at the start of the first RW PT_LOAD. If `p` includes PT_TLS,
      // it must be the RW. Align to p_align(PT_TLS) to make sure
      // p_vaddr(PT_LOAD)%p_align(PT_LOAD) = 0. Otherwise, if
      // sh_addralign(.tdata) < sh_addralign(.tbss), we will set p_align(PT_TLS)
      // to sh_addralign(.tbss), while p_vaddr(PT_TLS)=p_vaddr(PT_LOAD) may not
      // be congruent to 0 modulo p_align(PT_TLS).
      //
      // Technically this is not required, but as of 2019, some dynamic loaders
      // don't handle p_vaddr%p_align != 0 correctly, e.g. glibc (i386 and
      // x86-64) doesn't make runtime address congruent to p_vaddr modulo
      // p_align for dynamic TLS blocks (PR/24606), FreeBSD rtld has the same
      // bug, musl (TLS Variant 1 architectures) before 1.1.23 handled TLS
      // blocks correctly. We need to keep the workaround for a while.
      else if (Out::tlsPhdr && Out::tlsPhdr->firstSec == p->firstSec)
        cmd->addrExpr = [] {
          return alignTo(script->getDot(), config->maxPageSize) +
                 alignTo(script->getDot() % config->maxPageSize,
                         Out::tlsPhdr->p_align);
        };
      else
        cmd->addrExpr = [] {
          return alignTo(script->getDot(), config->maxPageSize) +
                 script->getDot() % config->maxPageSize;
        };
    }
  };

  for (Partition &part : partitions) {
    prev = nullptr;
    for (const PhdrEntry *p : part.phdrs)
      if (p->p_type == PT_LOAD && p->firstSec) {
        pageAlign(p);
        prev = p;
      }
  }
}

// Compute an in-file position for a given section. The file offset must be the
// same with its virtual address modulo the page size, so that the loader can
// load executables without any address adjustment.
//
// `off` is the candidate offset; the chosen offset is returned and `os` is not
// modified.
static uint64_t computeFileOffset(OutputSection *os, uint64_t off) {
  // The first section in a PT_LOAD has to have congruent offset and address
  // modulo the maximum page size.
  if (os->ptLoad && os->ptLoad->firstSec == os)
    return alignTo(off, os->ptLoad->p_align, os->addr);

  // File offsets are not significant for .bss sections other than the first one
  // in a PT_LOAD. By convention, we keep section offsets monotonically
  // increasing rather than setting to zero.
  if (os->type == SHT_NOBITS)
    return off;

  // If the section is not in a PT_LOAD, we just have to align it.
  if (!os->ptLoad)
    return alignTo(off, os->alignment);

  // If two sections share the same PT_LOAD the file offset is calculated
  // using this formula: Off2 = Off1 + (VA2 - VA1).
  OutputSection *first = os->ptLoad->firstSec;
  return first->offset + os->addr - first->addr;
}

// Set an in-file position to a given section and returns the end position of
// the section.
static uint64_t setFileOffset(OutputSection *os, uint64_t off) {
  off = computeFileOffset(os, off);
  os->offset = off;

  // SHT_NOBITS sections do not advance the running offset.
  if (os->type == SHT_NOBITS)
    return off;
  return off + os->size;
}

// Assigns file offsets to the SHF_ALLOC output sections only, starting at 0,
// and sets fileSize to the end offset rounded up to config->wordsize.
//
// NOTE(review): `template void Writer::...` in this text carries no template
// argument lists; they look stripped from this copy of the file -- confirm
// against upstream.
template void Writer::assignFileOffsetsBinary() {
  uint64_t off = 0;
  for (OutputSection *sec : outputSections)
    if (sec->flags & SHF_ALLOC)
      off = setFileOffset(sec, off);
  fileSize = alignTo(off, config->wordsize);
}

// Formats the inclusive range [addr, addr + len - 1] as "[0x<lo>, 0x<hi>]".
// With len == 0 the upper bound printed is addr - 1.
static std::string rangeToString(uint64_t addr, uint64_t len) {
  return "[0x" + utohexstr(addr) + ", 0x" + utohexstr(addr + len - 1) + "]";
}

// Assign file offsets to output sections.
//
// Also sets sectionHeaderOff and fileSize, and reports an error for any
// non-NOBITS section whose offset range falls outside the computed file size.
template void Writer::assignFileOffsets() {
  uint64_t off = 0;
  off = setFileOffset(Out::elfHeader, off);
  off = setFileOffset(Out::programHeaders, off);

  // The last executable PT_LOAD across all partitions, if any.
  PhdrEntry *lastRX = nullptr;
  for (Partition &part : partitions)
    for (PhdrEntry *p : part.phdrs)
      if (p->p_type == PT_LOAD && (p->p_flags & PF_X))
        lastRX = p;

  for (OutputSection *sec : outputSections) {
    off = setFileOffset(sec, off);

    // If this is a last section of the last executable segment and that
    // segment is the last loadable segment, align the offset of the
    // following section to avoid loading non-segments parts of the file.
    if (config->zSeparate != SeparateSegmentKind::None && lastRX &&
        lastRX->lastSec == sec)
      off = alignTo(off, config->commonPageSize);
  }

  // The section header table follows the sections; the +1 accounts for one
  // header beyond those of outputSections.
  sectionHeaderOff = alignTo(off, config->wordsize);
  fileSize = sectionHeaderOff + (outputSections.size() + 1) * sizeof(Elf_Shdr);

  // Our logic assumes that sections have rising VA within the same segment.
  // With use of linker scripts it is possible to violate this rule and get file
  // offset overlaps or overflows. That should never happen with a valid script
  // which does not move the location counter backwards and usually scripts do
  // not do that. Unfortunately, there are apps in the wild, for example, Linux
  // kernel, which control segment distribution explicitly and move the counter
  // backwards, so we have to allow doing that to support linking them. We
  // perform non-critical checks for overlaps in checkSectionOverlap(), but here
  // we want to prevent file size overflows because it would crash the linker.
  for (OutputSection *sec : outputSections) {
    if (sec->type == SHT_NOBITS)
      continue;
    if ((sec->offset > fileSize) || (sec->offset + sec->size > fileSize))
      error("unable to place section " + sec->name + " at file offset " +
            rangeToString(sec->offset, sec->size) +
            "; check your linker script for overflows");
  }
}

// Finalize the program headers. We call this function after we assign
// file offsets and VAs to all sections.
template void Writer::setPhdrs(Partition &part) {
  for (PhdrEntry *p : part.phdrs) {
    OutputSection *first = p->firstSec;
    OutputSection *last = p->lastSec;

    if (first) {
      // A trailing SHT_NOBITS section contributes to p_memsz but not to
      // p_filesz.
      p->p_filesz = last->offset - first->offset;
      if (last->type != SHT_NOBITS)
        p->p_filesz += last->size;

      p->p_memsz = last->addr + last->size - first->addr;
      p->p_offset = first->offset;
      p->p_vaddr = first->addr;

      // File offsets in partitions other than the main partition are relative
      // to the offset of the ELF headers. Perform that adjustment now.
      if (part.elfHeader)
        p->p_offset -= part.elfHeader->getParent()->offset;

      // p_paddr is only derived from the first section when hasLMA is unset.
      if (!p->hasLMA)
        p->p_paddr = first->getLMA();
    }

    if (p->p_type == PT_GNU_RELRO) {
      p->p_align = 1;
      // musl/glibc ld.so rounds the size down, so we need to round up
      // to protect the last page. This is a no-op on FreeBSD which always
      // rounds up.
      p->p_memsz = alignTo(p->p_offset + p->p_memsz, config->commonPageSize) -
                   p->p_offset;
    }
  }
}

// A helper struct for checkSectionOverlap.
// Pairs a section with the start position (file offset or address) under test.
namespace {
struct SectionOffset {
  OutputSection *sec;
  uint64_t offset;
};
} // namespace

// Check whether sections overlap for a specific address range (file offsets,
// load and virtual addresses).
// `name` is the range kind used in the diagnostic ("file", "virtual address",
// "load address"). The vector is sorted in place by offset. isVirtualAddr
// enables the OVERLAY exemption below.
//
// NOTE(review): the second parameter reads `std::vector §ions`; `§ions` looks
// like a mis-decoded `&sections` (the body uses `sections`), and the vector's
// element type is missing -- confirm against upstream.
static void checkOverlap(StringRef name, std::vector §ions,
                         bool isVirtualAddr) {
  llvm::sort(sections, [=](const SectionOffset &a, const SectionOffset &b) {
    return a.offset < b.offset;
  });

  // Finding overlap is easy given a vector is sorted by start position.
  // If an element starts before the end of the previous element, they overlap.
  for (size_t i = 1, end = sections.size(); i < end; ++i) {
    SectionOffset a = sections[i - 1];
    SectionOffset b = sections[i];
    if (b.offset >= a.offset + a.sec->size)
      continue;

    // If both sections are in OVERLAY we allow the overlapping of virtual
    // addresses, because it is what OVERLAY was designed for.
    if (isVirtualAddr && a.sec->inOverlay && b.sec->inOverlay)
      continue;

    errorOrWarn("section " + a.sec->name + " " + name +
                " range overlaps with " + b.sec->name + "\n>>> " + a.sec->name +
                " range is " + rangeToString(a.offset, a.sec->size) + "\n>>> " +
                b.sec->name + " range is " +
                rangeToString(b.offset, b.sec->size));
  }
}

// Check for overlapping sections and address overflows.
//
// In this function we check that none of the output sections have overlapping
// file offsets. For SHF_ALLOC sections we also check that the load address
// ranges and the virtual address ranges don't overlap
//
// NOTE(review): `template void Writer::...` and the `std::vector fileOffs` /
// `vmas` / `lmas` declarations below carry no template argument lists; they
// look stripped from this copy of the file -- confirm against upstream.
template void Writer::checkSections() {
  // First, check that section's VAs fit in available address space for target.
  // The first condition catches wrap-around of addr + size; the second applies
  // to 32-bit targets only.
  for (OutputSection *os : outputSections)
    if ((os->addr + os->size < os->addr) ||
        (!ELFT::Is64Bits && os->addr + os->size > UINT32_MAX))
      errorOrWarn("section " + os->name + " at 0x" + utohexstr(os->addr) +
                  " of size 0x" + utohexstr(os->size) +
                  " exceeds available address space");

  // Check for overlapping file offsets. In this case we need to skip any
  // section marked as SHT_NOBITS. These sections don't actually occupy space in
  // the file so Sec->Offset + Sec->Size can overlap with others. If --oformat
  // binary is specified only SHF_ALLOC sections are added to the output
  // file so we skip any non-allocated sections in that case.
  std::vector fileOffs;
  for (OutputSection *sec : outputSections)
    if (sec->size > 0 && sec->type != SHT_NOBITS &&
        (!config->oFormatBinary || (sec->flags & SHF_ALLOC)))
      fileOffs.push_back({sec, sec->offset});
  checkOverlap("file", fileOffs, false);

  // When linking with -r there is no need to check for overlapping virtual/load
  // addresses since those addresses will only be assigned when the final
  // executable/shared object is created.
  if (config->relocatable)
    return;

  // Checking for overlapping virtual and load addresses only needs to take
  // into account SHF_ALLOC sections since others will not be loaded.
  // Furthermore, we also need to skip SHF_TLS sections since these will be
  // mapped to other addresses at runtime and can therefore have overlapping
  // ranges in the file.
  std::vector vmas;
  for (OutputSection *sec : outputSections)
    if (sec->size > 0 && (sec->flags & SHF_ALLOC) && !(sec->flags & SHF_TLS))
      vmas.push_back({sec, sec->addr});
  checkOverlap("virtual address", vmas, true);

  // Finally, check that the load addresses don't overlap. This will usually be
  // the same as the virtual addresses but can be different when using a linker
  // script with AT().
  std::vector lmas;
  for (OutputSection *sec : outputSections)
    if (sec->size > 0 && (sec->flags & SHF_ALLOC) && !(sec->flags & SHF_TLS))
      lmas.push_back({sec, sec->getLMA()});
  checkOverlap("load address", lmas, false);
}

// The entry point address is chosen in the following ways.
//
// 1. the '-e' entry command-line option;
// 2. the ENTRY(symbol) command in a linker control script;
// 3. the value of the symbol _start, if present;
// 4. the number represented by the entry symbol, if it is a number;
// 5. the address of the first byte of the .text section, if present;
// 6. the address 0.
static uint64_t getEntryAddr() { // Case 1, 2 or 3 if (Symbol *b = symtab->find(config->entry)) return b->getVA(); // Case 4 uint64_t addr; if (to_integer(config->entry, addr)) return addr; // Case 5 if (OutputSection *sec = findSection(".text")) { if (config->warnMissingEntry) warn("cannot find entry symbol " + config->entry + "; defaulting to 0x" + utohexstr(sec->addr)); return sec->addr; } // Case 6 if (config->warnMissingEntry) warn("cannot find entry symbol " + config->entry + "; not setting start address"); return 0; } static uint16_t getELFType() { if (config->isPic) return ET_DYN; if (config->relocatable) return ET_REL; return ET_EXEC; } template void Writer::writeHeader() { writeEhdr(Out::bufferStart, *mainPart); writePhdrs(Out::bufferStart + sizeof(Elf_Ehdr), *mainPart); auto *eHdr = reinterpret_cast(Out::bufferStart); eHdr->e_type = getELFType(); eHdr->e_entry = getEntryAddr(); eHdr->e_shoff = sectionHeaderOff; // Write the section header table. // // The ELF header can only store numbers up to SHN_LORESERVE in the e_shnum // and e_shstrndx fields. When the value of one of these fields exceeds // SHN_LORESERVE ELF requires us to put sentinel values in the ELF header and // use fields in the section header at index 0 to store // the value. The sentinel values and fields are: // e_shnum = 0, SHdrs[0].sh_size = number of sections. // e_shstrndx = SHN_XINDEX, SHdrs[0].sh_link = .shstrtab section index. auto *sHdrs = reinterpret_cast(Out::bufferStart + eHdr->e_shoff); size_t num = outputSections.size() + 1; if (num >= SHN_LORESERVE) sHdrs->sh_size = num; else eHdr->e_shnum = num; uint32_t strTabIndex = in.shStrTab->getParent()->sectionIndex; if (strTabIndex >= SHN_LORESERVE) { sHdrs->sh_link = strTabIndex; eHdr->e_shstrndx = SHN_XINDEX; } else { eHdr->e_shstrndx = strTabIndex; } for (OutputSection *sec : outputSections) sec->writeHeaderTo(++sHdrs); } // Open a result file. template void Writer::openFile() { uint64_t maxSize = config->is64 ? 
INT64_MAX : UINT32_MAX; if (fileSize != size_t(fileSize) || maxSize < fileSize) { error("output file too large: " + Twine(fileSize) + " bytes"); return; } unlinkAsync(config->outputFile); unsigned flags = 0; if (!config->relocatable) flags |= FileOutputBuffer::F_executable; if (!config->mmapOutputFile) flags |= FileOutputBuffer::F_no_mmap; Expected> bufferOrErr = FileOutputBuffer::create(config->outputFile, fileSize, flags); if (!bufferOrErr) { error("failed to open " + config->outputFile + ": " + llvm::toString(bufferOrErr.takeError())); return; } buffer = std::move(*bufferOrErr); Out::bufferStart = buffer->getBufferStart(); } template void Writer::writeSectionsBinary() { for (OutputSection *sec : outputSections) if (sec->flags & SHF_ALLOC) sec->writeTo(Out::bufferStart + sec->offset); } static void fillTrap(uint8_t *i, uint8_t *end) { for (; i + 4 <= end; i += 4) memcpy(i, &target->trapInstr, 4); } // Fill the last page of executable segments with trap instructions // instead of leaving them as zero. Even though it is not required by any // standard, it is in general a good thing to do for security reasons. // // We'll leave other pages in segments as-is because the rest will be // overwritten by output sections. template void Writer::writeTrapInstr() { for (Partition &part : partitions) { // Fill the last page. for (PhdrEntry *p : part.phdrs) if (p->p_type == PT_LOAD && (p->p_flags & PF_X)) fillTrap(Out::bufferStart + alignDown(p->firstSec->offset + p->p_filesz, config->commonPageSize), Out::bufferStart + alignTo(p->firstSec->offset + p->p_filesz, config->commonPageSize)); // Round up the file size of the last segment to the page boundary iff it is // an executable segment to ensure that other tools don't accidentally // trim the instruction padding (e.g. when stripping the file). 
PhdrEntry *last = nullptr; for (PhdrEntry *p : part.phdrs) if (p->p_type == PT_LOAD) last = p; if (last && (last->p_flags & PF_X)) last->p_memsz = last->p_filesz = alignTo(last->p_filesz, config->commonPageSize); } } // Write section contents to a mmap'ed file. template void Writer::writeSections() { // In -r or -emit-relocs mode, write the relocation sections first as in // ELf_Rel targets we might find out that we need to modify the relocated // section while doing it. for (OutputSection *sec : outputSections) if (sec->type == SHT_REL || sec->type == SHT_RELA) sec->writeTo(Out::bufferStart + sec->offset); for (OutputSection *sec : outputSections) if (sec->type != SHT_REL && sec->type != SHT_RELA) sec->writeTo(Out::bufferStart + sec->offset); } // Split one uint8 array into small pieces of uint8 arrays. static std::vector> split(ArrayRef arr, size_t chunkSize) { std::vector> ret; while (arr.size() > chunkSize) { ret.push_back(arr.take_front(chunkSize)); arr = arr.drop_front(chunkSize); } if (!arr.empty()) ret.push_back(arr); return ret; } // Computes a hash value of Data using a given hash function. // In order to utilize multiple cores, we first split data into 1MB // chunks, compute a hash for each chunk, and then compute a hash value // of the hash values. static void computeHash(llvm::MutableArrayRef hashBuf, llvm::ArrayRef data, std::function arr)> hashFn) { std::vector> chunks = split(data, 1024 * 1024); std::vector hashes(chunks.size() * hashBuf.size()); // Compute hash values. parallelForEachN(0, chunks.size(), [&](size_t i) { hashFn(hashes.data() + i * hashBuf.size(), chunks[i]); }); // Write to the final output buffer. hashFn(hashBuf.data(), hashes); } template void Writer::writeBuildId() { if (!mainPart->buildId || !mainPart->buildId->getParent()) return; if (config->buildId == BuildIdKind::Hexstring) { for (Partition &part : partitions) part.buildId->writeBuildId(config->buildIdVector); return; } // Compute a hash of all sections of the output file. 
size_t hashSize = mainPart->buildId->hashSize; std::vector buildId(hashSize); llvm::ArrayRef buf{Out::bufferStart, size_t(fileSize)}; switch (config->buildId) { case BuildIdKind::Fast: computeHash(buildId, buf, [](uint8_t *dest, ArrayRef arr) { write64le(dest, xxHash64(arr)); }); break; case BuildIdKind::Md5: computeHash(buildId, buf, [&](uint8_t *dest, ArrayRef arr) { memcpy(dest, MD5::hash(arr).data(), hashSize); }); break; case BuildIdKind::Sha1: computeHash(buildId, buf, [&](uint8_t *dest, ArrayRef arr) { memcpy(dest, SHA1::hash(arr).data(), hashSize); }); break; case BuildIdKind::Uuid: if (auto ec = llvm::getRandomBytes(buildId.data(), hashSize)) error("entropy source failure: " + ec.message()); break; default: llvm_unreachable("unknown BuildIdKind"); } for (Partition &part : partitions) part.buildId->writeBuildId(buildId); } template void createSyntheticSections(); template void createSyntheticSections(); template void createSyntheticSections(); template void createSyntheticSections(); template void writeResult(); template void writeResult(); template void writeResult(); template void writeResult(); } // namespace elf } // namespace lld