diff --git a/COFF/Chunks.cpp b/COFF/Chunks.cpp index 56124acaf9a1..9b642dcaf137 100644 --- a/COFF/Chunks.cpp +++ b/COFF/Chunks.cpp @@ -1,410 +1,441 @@ //===- Chunks.cpp ---------------------------------------------------------===// // // The LLVM Linker // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "Chunks.h" #include "Error.h" #include "InputFiles.h" #include "Symbols.h" +#include "Writer.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/Object/COFF.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/raw_ostream.h" #include using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::COFF; using llvm::support::ulittle32_t; namespace lld { namespace coff { SectionChunk::SectionChunk(ObjectFile *F, const coff_section *H) : Chunk(SectionKind), Repl(this), Header(H), File(F), Relocs(File->getCOFFObj()->getRelocations(Header)), NumRelocs(std::distance(Relocs.begin(), Relocs.end())) { // Initialize SectionName. File->getCOFFObj()->getSectionName(Header, SectionName); Align = Header->getAlignment(); // Chunks may be discarded during comdat merging. Discarded = false; // If linker GC is disabled, every chunk starts out alive. If linker GC is // enabled, treat non-comdat sections as roots. Generally optimized object // files will be built with -ffunction-sections or /Gy, so most things worth // stripping will be in a comdat. 
Live = !Config->DoGC || !isCOMDAT(); } static void add16(uint8_t *P, int16_t V) { write16le(P, read16le(P) + V); } static void add32(uint8_t *P, int32_t V) { write32le(P, read32le(P) + V); } static void add64(uint8_t *P, int64_t V) { write64le(P, read64le(P) + V); } static void or16(uint8_t *P, uint16_t V) { write16le(P, read16le(P) | V); } -static void applySecRel(const SectionChunk *Sec, uint8_t *Off, Defined *Sym) { - // Don't apply section relative relocations to absolute symbols in codeview - // debug info sections. MSVC does not treat such relocations as fatal errors, - // and they can be found in the standard library for linker-provided symbols - // like __guard_fids_table and __safe_se_handler_table. - if (!(isa(Sym) && Sec->isCodeView())) - add32(Off, Sym->getSecrel()); +static void applySecRel(const SectionChunk *Sec, uint8_t *Off, + OutputSection *OS, uint64_t S) { + if (!OS) { + if (Sec->isCodeView()) + return; + fatal("SECREL relocation cannot be applied to absolute symbols"); + } + uint64_t SecRel = S - OS->getRVA(); + assert(SecRel < INT32_MAX && "overflow in SECREL relocation"); + add32(Off, SecRel); +} + +static void applySecIdx(uint8_t *Off, OutputSection *OS) { + // If we have no output section, this must be an absolute symbol. Use the + // sentinel absolute symbol section index. + uint16_t SecIdx = OS ? 
OS->SectionIndex : DefinedAbsolute::OutputSectionIndex; + add16(Off, SecIdx); } -void SectionChunk::applyRelX64(uint8_t *Off, uint16_t Type, Defined *Sym, - uint64_t P) const { - uint64_t S = Sym->getRVA(); +void SectionChunk::applyRelX64(uint8_t *Off, uint16_t Type, OutputSection *OS, + uint64_t S, uint64_t P) const { switch (Type) { case IMAGE_REL_AMD64_ADDR32: add32(Off, S + Config->ImageBase); break; case IMAGE_REL_AMD64_ADDR64: add64(Off, S + Config->ImageBase); break; case IMAGE_REL_AMD64_ADDR32NB: add32(Off, S); break; case IMAGE_REL_AMD64_REL32: add32(Off, S - P - 4); break; case IMAGE_REL_AMD64_REL32_1: add32(Off, S - P - 5); break; case IMAGE_REL_AMD64_REL32_2: add32(Off, S - P - 6); break; case IMAGE_REL_AMD64_REL32_3: add32(Off, S - P - 7); break; case IMAGE_REL_AMD64_REL32_4: add32(Off, S - P - 8); break; case IMAGE_REL_AMD64_REL32_5: add32(Off, S - P - 9); break; - case IMAGE_REL_AMD64_SECTION: add16(Off, Sym->getSectionIndex()); break; - case IMAGE_REL_AMD64_SECREL: applySecRel(this, Off, Sym); break; + case IMAGE_REL_AMD64_SECTION: applySecIdx(Off, OS); break; + case IMAGE_REL_AMD64_SECREL: applySecRel(this, Off, OS, S); break; default: fatal("unsupported relocation type 0x" + Twine::utohexstr(Type)); } } -void SectionChunk::applyRelX86(uint8_t *Off, uint16_t Type, Defined *Sym, - uint64_t P) const { - uint64_t S = Sym->getRVA(); +void SectionChunk::applyRelX86(uint8_t *Off, uint16_t Type, OutputSection *OS, + uint64_t S, uint64_t P) const { switch (Type) { case IMAGE_REL_I386_ABSOLUTE: break; case IMAGE_REL_I386_DIR32: add32(Off, S + Config->ImageBase); break; case IMAGE_REL_I386_DIR32NB: add32(Off, S); break; case IMAGE_REL_I386_REL32: add32(Off, S - P - 4); break; - case IMAGE_REL_I386_SECTION: add16(Off, Sym->getSectionIndex()); break; - case IMAGE_REL_I386_SECREL: applySecRel(this, Off, Sym); break; + case IMAGE_REL_I386_SECTION: applySecIdx(Off, OS); break; + case IMAGE_REL_I386_SECREL: applySecRel(this, Off, OS, S); break; default: 
fatal("unsupported relocation type 0x" + Twine::utohexstr(Type)); } } static void applyMOV(uint8_t *Off, uint16_t V) { write16le(Off, (read16le(Off) & 0xfbf0) | ((V & 0x800) >> 1) | ((V >> 12) & 0xf)); write16le(Off + 2, (read16le(Off + 2) & 0x8f00) | ((V & 0x700) << 4) | (V & 0xff)); } static uint16_t readMOV(uint8_t *Off) { uint16_t Opcode1 = read16le(Off); uint16_t Opcode2 = read16le(Off + 2); uint16_t Imm = (Opcode2 & 0x00ff) | ((Opcode2 >> 4) & 0x0700); Imm |= ((Opcode1 << 1) & 0x0800) | ((Opcode1 & 0x000f) << 12); return Imm; } static void applyMOV32T(uint8_t *Off, uint32_t V) { uint16_t ImmW = readMOV(Off); // read MOVW operand uint16_t ImmT = readMOV(Off + 4); // read MOVT operand uint32_t Imm = ImmW | (ImmT << 16); V += Imm; // add the immediate offset applyMOV(Off, V); // set MOVW operand applyMOV(Off + 4, V >> 16); // set MOVT operand } static void applyBranch20T(uint8_t *Off, int32_t V) { uint32_t S = V < 0 ? 1 : 0; uint32_t J1 = (V >> 19) & 1; uint32_t J2 = (V >> 18) & 1; or16(Off, (S << 10) | ((V >> 12) & 0x3f)); or16(Off + 2, (J1 << 13) | (J2 << 11) | ((V >> 1) & 0x7ff)); } static void applyBranch24T(uint8_t *Off, int32_t V) { if (!isInt<25>(V)) fatal("relocation out of range"); uint32_t S = V < 0 ? 1 : 0; uint32_t J1 = ((~V >> 23) & 1) ^ S; uint32_t J2 = ((~V >> 22) & 1) ^ S; or16(Off, (S << 10) | ((V >> 12) & 0x3ff)); // Clear out the J1 and J2 bits which may be set. write16le(Off + 2, (read16le(Off + 2) & 0xd000) | (J1 << 13) | (J2 << 11) | ((V >> 1) & 0x7ff)); } -void SectionChunk::applyRelARM(uint8_t *Off, uint16_t Type, Defined *Sym, - uint64_t P) const { - uint64_t S = Sym->getRVA(); +void SectionChunk::applyRelARM(uint8_t *Off, uint16_t Type, OutputSection *OS, + uint64_t S, uint64_t P) const { // Pointer to thumb code must have the LSB set. 
- if (Sym->isExecutable()) - S |= 1; + uint64_t SX = S; + if (OS && (OS->getPermissions() & IMAGE_SCN_MEM_EXECUTE)) + SX |= 1; switch (Type) { - case IMAGE_REL_ARM_ADDR32: add32(Off, S + Config->ImageBase); break; - case IMAGE_REL_ARM_ADDR32NB: add32(Off, S); break; - case IMAGE_REL_ARM_MOV32T: applyMOV32T(Off, S + Config->ImageBase); break; - case IMAGE_REL_ARM_BRANCH20T: applyBranch20T(Off, S - P - 4); break; - case IMAGE_REL_ARM_BRANCH24T: applyBranch24T(Off, S - P - 4); break; - case IMAGE_REL_ARM_BLX23T: applyBranch24T(Off, S - P - 4); break; - case IMAGE_REL_ARM_SECREL: applySecRel(this, Off, Sym); break; + case IMAGE_REL_ARM_ADDR32: add32(Off, SX + Config->ImageBase); break; + case IMAGE_REL_ARM_ADDR32NB: add32(Off, SX); break; + case IMAGE_REL_ARM_MOV32T: applyMOV32T(Off, SX + Config->ImageBase); break; + case IMAGE_REL_ARM_BRANCH20T: applyBranch20T(Off, SX - P - 4); break; + case IMAGE_REL_ARM_BRANCH24T: applyBranch24T(Off, SX - P - 4); break; + case IMAGE_REL_ARM_BLX23T: applyBranch24T(Off, SX - P - 4); break; + case IMAGE_REL_ARM_SECTION: applySecIdx(Off, OS); break; + case IMAGE_REL_ARM_SECREL: applySecRel(this, Off, OS, S); break; default: fatal("unsupported relocation type 0x" + Twine::utohexstr(Type)); } } void SectionChunk::writeTo(uint8_t *Buf) const { if (!hasData()) return; // Copy section contents from source object file to output file. ArrayRef A = getContents(); memcpy(Buf + OutputSectionOff, A.data(), A.size()); // Apply relocations. for (const coff_relocation &Rel : Relocs) { uint8_t *Off = Buf + OutputSectionOff + Rel.VirtualAddress; + + // Get the output section of the symbol for this relocation. The output + // section is needed to compute SECREL and SECTION relocations used in debug + // info. SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex); Defined *Sym = cast(Body); + Chunk *C = Sym->getChunk(); + OutputSection *OS = C ? C->getOutputSection() : nullptr; + + // Only absolute and __ImageBase symbols lack an output section. 
For any + // other symbol, this indicates that the chunk was discarded. Normally + // relocations against discarded sections are an error. However, debug info + // sections are not GC roots and can end up with these kinds of relocations. + // Skip these relocations. + if (!OS && !isa(Sym) && !isa(Sym)) { + if (isCodeView()) + continue; + fatal("relocation against symbol in discarded section: " + + Sym->getName()); + } + uint64_t S = Sym->getRVA(); + + // Compute the RVA of the relocation for relative relocations. uint64_t P = RVA + Rel.VirtualAddress; switch (Config->Machine) { case AMD64: - applyRelX64(Off, Rel.Type, Sym, P); + applyRelX64(Off, Rel.Type, OS, S, P); break; case I386: - applyRelX86(Off, Rel.Type, Sym, P); + applyRelX86(Off, Rel.Type, OS, S, P); break; case ARMNT: - applyRelARM(Off, Rel.Type, Sym, P); + applyRelARM(Off, Rel.Type, OS, S, P); break; default: llvm_unreachable("unknown machine type"); } } } void SectionChunk::addAssociative(SectionChunk *Child) { AssocChildren.push_back(Child); } static uint8_t getBaserelType(const coff_relocation &Rel) { switch (Config->Machine) { case AMD64: if (Rel.Type == IMAGE_REL_AMD64_ADDR64) return IMAGE_REL_BASED_DIR64; return IMAGE_REL_BASED_ABSOLUTE; case I386: if (Rel.Type == IMAGE_REL_I386_DIR32) return IMAGE_REL_BASED_HIGHLOW; return IMAGE_REL_BASED_ABSOLUTE; case ARMNT: if (Rel.Type == IMAGE_REL_ARM_ADDR32) return IMAGE_REL_BASED_HIGHLOW; if (Rel.Type == IMAGE_REL_ARM_MOV32T) return IMAGE_REL_BASED_ARM_MOV32T; return IMAGE_REL_BASED_ABSOLUTE; default: llvm_unreachable("unknown machine type"); } } // Windows-specific. // Collect all locations that contain absolute addresses, which need to be // fixed by the loader if load-time relocation is needed. // Only called when base relocation is enabled. 
void SectionChunk::getBaserels(std::vector *Res) { for (const coff_relocation &Rel : Relocs) { uint8_t Ty = getBaserelType(Rel); if (Ty == IMAGE_REL_BASED_ABSOLUTE) continue; SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex); if (isa(Body)) continue; Res->emplace_back(RVA + Rel.VirtualAddress, Ty); } } bool SectionChunk::hasData() const { return !(Header->Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA); } uint32_t SectionChunk::getPermissions() const { return Header->Characteristics & PermMask; } bool SectionChunk::isCOMDAT() const { return Header->Characteristics & IMAGE_SCN_LNK_COMDAT; } void SectionChunk::printDiscardedMessage() const { // Removed by dead-stripping. If it's removed by ICF, ICF already // printed out the name, so don't repeat that here. if (Sym && this == Repl) { if (Discarded) message("Discarded comdat symbol " + Sym->getName()); else if (!Live) message("Discarded " + Sym->getName()); } } StringRef SectionChunk::getDebugName() { if (Sym) return Sym->getName(); return ""; } ArrayRef SectionChunk::getContents() const { ArrayRef A; File->getCOFFObj()->getSectionContents(Header, A); return A; } void SectionChunk::replace(SectionChunk *Other) { Other->Repl = Repl; Other->Live = false; } CommonChunk::CommonChunk(const COFFSymbolRef S) : Sym(S) { // Common symbols are aligned on natural boundaries up to 32 bytes. // This is what MSVC link.exe does. Align = std::min(uint64_t(32), PowerOf2Ceil(Sym.getValue())); } uint32_t CommonChunk::getPermissions() const { return IMAGE_SCN_CNT_UNINITIALIZED_DATA | IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE; } void StringChunk::writeTo(uint8_t *Buf) const { memcpy(Buf + OutputSectionOff, Str.data(), Str.size()); } ImportThunkChunkX64::ImportThunkChunkX64(Defined *S) : ImpSymbol(S) { // Intel Optimization Manual says that all branch targets // should be 16-byte aligned. MSVC linker does this too. 
Align = 16; } void ImportThunkChunkX64::writeTo(uint8_t *Buf) const { memcpy(Buf + OutputSectionOff, ImportThunkX86, sizeof(ImportThunkX86)); // The first two bytes is a JMP instruction. Fill its operand. write32le(Buf + OutputSectionOff + 2, ImpSymbol->getRVA() - RVA - getSize()); } void ImportThunkChunkX86::getBaserels(std::vector *Res) { Res->emplace_back(getRVA() + 2); } void ImportThunkChunkX86::writeTo(uint8_t *Buf) const { memcpy(Buf + OutputSectionOff, ImportThunkX86, sizeof(ImportThunkX86)); // The first two bytes is a JMP instruction. Fill its operand. write32le(Buf + OutputSectionOff + 2, ImpSymbol->getRVA() + Config->ImageBase); } void ImportThunkChunkARM::getBaserels(std::vector *Res) { Res->emplace_back(getRVA(), IMAGE_REL_BASED_ARM_MOV32T); } void ImportThunkChunkARM::writeTo(uint8_t *Buf) const { memcpy(Buf + OutputSectionOff, ImportThunkARM, sizeof(ImportThunkARM)); // Fix mov.w and mov.t operands. applyMOV32T(Buf + OutputSectionOff, ImpSymbol->getRVA() + Config->ImageBase); } void LocalImportChunk::getBaserels(std::vector *Res) { Res->emplace_back(getRVA()); } size_t LocalImportChunk::getSize() const { return Config->is64() ? 8 : 4; } void LocalImportChunk::writeTo(uint8_t *Buf) const { if (Config->is64()) { write64le(Buf + OutputSectionOff, Sym->getRVA() + Config->ImageBase); } else { write32le(Buf + OutputSectionOff, Sym->getRVA() + Config->ImageBase); } } void SEHTableChunk::writeTo(uint8_t *Buf) const { ulittle32_t *Begin = reinterpret_cast(Buf + OutputSectionOff); size_t Cnt = 0; for (Defined *D : Syms) Begin[Cnt++] = D->getRVA(); std::sort(Begin, Begin + Cnt); } // Windows-specific. This class represents a block in .reloc section. // The format is described here. // // On Windows, each DLL is linked against a fixed base address and // usually loaded to that address. However, if there's already another // DLL that overlaps, the loader has to relocate it. 
To do that, DLLs // contain .reloc sections which contain offsets that need to be fixed // up at runtime. If the loader finds that a DLL cannot be loaded to its // desired base address, it loads it to somewhere else, and adds <actual base address> - <desired base address> to each offset that is // specified by the .reloc section. In ELF terms, .reloc sections // contain relative relocations in REL format (as opposed to RELA.) // // This already significantly reduces the size of relocations compared // to ELF .rel.dyn, but Windows does more to reduce it (probably because // it was invented for PCs in the late '80s or early '90s.) Offsets in // .reloc are grouped by page where the page size is 12 bits, and // offsets sharing the same page address are stored consecutively to // represent them with less space. This is very similar to the page // table which is grouped by (multiple stages of) pages. // // For example, let's say we have 0x00030, 0x00500, 0x00700, 0x00A00, // 0x20004, and 0x20008 in a .reloc section for x64. The uppermost 4 // bits have a type IMAGE_REL_BASED_DIR64 or 0xA. In the section, they // are represented like this: // // 0x00000 -- page address (4 bytes) // 16 -- size of this block (4 bytes) // 0xA030 -- entries (2 bytes each) // 0xA500 // 0xA700 // 0xAA00 // 0x20000 -- page address (4 bytes) // 12 -- size of this block (4 bytes) // 0xA004 -- entries (2 bytes each) // 0xA008 // // Usually we have a lot of relocations for each page, so the number of // bytes for one .reloc entry is close to 2 bytes on average. BaserelChunk::BaserelChunk(uint32_t Page, Baserel *Begin, Baserel *End) { // Block header consists of 4 byte page RVA and 4 byte block size. // Each entry is 2 byte. Last entry may be padding. 
Data.resize(alignTo((End - Begin) * 2 + 8, 4)); uint8_t *P = Data.data(); write32le(P, Page); write32le(P + 4, Data.size()); P += 8; for (Baserel *I = Begin; I != End; ++I) { write16le(P, (I->Type << 12) | (I->RVA - Page)); P += 2; } } void BaserelChunk::writeTo(uint8_t *Buf) const { memcpy(Buf + OutputSectionOff, Data.data(), Data.size()); } uint8_t Baserel::getDefaultType() { switch (Config->Machine) { case AMD64: return IMAGE_REL_BASED_DIR64; case I386: return IMAGE_REL_BASED_HIGHLOW; default: llvm_unreachable("unknown machine type"); } } } // namespace coff } // namespace lld diff --git a/COFF/Chunks.h b/COFF/Chunks.h index 54fffc5f6d08..6e1bf94da1a5 100644 --- a/COFF/Chunks.h +++ b/COFF/Chunks.h @@ -1,351 +1,354 @@ //===- Chunks.h -------------------------------------------------*- C++ -*-===// // // The LLVM Linker // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #ifndef LLD_COFF_CHUNKS_H #define LLD_COFF_CHUNKS_H #include "Config.h" #include "InputFiles.h" #include "lld/Core/LLVM.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Object/COFF.h" #include #include namespace lld { namespace coff { using llvm::COFF::ImportDirectoryTableEntry; using llvm::object::COFFSymbolRef; using llvm::object::SectionRef; using llvm::object::coff_relocation; using llvm::object::coff_section; class Baserel; class Defined; class DefinedImportData; class DefinedRegular; class ObjectFile; class OutputSection; class SymbolBody; // Mask for section types (code, data, bss, disacardable, etc.) // and permissions (writable, readable or executable). const uint32_t PermMask = 0xFF0000F0; // A Chunk represents a chunk of data that will occupy space in the // output (if the resolver chose that). It may or may not be backed by // a section of an input file. 
It could be linker-created data, or // doesn't even have actual data (if common or bss). class Chunk { public: enum Kind { SectionKind, OtherKind }; Kind kind() const { return ChunkKind; } virtual ~Chunk() = default; // Returns the size of this chunk (even if this is a common or BSS.) virtual size_t getSize() const = 0; // Write this chunk to a mmap'ed file, assuming Buf is pointing to // beginning of the file. Because this function may use RVA values // of other chunks for relocations, you need to set them properly // before calling this function. virtual void writeTo(uint8_t *Buf) const {} // The writer sets and uses the addresses. uint64_t getRVA() const { return RVA; } uint32_t getAlign() const { return Align; } void setRVA(uint64_t V) { RVA = V; } // Returns true if this has non-zero data. BSS chunks return // false. If false is returned, the space occupied by this chunk // will be filled with zeros. virtual bool hasData() const { return true; } // Returns readable/writable/executable bits. virtual uint32_t getPermissions() const { return 0; } // Returns the section name if this is a section chunk. // It is illegal to call this function on non-section chunks. virtual StringRef getSectionName() const { llvm_unreachable("unimplemented getSectionName"); } // An output section has pointers to chunks in the section, and each // chunk has a back pointer to an output section. void setOutputSection(OutputSection *O) { Out = O; } OutputSection *getOutputSection() { return Out; } // Windows-specific. // Collect all locations that contain absolute addresses for base relocations. virtual void getBaserels(std::vector *Res) {} // Returns a human-readable name of this chunk. Chunks are unnamed chunks of // bytes, so this is used only for logging or debugging. virtual StringRef getDebugName() { return ""; } protected: Chunk(Kind K = OtherKind) : ChunkKind(K) {} const Kind ChunkKind; // The alignment of this chunk. The writer uses the value. 
uint32_t Align = 1; // The RVA of this chunk in the output. The writer sets a value. uint64_t RVA = 0; public: // The offset from beginning of the output section. The writer sets a value. uint64_t OutputSectionOff = 0; protected: // The output section for this chunk. OutputSection *Out = nullptr; }; // A chunk corresponding a section of an input file. class SectionChunk : public Chunk { // Identical COMDAT Folding feature accesses section internal data. friend class ICF; public: class symbol_iterator : public llvm::iterator_adaptor_base< symbol_iterator, const coff_relocation *, std::random_access_iterator_tag, SymbolBody *> { friend SectionChunk; ObjectFile *File; symbol_iterator(ObjectFile *File, const coff_relocation *I) : symbol_iterator::iterator_adaptor_base(I), File(File) {} public: symbol_iterator() = default; SymbolBody *operator*() const { return File->getSymbolBody(I->SymbolTableIndex); } }; SectionChunk(ObjectFile *File, const coff_section *Header); static bool classof(const Chunk *C) { return C->kind() == SectionKind; } size_t getSize() const override { return Header->SizeOfRawData; } ArrayRef getContents() const; void writeTo(uint8_t *Buf) const override; bool hasData() const override; uint32_t getPermissions() const override; StringRef getSectionName() const override { return SectionName; } void getBaserels(std::vector *Res) override; bool isCOMDAT() const; - void applyRelX64(uint8_t *Off, uint16_t Type, Defined *Sym, uint64_t P) const; - void applyRelX86(uint8_t *Off, uint16_t Type, Defined *Sym, uint64_t P) const; - void applyRelARM(uint8_t *Off, uint16_t Type, Defined *Sym, uint64_t P) const; + void applyRelX64(uint8_t *Off, uint16_t Type, OutputSection *OS, uint64_t S, + uint64_t P) const; + void applyRelX86(uint8_t *Off, uint16_t Type, OutputSection *OS, uint64_t S, + uint64_t P) const; + void applyRelARM(uint8_t *Off, uint16_t Type, OutputSection *OS, uint64_t S, + uint64_t P) const; // Called if the garbage collector decides to not include 
this chunk // in a final output. It's supposed to print out a log message to stdout. void printDiscardedMessage() const; // Adds COMDAT associative sections to this COMDAT section. A chunk // and its children are treated as a group by the garbage collector. void addAssociative(SectionChunk *Child); StringRef getDebugName() override; void setSymbol(DefinedRegular *S) { if (!Sym) Sym = S; } // Returns true if the chunk was not dropped by GC or COMDAT deduplication. bool isLive() { return Live && !Discarded; } // Used by the garbage collector. void markLive() { assert(Config->DoGC && "should only mark things live from GC"); assert(!isLive() && "Cannot mark an already live section!"); Live = true; } // Returns true if this chunk was dropped by COMDAT deduplication. bool isDiscarded() const { return Discarded; } // Used by the SymbolTable when discarding unused comdat sections. This is // redundant when GC is enabled, as all comdat sections will start out dead. void markDiscarded() { Discarded = true; } // True if this is a codeview debug info chunk. These will not be laid out in // the image. Instead they will end up in the PDB, if one is requested. bool isCodeView() const { return SectionName == ".debug" || SectionName.startswith(".debug$"); } // Allow iteration over the bodies of this chunk's relocated symbols. llvm::iterator_range symbols() const { return llvm::make_range(symbol_iterator(File, Relocs.begin()), symbol_iterator(File, Relocs.end())); } // Allow iteration over the associated child chunks for this section. ArrayRef children() const { return AssocChildren; } // A pointer pointing to a replacement for this chunk. // Initially it points to "this" object. If this chunk is merged // with other chunk by ICF, it points to another chunk, // and this chunk is considrered as dead. SectionChunk *Repl; // The CRC of the contents as described in the COFF spec 4.5.5. // Auxiliary Format 5: Section Definitions. Used for ICF. 
uint32_t Checksum = 0; const coff_section *Header; // The file that this chunk was created from. ObjectFile *File; private: StringRef SectionName; std::vector AssocChildren; llvm::iterator_range Relocs; size_t NumRelocs; // True if this chunk was discarded because it was a duplicate comdat section. bool Discarded; // Used by the garbage collector. bool Live; // Used for ICF (Identical COMDAT Folding) void replace(SectionChunk *Other); uint32_t Class[2] = {0, 0}; // Sym points to a section symbol if this is a COMDAT chunk. DefinedRegular *Sym = nullptr; }; // A chunk for common symbols. Common chunks don't have actual data. class CommonChunk : public Chunk { public: CommonChunk(const COFFSymbolRef Sym); size_t getSize() const override { return Sym.getValue(); } bool hasData() const override { return false; } uint32_t getPermissions() const override; StringRef getSectionName() const override { return ".bss"; } private: const COFFSymbolRef Sym; }; // A chunk for linker-created strings. class StringChunk : public Chunk { public: explicit StringChunk(StringRef S) : Str(S) {} size_t getSize() const override { return Str.size() + 1; } void writeTo(uint8_t *Buf) const override; private: StringRef Str; }; static const uint8_t ImportThunkX86[] = { 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // JMP *0x0 }; static const uint8_t ImportThunkARM[] = { 0x40, 0xf2, 0x00, 0x0c, // mov.w ip, #0 0xc0, 0xf2, 0x00, 0x0c, // mov.t ip, #0 0xdc, 0xf8, 0x00, 0xf0, // ldr.w pc, [ip] }; // Windows-specific. // A chunk for DLL import jump table entry. In a final output, it's // contents will be a JMP instruction to some __imp_ symbol. 
class ImportThunkChunkX64 : public Chunk { public: explicit ImportThunkChunkX64(Defined *S); size_t getSize() const override { return sizeof(ImportThunkX86); } void writeTo(uint8_t *Buf) const override; private: Defined *ImpSymbol; }; class ImportThunkChunkX86 : public Chunk { public: explicit ImportThunkChunkX86(Defined *S) : ImpSymbol(S) {} size_t getSize() const override { return sizeof(ImportThunkX86); } void getBaserels(std::vector *Res) override; void writeTo(uint8_t *Buf) const override; private: Defined *ImpSymbol; }; class ImportThunkChunkARM : public Chunk { public: explicit ImportThunkChunkARM(Defined *S) : ImpSymbol(S) {} size_t getSize() const override { return sizeof(ImportThunkARM); } void getBaserels(std::vector *Res) override; void writeTo(uint8_t *Buf) const override; private: Defined *ImpSymbol; }; // Windows-specific. // See comments for DefinedLocalImport class. class LocalImportChunk : public Chunk { public: explicit LocalImportChunk(Defined *S) : Sym(S) {} size_t getSize() const override; void getBaserels(std::vector *Res) override; void writeTo(uint8_t *Buf) const override; private: Defined *Sym; }; // Windows-specific. // A chunk for SEH table which contains RVAs of safe exception handler // functions. x86-only. class SEHTableChunk : public Chunk { public: explicit SEHTableChunk(std::set S) : Syms(std::move(S)) {} size_t getSize() const override { return Syms.size() * 4; } void writeTo(uint8_t *Buf) const override; private: std::set Syms; }; // Windows-specific. // This class represents a block in .reloc section. // See the PE/COFF spec 5.6 for details. 
class BaserelChunk : public Chunk { public: BaserelChunk(uint32_t Page, Baserel *Begin, Baserel *End); size_t getSize() const override { return Data.size(); } void writeTo(uint8_t *Buf) const override; private: std::vector Data; }; class Baserel { public: Baserel(uint32_t V, uint8_t Ty) : RVA(V), Type(Ty) {} explicit Baserel(uint32_t V) : Baserel(V, getDefaultType()) {} uint8_t getDefaultType(); uint32_t RVA; uint8_t Type; }; } // namespace coff } // namespace lld #endif diff --git a/COFF/Driver.h b/COFF/Driver.h index 2b5d1e7ae28b..6879be2eb0c7 100644 --- a/COFF/Driver.h +++ b/COFF/Driver.h @@ -1,189 +1,188 @@ //===- Driver.h -------------------------------------------------*- C++ -*-===// // // The LLVM Linker // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #ifndef LLD_COFF_DRIVER_H #define LLD_COFF_DRIVER_H #include "Config.h" #include "SymbolTable.h" #include "lld/Core/LLVM.h" #include "lld/Core/Reproduce.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/TarWriter.h" #include #include #include namespace lld { namespace coff { class LinkerDriver; extern LinkerDriver *Driver; using llvm::COFF::MachineTypes; using llvm::COFF::WindowsSubsystem; using llvm::Optional; -class InputFile; // Implemented in MarkLive.cpp. void markLive(const std::vector &Chunks); // Implemented in ICF.cpp. void doICF(const std::vector &Chunks); class ArgParser { public: // Parses command line options. llvm::opt::InputArgList parse(llvm::ArrayRef Args); // Concatenate LINK environment variable and given arguments and parse them. llvm::opt::InputArgList parseLINK(std::vector Args); // Tokenizes a given string and then parses as command line options.
llvm::opt::InputArgList parse(StringRef S) { return parse(tokenize(S)); } private: std::vector tokenize(StringRef S); std::vector replaceResponseFiles(std::vector); }; class LinkerDriver { public: LinkerDriver() { coff::Symtab = &Symtab; } void link(llvm::ArrayRef Args); // Used by the resolver to parse .drectve section contents. void parseDirectives(StringRef S); // Used by ArchiveFile to enqueue members. void enqueueArchiveMember(const Archive::Child &C, StringRef SymName, StringRef ParentName); private: ArgParser Parser; SymbolTable Symtab; std::unique_ptr Tar; // for /linkrepro // Opens a file. Path has to be resolved already. MemoryBufferRef openFile(StringRef Path); // Searches a file from search paths. Optional findFile(StringRef Filename); Optional findLib(StringRef Filename); StringRef doFindFile(StringRef Filename); StringRef doFindLib(StringRef Filename); // Parses LIB environment which contains a list of search paths. void addLibSearchPaths(); // Library search path. The first element is always "" (current directory). std::vector SearchPaths; std::set VisitedFiles; std::set VisitedLibs; SymbolBody *addUndefined(StringRef Sym); StringRef mangle(StringRef Sym); // Windows specific -- "main" is not the only main function in Windows. // You can choose one from these four -- {w,}{WinMain,main}. // There are four different entry point functions for them, // {w,}{WinMain,main}CRTStartup, respectively. The linker needs to // choose the right one depending on which "main" function is defined. // This function looks up the symbol table and resolve corresponding // entry point name. 
StringRef findDefaultEntry(); WindowsSubsystem inferSubsystem(); void invokeMSVC(llvm::opt::InputArgList &Args); MemoryBufferRef takeBuffer(std::unique_ptr MB); void addBuffer(std::unique_ptr MB); void addArchiveBuffer(MemoryBufferRef MBRef, StringRef SymName, StringRef ParentName); void enqueuePath(StringRef Path); void enqueueTask(std::function Task); bool run(); std::list> TaskQueue; std::vector FilePaths; std::vector Resources; }; // Functions below this line are defined in DriverUtils.cpp. void printHelp(const char *Argv0); // For /machine option. MachineTypes getMachineType(StringRef Arg); StringRef machineToStr(MachineTypes MT); // Parses a string in the form of "[,]". void parseNumbers(StringRef Arg, uint64_t *Addr, uint64_t *Size = nullptr); // Parses a string in the form of "[.]". // Minor's default value is 0. void parseVersion(StringRef Arg, uint32_t *Major, uint32_t *Minor); // Parses a string in the form of "[,[.]]". void parseSubsystem(StringRef Arg, WindowsSubsystem *Sys, uint32_t *Major, uint32_t *Minor); void parseAlternateName(StringRef); void parseMerge(StringRef); void parseSection(StringRef); // Parses a string in the form of "EMBED[,=]|NO". void parseManifest(StringRef Arg); // Parses a string in the form of "level=|uiAccess=" void parseManifestUAC(StringRef Arg); // Create a resource file containing a manifest XML. std::unique_ptr createManifestRes(); void createSideBySideManifest(); // Used for dllexported symbols. Export parseExport(StringRef Arg); void fixupExports(); void assignExportOrdinals(); // Parses a string in the form of "key=value" and check // if value matches previous values for the key. // This feature used in the directive section to reject // incompatible objects. void checkFailIfMismatch(StringRef Arg); // Convert Windows resource files (.res files) to a .obj file // using cvtres.exe. 
std::unique_ptr convertResToCOFF(const std::vector &MBs); void runMSVCLinker(std::string Rsp, ArrayRef Objects); // Create enum with OPT_xxx values for each option in Options.td enum { OPT_INVALID = 0, #define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11, _12) OPT_##ID, #include "Options.inc" #undef OPTION }; } // namespace coff } // namespace lld #endif diff --git a/COFF/MarkLive.cpp b/COFF/MarkLive.cpp index 25e5cc350673..a2756e5c89e0 100644 --- a/COFF/MarkLive.cpp +++ b/COFF/MarkLive.cpp @@ -1,68 +1,75 @@ //===- MarkLive.cpp -------------------------------------------------------===// // // The LLVM Linker // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "Chunks.h" #include "Symbols.h" #include "llvm/ADT/STLExtras.h" #include namespace lld { namespace coff { // Set live bit on for each reachable chunk. Unmarked (unreachable) // COMDAT chunks will be ignored by Writer, so they will be excluded // from the final output. void markLive(const std::vector &Chunks) { // We build up a worklist of sections which have been marked as live. We only // push into the worklist when we discover an unmarked section, and we mark // as we push, so sections never appear twice in the list. SmallVector Worklist; // COMDAT section chunks are dead by default. Add non-COMDAT chunks. for (Chunk *C : Chunks) if (auto *SC = dyn_cast(C)) if (SC->isLive()) Worklist.push_back(SC); auto Enqueue = [&](SectionChunk *C) { if (C->isLive()) return; C->markLive(); Worklist.push_back(C); }; auto AddSym = [&](SymbolBody *B) { if (auto *Sym = dyn_cast(B)) Enqueue(Sym->getChunk()); else if (auto *Sym = dyn_cast(B)) Sym->File->Live = true; else if (auto *Sym = dyn_cast(B)) Sym->WrappedSym->File->Live = true; }; // Add GC root chunks. 
for (SymbolBody *B : Config->GCRoot) AddSym(B); while (!Worklist.empty()) { SectionChunk *SC = Worklist.pop_back_val(); + + // If this section was discarded, there are relocations referring to + // discarded sections. Ignore these sections to avoid crashing. They will be + // diagnosed during relocation processing. + if (SC->isDiscarded()) + continue; + assert(SC->isLive() && "We mark as live when pushing onto the worklist!"); // Mark all symbols listed in the relocation table for this section. for (SymbolBody *B : SC->symbols()) AddSym(B); // Mark associative sections if any. for (SectionChunk *C : SC->children()) Enqueue(C); } } } } diff --git a/COFF/Symbols.h b/COFF/Symbols.h index 8c1390c45876..a12ae1c01e07 100644 --- a/COFF/Symbols.h +++ b/COFF/Symbols.h @@ -1,428 +1,443 @@ //===- Symbols.h ------------------------------------------------*- C++ -*-===// // // The LLVM Linker // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #ifndef LLD_COFF_SYMBOLS_H #define LLD_COFF_SYMBOLS_H #include "Chunks.h" #include "Config.h" #include "Memory.h" #include "lld/Core/LLVM.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" #include #include #include namespace lld { namespace coff { using llvm::object::Archive; using llvm::object::COFFSymbolRef; using llvm::object::coff_import_header; using llvm::object::coff_symbol_generic; class ArchiveFile; class InputFile; class ObjectFile; struct Symbol; class SymbolTable; // The base class for real symbol classes. class SymbolBody { public: enum Kind { // The order of these is significant. We start with the regular defined // symbols as those are the most prevalent and the zero tag is the cheapest // to set.
Among the defined kinds, the lower the kind is preferred over // the higher kind when testing whether one symbol should take precedence // over another. DefinedRegularKind = 0, DefinedCommonKind, DefinedLocalImportKind, DefinedImportThunkKind, DefinedImportDataKind, DefinedAbsoluteKind, DefinedSyntheticKind, UndefinedKind, LazyKind, LastDefinedCOFFKind = DefinedCommonKind, LastDefinedKind = DefinedSyntheticKind, }; Kind kind() const { return static_cast(SymbolKind); } // Returns true if this is an external symbol. bool isExternal() { return IsExternal; } // Returns the symbol name. StringRef getName(); // Returns the file from which this symbol was created. InputFile *getFile(); Symbol *symbol(); const Symbol *symbol() const { return const_cast(this)->symbol(); } protected: friend SymbolTable; explicit SymbolBody(Kind K, StringRef N = "") : SymbolKind(K), IsExternal(true), IsCOMDAT(false), WrittenToSymtab(false), Name(N) {} const unsigned SymbolKind : 8; unsigned IsExternal : 1; // This bit is used by the \c DefinedRegular subclass. unsigned IsCOMDAT : 1; public: // This bit is used by Writer::createSymbolAndStringTable() to prevent // symbols from being written to the symbol table more than once. unsigned WrittenToSymtab : 1; protected: StringRef Name; }; // The base class for any defined symbols, including absolute symbols, // etc. class Defined : public SymbolBody { public: Defined(Kind K, StringRef N) : SymbolBody(K, N) {} static bool classof(const SymbolBody *S) { return S->kind() <= LastDefinedKind; } // Returns the RVA (relative virtual address) of this symbol. The // writer sets and uses RVAs. uint64_t getRVA(); - // Returns the RVA relative to the beginning of the output section. - // Used to implement SECREL relocation type. - uint32_t getSecrel(); - - // Returns the output section index. - // Used to implement SECTION relocation type. - uint16_t getSectionIndex(); - - // Returns true if this symbol points to an executable (e.g. .text) section.
- // Used to implement ARM relocations. - bool isExecutable(); + // Returns the chunk containing this symbol. Absolute symbols and __ImageBase + // do not have chunks, so this may return null. + Chunk *getChunk(); }; // Symbols defined via a COFF object file or bitcode file. For COFF files, this // stores a coff_symbol_generic*, and names of internal symbols are lazily // loaded through that. For bitcode files, Sym is nullptr and the name is stored // as a StringRef. class DefinedCOFF : public Defined { friend SymbolBody; public: DefinedCOFF(Kind K, InputFile *F, StringRef N, const coff_symbol_generic *S) : Defined(K, N), File(F), Sym(S) {} static bool classof(const SymbolBody *S) { return S->kind() <= LastDefinedCOFFKind; } InputFile *getFile() { return File; } COFFSymbolRef getCOFFSymbol(); InputFile *File; protected: const coff_symbol_generic *Sym; }; // Regular defined symbols read from object file symbol tables. class DefinedRegular : public DefinedCOFF { public: DefinedRegular(InputFile *F, StringRef N, bool IsCOMDAT, bool IsExternal = false, const coff_symbol_generic *S = nullptr, SectionChunk *C = nullptr) : DefinedCOFF(DefinedRegularKind, F, N, S), Data(C ? 
&C->Repl : nullptr) { this->IsExternal = IsExternal; this->IsCOMDAT = IsCOMDAT; } static bool classof(const SymbolBody *S) { return S->kind() == DefinedRegularKind; } uint64_t getRVA() { return (*Data)->getRVA() + Sym->Value; } bool isCOMDAT() { return IsCOMDAT; } SectionChunk *getChunk() { return *Data; } uint32_t getValue() { return Sym->Value; } - uint32_t getSecrel(); private: SectionChunk **Data; }; class DefinedCommon : public DefinedCOFF { public: DefinedCommon(InputFile *F, StringRef N, uint64_t Size, const coff_symbol_generic *S = nullptr, CommonChunk *C = nullptr) : DefinedCOFF(DefinedCommonKind, F, N, S), Data(C), Size(Size) { this->IsExternal = true; } static bool classof(const SymbolBody *S) { return S->kind() == DefinedCommonKind; } uint64_t getRVA() { return Data->getRVA(); } - uint32_t getSecrel() { return Data->OutputSectionOff; } - uint16_t getSectionIndex(); + Chunk *getChunk() { return Data; } private: friend SymbolTable; uint64_t getSize() const { return Size; } CommonChunk *Data; uint64_t Size; }; // Absolute symbols. class DefinedAbsolute : public Defined { public: DefinedAbsolute(StringRef N, COFFSymbolRef S) : Defined(DefinedAbsoluteKind, N), VA(S.getValue()) { IsExternal = S.isExternal(); } DefinedAbsolute(StringRef N, uint64_t V) : Defined(DefinedAbsoluteKind, N), VA(V) {} static bool classof(const SymbolBody *S) { return S->kind() == DefinedAbsoluteKind; } uint64_t getRVA() { return VA - Config->ImageBase; } void setVA(uint64_t V) { VA = V; } // The sentinel absolute symbol section index. Section index relocations // against absolute symbols resolve to this 16 bit number, and it is the // largest valid section index plus one. This is written by the Writer. static uint16_t OutputSectionIndex; + uint16_t getSecIdx() { return OutputSectionIndex; } private: uint64_t VA; }; // This symbol is used for linker-synthesized symbols like __ImageBase and // __safe_se_handler_table. 
class DefinedSynthetic : public Defined { public: explicit DefinedSynthetic(StringRef Name, Chunk *C) : Defined(DefinedSyntheticKind, Name), C(C) {} static bool classof(const SymbolBody *S) { return S->kind() == DefinedSyntheticKind; } // A null chunk indicates that this is __ImageBase. Otherwise, this is some // other synthesized chunk, like SEHTableChunk. - uint32_t getRVA() const { return C ? C->getRVA() : 0; } - uint32_t getSecrel() const { return C ? C->OutputSectionOff : 0; } - Chunk *getChunk() const { return C; } + uint32_t getRVA() { return C ? C->getRVA() : 0; } + Chunk *getChunk() { return C; } private: Chunk *C; }; // This class represents a symbol defined in an archive file. It is // created from an archive file header, and it knows how to load an // object file from an archive to replace itself with a defined // symbol. If the resolver finds both Undefined and Lazy for // the same name, it will ask the Lazy to load a file. class Lazy : public SymbolBody { public: Lazy(ArchiveFile *F, const Archive::Symbol S) : SymbolBody(LazyKind, S.getName()), File(F), Sym(S) {} static bool classof(const SymbolBody *S) { return S->kind() == LazyKind; } ArchiveFile *File; private: friend SymbolTable; private: const Archive::Symbol Sym; }; // Undefined symbols. class Undefined : public SymbolBody { public: explicit Undefined(StringRef N) : SymbolBody(UndefinedKind, N) {} static bool classof(const SymbolBody *S) { return S->kind() == UndefinedKind; } // An undefined symbol can have a fallback symbol which gives an // undefined symbol a second chance if it would remain undefined. // If it remains undefined, it'll be replaced with whatever the // Alias pointer points to. SymbolBody *WeakAlias = nullptr; // If this symbol is external weak, try to resolve it to a defined // symbol by searching the chain of fallback symbols. Returns the symbol if // successful, otherwise returns null. Defined *getWeakAlias(); }; // Windows-specific classes. 
// This class represents a symbol imported from a DLL. This has two // names for internal use and external use. The former is used for // name resolution, and the latter is used for the import descriptor // table in an output. The former has "__imp_" prefix. class DefinedImportData : public Defined { public: DefinedImportData(StringRef N, ImportFile *F) : Defined(DefinedImportDataKind, N), File(F) { } static bool classof(const SymbolBody *S) { return S->kind() == DefinedImportDataKind; } uint64_t getRVA() { return File->Location->getRVA(); } + Chunk *getChunk() { return File->Location; } + void setLocation(Chunk *AddressTable) { File->Location = AddressTable; } + StringRef getDLLName() { return File->DLLName; } StringRef getExternalName() { return File->ExternalName; } - void setLocation(Chunk *AddressTable) { File->Location = AddressTable; } uint16_t getOrdinal() { return File->Hdr->OrdinalHint; } ImportFile *File; }; // This class represents a symbol for a jump table entry which jumps // to a function in a DLL. Linkers are supposed to create such symbols // without "__imp_" prefix for all function symbols exported from // DLLs, so that you can call DLL functions as regular functions with // a regular name. A function pointer is given as a DefinedImportData. class DefinedImportThunk : public Defined { public: DefinedImportThunk(StringRef Name, DefinedImportData *S, uint16_t Machine); static bool classof(const SymbolBody *S) { return S->kind() == DefinedImportThunkKind; } uint64_t getRVA() { return Data->getRVA(); } Chunk *getChunk() { return Data; } DefinedImportData *WrappedSym; private: Chunk *Data; }; // If you have a symbol "__imp_foo" in your object file, a symbol name // "foo" becomes automatically available as a pointer to "__imp_foo". // This class is for such automatically-created symbols. // Yes, this is an odd feature. We didn't intend to implement that. // This is here just for compatibility with MSVC.
class DefinedLocalImport : public Defined { public: DefinedLocalImport(StringRef N, Defined *S) : Defined(DefinedLocalImportKind, N), Data(make(S)) {} static bool classof(const SymbolBody *S) { return S->kind() == DefinedLocalImportKind; } uint64_t getRVA() { return Data->getRVA(); } Chunk *getChunk() { return Data; } private: LocalImportChunk *Data; }; inline uint64_t Defined::getRVA() { switch (kind()) { case DefinedAbsoluteKind: return cast(this)->getRVA(); case DefinedSyntheticKind: return cast(this)->getRVA(); case DefinedImportDataKind: return cast(this)->getRVA(); case DefinedImportThunkKind: return cast(this)->getRVA(); case DefinedLocalImportKind: return cast(this)->getRVA(); case DefinedCommonKind: return cast(this)->getRVA(); case DefinedRegularKind: return cast(this)->getRVA(); case LazyKind: case UndefinedKind: llvm_unreachable("Cannot get the address for an undefined symbol."); } llvm_unreachable("unknown symbol kind"); } +inline Chunk *Defined::getChunk() { + switch (kind()) { + case DefinedRegularKind: + return cast(this)->getChunk(); + case DefinedAbsoluteKind: + return nullptr; + case DefinedSyntheticKind: + return cast(this)->getChunk(); + case DefinedImportDataKind: + return cast(this)->getChunk(); + case DefinedImportThunkKind: + return cast(this)->getChunk(); + case DefinedLocalImportKind: + return cast(this)->getChunk(); + case DefinedCommonKind: + return cast(this)->getChunk(); + case LazyKind: + case UndefinedKind: + llvm_unreachable("Cannot get the chunk of an undefined symbol."); + } + llvm_unreachable("unknown symbol kind"); +} + // A real symbol object, SymbolBody, is usually stored within a Symbol. There's // always one Symbol for each symbol name. The resolver updates the SymbolBody // stored in the Body field of this object as it resolves symbols. Symbol also // holds computed properties of symbol names. struct Symbol { // True if this symbol was referenced by a regular (non-bitcode) object. 
unsigned IsUsedInRegularObj : 1; // True if we've seen both a lazy and an undefined symbol with this symbol // name, which means that we have enqueued an archive member load and should // not load any more archive members to resolve the same symbol. unsigned PendingArchiveLoad : 1; // This field is used to store the Symbol's SymbolBody. This instantiation of // AlignedCharArrayUnion gives us a struct with a char array field that is // large and aligned enough to store any derived class of SymbolBody. llvm::AlignedCharArrayUnion< DefinedRegular, DefinedCommon, DefinedAbsolute, DefinedSynthetic, Lazy, Undefined, DefinedImportData, DefinedImportThunk, DefinedLocalImport> Body; SymbolBody *body() { return reinterpret_cast(Body.buffer); } const SymbolBody *body() const { return const_cast(this)->body(); } }; template void replaceBody(Symbol *S, ArgT &&... Arg) { static_assert(sizeof(T) <= sizeof(S->Body), "Body too small"); static_assert(alignof(T) <= alignof(decltype(S->Body)), "Body not aligned enough"); assert(static_cast(static_cast(nullptr)) == nullptr && "Not a SymbolBody"); new (S->Body.buffer) T(std::forward(Arg)...); } inline Symbol *SymbolBody::symbol() { assert(isExternal()); return reinterpret_cast(reinterpret_cast(this) - offsetof(Symbol, Body)); } } // namespace coff std::string toString(coff::SymbolBody &B); } // namespace lld #endif diff --git a/COFF/Writer.cpp b/COFF/Writer.cpp index d32577b361fa..4cf718a48d8b 100644 --- a/COFF/Writer.cpp +++ b/COFF/Writer.cpp @@ -1,950 +1,900 @@ //===- Writer.cpp ---------------------------------------------------------===// // // The LLVM Linker // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// #include "Writer.h" #include "Config.h" #include "DLL.h" #include "Error.h" #include "InputFiles.h" #include "MapFile.h" #include "Memory.h" #include "PDB.h" #include "SymbolTable.h" #include "Symbols.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/Parallel.h" #include "llvm/Support/RandomNumberGenerator.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include #include using namespace llvm; using namespace llvm::COFF; using namespace llvm::object; using namespace llvm::support; using namespace llvm::support::endian; using namespace lld; using namespace lld::coff; static const int SectorSize = 512; static const int DOSStubSize = 64; static const int NumberfOfDataDirectory = 16; namespace { class DebugDirectoryChunk : public Chunk { public: DebugDirectoryChunk(const std::vector &R) : Records(R) {} size_t getSize() const override { return Records.size() * sizeof(debug_directory); } void writeTo(uint8_t *B) const override { auto *D = reinterpret_cast(B + OutputSectionOff); for (const Chunk *Record : Records) { D->Characteristics = 0; D->TimeDateStamp = 0; D->MajorVersion = 0; D->MinorVersion = 0; D->Type = COFF::IMAGE_DEBUG_TYPE_CODEVIEW; D->SizeOfData = Record->getSize(); D->AddressOfRawData = Record->getRVA(); // TODO(compnerd) get the file offset D->PointerToRawData = 0; ++D; } } private: const std::vector &Records; }; class CVDebugRecordChunk : public Chunk { size_t getSize() const override { return sizeof(codeview::DebugInfo) + Config->PDBPath.size() + 1; } void writeTo(uint8_t *B) const override { // Save off the DebugInfo entry to backfill the file signature (build id) // in Writer::writeBuildId DI = reinterpret_cast(B + OutputSectionOff); DI->Signature.CVSignature = 
OMF::Signature::PDB70; // variable sized field (PDB Path) auto *P = reinterpret_cast(B + OutputSectionOff + sizeof(*DI)); if (!Config->PDBPath.empty()) memcpy(P, Config->PDBPath.data(), Config->PDBPath.size()); P[Config->PDBPath.size()] = '\0'; } public: mutable codeview::DebugInfo *DI = nullptr; }; // The writer writes a SymbolTable result to a file. class Writer { public: Writer(SymbolTable *T) : Symtab(T) {} void run(); private: void createSections(); void createMiscChunks(); void createImportTables(); void createExportTable(); void assignAddresses(); void removeEmptySections(); void createSymbolAndStringTable(); void openFile(StringRef OutputPath); template void writeHeader(); void fixSafeSEHSymbols(); void setSectionPermissions(); void writeSections(); void sortExceptionTable(); void writeBuildId(); - void applyRelocations(); llvm::Optional createSymbol(Defined *D); size_t addEntryToStringTable(StringRef Str); OutputSection *findSection(StringRef Name); OutputSection *createSection(StringRef Name); void addBaserels(OutputSection *Dest); void addBaserelBlocks(OutputSection *Dest, std::vector &V); uint32_t getSizeOfInitializedData(); std::map> binImports(); SymbolTable *Symtab; std::unique_ptr Buffer; std::vector OutputSections; std::vector Strtab; std::vector OutputSymtab; IdataContents Idata; DelayLoadContents DelayIdata; EdataContents Edata; SEHTableChunk *SEHTable = nullptr; Chunk *DebugDirectory = nullptr; std::vector DebugRecords; CVDebugRecordChunk *BuildId = nullptr; ArrayRef SectionTable; uint64_t FileSize; uint32_t PointerToSymbolTable = 0; uint64_t SizeOfImage; uint64_t SizeOfHeaders; }; } // anonymous namespace namespace lld { namespace coff { void writeResult(SymbolTable *T) { Writer(T).run(); } void OutputSection::setRVA(uint64_t RVA) { Header.VirtualAddress = RVA; for (Chunk *C : Chunks) C->setRVA(C->getRVA() + RVA); } void OutputSection::setFileOffset(uint64_t Off) { // If a section has no actual data (i.e. 
BSS section), we want to // set 0 to its PointerToRawData. Otherwise the output is rejected // by the loader. if (Header.SizeOfRawData == 0) return; Header.PointerToRawData = Off; } void OutputSection::addChunk(Chunk *C) { Chunks.push_back(C); C->setOutputSection(this); uint64_t Off = Header.VirtualSize; Off = alignTo(Off, C->getAlign()); C->setRVA(Off); C->OutputSectionOff = Off; Off += C->getSize(); Header.VirtualSize = Off; if (C->hasData()) Header.SizeOfRawData = alignTo(Off, SectorSize); } void OutputSection::addPermissions(uint32_t C) { Header.Characteristics |= C & PermMask; } void OutputSection::setPermissions(uint32_t C) { Header.Characteristics = C & PermMask; } // Write the section header to a given buffer. void OutputSection::writeHeaderTo(uint8_t *Buf) { auto *Hdr = reinterpret_cast(Buf); *Hdr = Header; if (StringTableOff) { // If name is too long, write offset into the string table as a name. sprintf(Hdr->Name, "/%d", StringTableOff); } else { assert(!Config->Debug || Name.size() <= COFF::NameSize); strncpy(Hdr->Name, Name.data(), std::min(Name.size(), (size_t)COFF::NameSize)); } } -uint32_t Defined::getSecrel() { - assert(this); - switch (kind()) { - case DefinedRegularKind: - return cast(this)->getSecrel(); - case DefinedCommonKind: - return cast(this)->getSecrel(); - case DefinedSyntheticKind: - return cast(this)->getSecrel(); - default: - break; - } - fatal("SECREL relocation points to a non-regular symbol: " + toString(*this)); -} - -uint32_t DefinedRegular::getSecrel() { - assert(getChunk()->isLive() && "relocation against discarded section"); - uint64_t Diff = getRVA() - getChunk()->getOutputSection()->getRVA(); - assert(Diff < UINT32_MAX && "section offset too large"); - return (uint32_t)Diff; -} - -uint16_t Defined::getSectionIndex() { - if (auto *D = dyn_cast(this)) - return D->getChunk()->getOutputSection()->SectionIndex; - if (isa(this)) - return DefinedAbsolute::OutputSectionIndex; - if (auto *D = dyn_cast(this)) - return 
D->getSectionIndex(); - if (auto *D = dyn_cast(this)) { - if (!D->getChunk()) - return 0; - return D->getChunk()->getOutputSection()->SectionIndex; - } - fatal("SECTION relocation points to a non-regular symbol: " + - toString(*this)); -} - -uint16_t DefinedCommon::getSectionIndex() { - return Data->getOutputSection()->SectionIndex; -} - -bool Defined::isExecutable() { - const auto X = IMAGE_SCN_MEM_EXECUTE; - if (auto *D = dyn_cast(this)) - return D->getChunk()->getOutputSection()->getPermissions() & X; - return isa(this); -} - } // namespace coff } // namespace lld // The main function of the writer. void Writer::run() { createSections(); createMiscChunks(); createImportTables(); createExportTable(); if (Config->Relocatable) createSection(".reloc"); assignAddresses(); removeEmptySections(); setSectionPermissions(); createSymbolAndStringTable(); openFile(Config->OutputFile); if (Config->is64()) { writeHeader(); } else { writeHeader(); } fixSafeSEHSymbols(); writeSections(); sortExceptionTable(); writeBuildId(); if (!Config->PDBPath.empty() && Config->Debug) { const llvm::codeview::DebugInfo *DI = nullptr; if (Config->DebugTypes & static_cast(coff::DebugType::CV)) DI = BuildId->DI; createPDB(Config->PDBPath, Symtab, SectionTable, DI); } writeMapFile(OutputSections); if (auto EC = Buffer->commit()) fatal(EC, "failed to write the output file"); } static StringRef getOutputSection(StringRef Name) { StringRef S = Name.split('$').first; auto It = Config->Merge.find(S); if (It == Config->Merge.end()) return S; return It->second; } // Create output section objects and add them to OutputSections. void Writer::createSections() { // First, bin chunks by name. std::map> Map; for (Chunk *C : Symtab->getChunks()) { auto *SC = dyn_cast(C); if (SC && !SC->isLive()) { if (Config->Verbose) SC->printDiscardedMessage(); continue; } Map[C->getSectionName()].push_back(C); } // Then create an OutputSection for each section. 
// '$' and all following characters in input section names are // discarded when determining output section. So, .text$foo // contributes to .text, for example. See PE/COFF spec 3.2. SmallDenseMap Sections; for (auto Pair : Map) { StringRef Name = getOutputSection(Pair.first); OutputSection *&Sec = Sections[Name]; if (!Sec) { Sec = make(Name); OutputSections.push_back(Sec); } std::vector &Chunks = Pair.second; for (Chunk *C : Chunks) { Sec->addChunk(C); Sec->addPermissions(C->getPermissions()); } } } void Writer::createMiscChunks() { OutputSection *RData = createSection(".rdata"); // Create thunks for locally-dllimported symbols. if (!Symtab->LocalImportChunks.empty()) { for (Chunk *C : Symtab->LocalImportChunks) RData->addChunk(C); } // Create Debug Information Chunks if (Config->Debug) { DebugDirectory = make(DebugRecords); // TODO(compnerd) create a coffgrp entry if DebugType::CV is not enabled if (Config->DebugTypes & static_cast(coff::DebugType::CV)) { auto *Chunk = make(); BuildId = Chunk; DebugRecords.push_back(Chunk); } RData->addChunk(DebugDirectory); for (Chunk *C : DebugRecords) RData->addChunk(C); } // Create SEH table. x86-only. if (Config->Machine != I386) return; std::set Handlers; for (lld::coff::ObjectFile *File : Symtab->ObjectFiles) { if (!File->SEHCompat) return; for (SymbolBody *B : File->SEHandlers) { // Make sure the handler is still live. Assume all handlers are regular // symbols. auto *D = dyn_cast(B); if (D && D->getChunk()->isLive()) Handlers.insert(D); } } if (!Handlers.empty()) { SEHTable = make(Handlers); RData->addChunk(SEHTable); } } // Create .idata section for the DLL-imported symbol table. // The format of this section is inherently Windows-specific. // IdataContents class abstracted away the details for us, // so we just let it create chunks and add them to the section. 
void Writer::createImportTables() { if (Symtab->ImportFiles.empty()) return; // Initialize DLLOrder so that import entries are ordered in // the same order as in the command line. (That affects DLL // initialization order, and this ordering is MSVC-compatible.) for (ImportFile *File : Symtab->ImportFiles) { if (!File->Live) continue; std::string DLL = StringRef(File->DLLName).lower(); if (Config->DLLOrder.count(DLL) == 0) Config->DLLOrder[DLL] = Config->DLLOrder.size(); } OutputSection *Text = createSection(".text"); for (ImportFile *File : Symtab->ImportFiles) { if (!File->Live) continue; if (DefinedImportThunk *Thunk = File->ThunkSym) Text->addChunk(Thunk->getChunk()); if (Config->DelayLoads.count(StringRef(File->DLLName).lower())) { if (!File->ThunkSym) fatal("cannot delay-load " + toString(File) + " due to import of data: " + toString(*File->ImpSym)); DelayIdata.add(File->ImpSym); } else { Idata.add(File->ImpSym); } } if (!Idata.empty()) { OutputSection *Sec = createSection(".idata"); for (Chunk *C : Idata.getChunks()) Sec->addChunk(C); } if (!DelayIdata.empty()) { Defined *Helper = cast(Config->DelayLoadHelper); DelayIdata.create(Helper); OutputSection *Sec = createSection(".didat"); for (Chunk *C : DelayIdata.getChunks()) Sec->addChunk(C); Sec = createSection(".data"); for (Chunk *C : DelayIdata.getDataChunks()) Sec->addChunk(C); Sec = createSection(".text"); for (Chunk *C : DelayIdata.getCodeChunks()) Sec->addChunk(C); } } void Writer::createExportTable() { if (Config->Exports.empty()) return; OutputSection *Sec = createSection(".edata"); for (Chunk *C : Edata.Chunks) Sec->addChunk(C); } // The Windows loader doesn't seem to like empty sections, // so we remove them if any. 
void Writer::removeEmptySections() { auto IsEmpty = [](OutputSection *S) { return S->getVirtualSize() == 0; }; OutputSections.erase( std::remove_if(OutputSections.begin(), OutputSections.end(), IsEmpty), OutputSections.end()); uint32_t Idx = 1; for (OutputSection *Sec : OutputSections) Sec->SectionIndex = Idx++; } size_t Writer::addEntryToStringTable(StringRef Str) { assert(Str.size() > COFF::NameSize); size_t OffsetOfEntry = Strtab.size() + 4; // +4 for the size field Strtab.insert(Strtab.end(), Str.begin(), Str.end()); Strtab.push_back('\0'); return OffsetOfEntry; } Optional Writer::createSymbol(Defined *Def) { // Relative symbols are unrepresentable in a COFF symbol table. if (isa(Def)) return None; if (auto *D = dyn_cast(Def)) { // Don't write dead symbols or symbols in codeview sections to the symbol // table. if (!D->getChunk()->isLive() || D->getChunk()->isCodeView()) return None; } if (auto *Sym = dyn_cast(Def)) if (!Sym->File->Live) return None; if (auto *Sym = dyn_cast(Def)) if (!Sym->WrappedSym->File->Live) return None; coff_symbol16 Sym; StringRef Name = Def->getName(); if (Name.size() > COFF::NameSize) { Sym.Name.Offset.Zeroes = 0; Sym.Name.Offset.Offset = addEntryToStringTable(Name); } else { memset(Sym.Name.ShortName, 0, COFF::NameSize); memcpy(Sym.Name.ShortName, Name.data(), Name.size()); } if (auto *D = dyn_cast(Def)) { COFFSymbolRef Ref = D->getCOFFSymbol(); Sym.Type = Ref.getType(); Sym.StorageClass = Ref.getStorageClass(); } else { Sym.Type = IMAGE_SYM_TYPE_NULL; Sym.StorageClass = IMAGE_SYM_CLASS_EXTERNAL; } Sym.NumberOfAuxSymbols = 0; switch (Def->kind()) { case SymbolBody::DefinedAbsoluteKind: Sym.Value = Def->getRVA(); Sym.SectionNumber = IMAGE_SYM_ABSOLUTE; break; default: { uint64_t RVA = Def->getRVA(); OutputSection *Sec = nullptr; for (OutputSection *S : OutputSections) { if (S->getRVA() > RVA) break; Sec = S; } Sym.Value = RVA - Sec->getRVA(); Sym.SectionNumber = Sec->SectionIndex; break; } } return Sym; } void 
Writer::createSymbolAndStringTable() { if (!Config->Debug || !Config->WriteSymtab) return; // Name field in the section table is 8 byte long. Longer names need // to be written to the string table. First, construct string table. for (OutputSection *Sec : OutputSections) { StringRef Name = Sec->getName(); if (Name.size() <= COFF::NameSize) continue; Sec->setStringTableOff(addEntryToStringTable(Name)); } for (lld::coff::ObjectFile *File : Symtab->ObjectFiles) { for (SymbolBody *B : File->getSymbols()) { auto *D = dyn_cast(B); if (!D || D->WrittenToSymtab) continue; D->WrittenToSymtab = true; if (Optional Sym = createSymbol(D)) OutputSymtab.push_back(*Sym); } } OutputSection *LastSection = OutputSections.back(); // We position the symbol table to be adjacent to the end of the last section. uint64_t FileOff = LastSection->getFileOff() + alignTo(LastSection->getRawSize(), SectorSize); if (!OutputSymtab.empty()) { PointerToSymbolTable = FileOff; FileOff += OutputSymtab.size() * sizeof(coff_symbol16); } if (!Strtab.empty()) FileOff += Strtab.size() + 4; FileSize = alignTo(FileOff, SectorSize); } // Visits all sections to assign incremental, non-overlapping RVAs and // file offsets. void Writer::assignAddresses() { SizeOfHeaders = DOSStubSize + sizeof(PEMagic) + sizeof(coff_file_header) + sizeof(data_directory) * NumberfOfDataDirectory + sizeof(coff_section) * OutputSections.size(); SizeOfHeaders += Config->is64() ? sizeof(pe32plus_header) : sizeof(pe32_header); SizeOfHeaders = alignTo(SizeOfHeaders, SectorSize); uint64_t RVA = 0x1000; // The first page is kept unmapped. FileSize = SizeOfHeaders; // Move DISCARDABLE (or non-memory-mapped) sections to the end of file because // the loader cannot handle holes. 
std::stable_partition( OutputSections.begin(), OutputSections.end(), [](OutputSection *S) { return (S->getPermissions() & IMAGE_SCN_MEM_DISCARDABLE) == 0; }); for (OutputSection *Sec : OutputSections) { if (Sec->getName() == ".reloc") addBaserels(Sec); Sec->setRVA(RVA); Sec->setFileOffset(FileSize); RVA += alignTo(Sec->getVirtualSize(), PageSize); FileSize += alignTo(Sec->getRawSize(), SectorSize); } SizeOfImage = SizeOfHeaders + alignTo(RVA - 0x1000, PageSize); } template void Writer::writeHeader() { // Write DOS stub uint8_t *Buf = Buffer->getBufferStart(); auto *DOS = reinterpret_cast(Buf); Buf += DOSStubSize; DOS->Magic[0] = 'M'; DOS->Magic[1] = 'Z'; DOS->AddressOfRelocationTable = sizeof(dos_header); DOS->AddressOfNewExeHeader = DOSStubSize; // Write PE magic memcpy(Buf, PEMagic, sizeof(PEMagic)); Buf += sizeof(PEMagic); // Write COFF header auto *COFF = reinterpret_cast(Buf); Buf += sizeof(*COFF); COFF->Machine = Config->Machine; COFF->NumberOfSections = OutputSections.size(); COFF->Characteristics = IMAGE_FILE_EXECUTABLE_IMAGE; if (Config->LargeAddressAware) COFF->Characteristics |= IMAGE_FILE_LARGE_ADDRESS_AWARE; if (!Config->is64()) COFF->Characteristics |= IMAGE_FILE_32BIT_MACHINE; if (Config->DLL) COFF->Characteristics |= IMAGE_FILE_DLL; if (!Config->Relocatable) COFF->Characteristics |= IMAGE_FILE_RELOCS_STRIPPED; COFF->SizeOfOptionalHeader = sizeof(PEHeaderTy) + sizeof(data_directory) * NumberfOfDataDirectory; // Write PE header auto *PE = reinterpret_cast(Buf); Buf += sizeof(*PE); PE->Magic = Config->is64() ? PE32Header::PE32_PLUS : PE32Header::PE32; // If {Major,Minor}LinkerVersion is left at 0.0, then for some // reason signing the resulting PE file with Authenticode produces a // signature that fails to validate on Windows 7 (but is OK on 10). // Set it to 14.0, which is what VS2015 outputs, and which avoids // that problem. 
PE->MajorLinkerVersion = 14; PE->MinorLinkerVersion = 0; PE->ImageBase = Config->ImageBase; PE->SectionAlignment = PageSize; PE->FileAlignment = SectorSize; PE->MajorImageVersion = Config->MajorImageVersion; PE->MinorImageVersion = Config->MinorImageVersion; PE->MajorOperatingSystemVersion = Config->MajorOSVersion; PE->MinorOperatingSystemVersion = Config->MinorOSVersion; PE->MajorSubsystemVersion = Config->MajorOSVersion; PE->MinorSubsystemVersion = Config->MinorOSVersion; PE->Subsystem = Config->Subsystem; PE->SizeOfImage = SizeOfImage; PE->SizeOfHeaders = SizeOfHeaders; if (!Config->NoEntry) { Defined *Entry = cast(Config->Entry); PE->AddressOfEntryPoint = Entry->getRVA(); // Pointer to thumb code must have the LSB set, so adjust it. if (Config->Machine == ARMNT) PE->AddressOfEntryPoint |= 1; } PE->SizeOfStackReserve = Config->StackReserve; PE->SizeOfStackCommit = Config->StackCommit; PE->SizeOfHeapReserve = Config->HeapReserve; PE->SizeOfHeapCommit = Config->HeapCommit; // Import Descriptor Tables and Import Address Tables are merged // in our output. That's not compatible with the Binding feature // that is sort of prelinking. Setting this flag to make it clear // that our outputs are not for the Binding. 
PE->DLLCharacteristics = IMAGE_DLL_CHARACTERISTICS_NO_BIND; if (Config->AppContainer) PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_APPCONTAINER; if (Config->DynamicBase) PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE; if (Config->HighEntropyVA) PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA; if (Config->NxCompat) PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NX_COMPAT; if (!Config->AllowIsolation) PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION; if (Config->TerminalServerAware) PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE; PE->NumberOfRvaAndSize = NumberfOfDataDirectory; if (OutputSection *Text = findSection(".text")) { PE->BaseOfCode = Text->getRVA(); PE->SizeOfCode = Text->getRawSize(); } PE->SizeOfInitializedData = getSizeOfInitializedData(); // Write data directory auto *Dir = reinterpret_cast(Buf); Buf += sizeof(*Dir) * NumberfOfDataDirectory; if (OutputSection *Sec = findSection(".edata")) { Dir[EXPORT_TABLE].RelativeVirtualAddress = Sec->getRVA(); Dir[EXPORT_TABLE].Size = Sec->getVirtualSize(); } if (!Idata.empty()) { Dir[IMPORT_TABLE].RelativeVirtualAddress = Idata.getDirRVA(); Dir[IMPORT_TABLE].Size = Idata.getDirSize(); Dir[IAT].RelativeVirtualAddress = Idata.getIATRVA(); Dir[IAT].Size = Idata.getIATSize(); } if (OutputSection *Sec = findSection(".rsrc")) { Dir[RESOURCE_TABLE].RelativeVirtualAddress = Sec->getRVA(); Dir[RESOURCE_TABLE].Size = Sec->getVirtualSize(); } if (OutputSection *Sec = findSection(".pdata")) { Dir[EXCEPTION_TABLE].RelativeVirtualAddress = Sec->getRVA(); Dir[EXCEPTION_TABLE].Size = Sec->getVirtualSize(); } if (OutputSection *Sec = findSection(".reloc")) { Dir[BASE_RELOCATION_TABLE].RelativeVirtualAddress = Sec->getRVA(); Dir[BASE_RELOCATION_TABLE].Size = Sec->getVirtualSize(); } if (Symbol *Sym = Symtab->findUnderscore("_tls_used")) { if (Defined *B = dyn_cast(Sym->body())) { Dir[TLS_TABLE].RelativeVirtualAddress = B->getRVA(); 
Dir[TLS_TABLE].Size = Config->is64() ? sizeof(object::coff_tls_directory64) : sizeof(object::coff_tls_directory32); } } if (Config->Debug) { Dir[DEBUG_DIRECTORY].RelativeVirtualAddress = DebugDirectory->getRVA(); Dir[DEBUG_DIRECTORY].Size = DebugDirectory->getSize(); } if (Symbol *Sym = Symtab->findUnderscore("_load_config_used")) { if (auto *B = dyn_cast(Sym->body())) { SectionChunk *SC = B->getChunk(); assert(B->getRVA() >= SC->getRVA()); uint64_t OffsetInChunk = B->getRVA() - SC->getRVA(); if (!SC->hasData() || OffsetInChunk + 4 > SC->getSize()) fatal("_load_config_used is malformed"); ArrayRef SecContents = SC->getContents(); uint32_t LoadConfigSize = *reinterpret_cast(&SecContents[OffsetInChunk]); if (OffsetInChunk + LoadConfigSize > SC->getSize()) fatal("_load_config_used is too large"); Dir[LOAD_CONFIG_TABLE].RelativeVirtualAddress = B->getRVA(); Dir[LOAD_CONFIG_TABLE].Size = LoadConfigSize; } } if (!DelayIdata.empty()) { Dir[DELAY_IMPORT_DESCRIPTOR].RelativeVirtualAddress = DelayIdata.getDirRVA(); Dir[DELAY_IMPORT_DESCRIPTOR].Size = DelayIdata.getDirSize(); } // Write section table for (OutputSection *Sec : OutputSections) { Sec->writeHeaderTo(Buf); Buf += sizeof(coff_section); } SectionTable = ArrayRef( Buf - OutputSections.size() * sizeof(coff_section), Buf); if (OutputSymtab.empty()) return; COFF->PointerToSymbolTable = PointerToSymbolTable; uint32_t NumberOfSymbols = OutputSymtab.size(); COFF->NumberOfSymbols = NumberOfSymbols; auto *SymbolTable = reinterpret_cast( Buffer->getBufferStart() + COFF->PointerToSymbolTable); for (size_t I = 0; I != NumberOfSymbols; ++I) SymbolTable[I] = OutputSymtab[I]; // Create the string table, it follows immediately after the symbol table. // The first 4 bytes is length including itself. 
Buf = reinterpret_cast(&SymbolTable[NumberOfSymbols]); write32le(Buf, Strtab.size() + 4); if (!Strtab.empty()) memcpy(Buf + 4, Strtab.data(), Strtab.size()); } void Writer::openFile(StringRef Path) { Buffer = check( FileOutputBuffer::create(Path, FileSize, FileOutputBuffer::F_executable), "failed to open " + Path); } void Writer::fixSafeSEHSymbols() { if (!SEHTable) return; // Replace the absolute table symbol with a synthetic symbol pointing to the // SEHTable chunk so that we can emit base relocations for it and resolve // section relative relocations. Symbol *T = Symtab->find("___safe_se_handler_table"); Symbol *C = Symtab->find("___safe_se_handler_count"); replaceBody(T, T->body()->getName(), SEHTable); cast(C->body())->setVA(SEHTable->getSize() / 4); } // Handles /section options to allow users to overwrite // section attributes. void Writer::setSectionPermissions() { for (auto &P : Config->Section) { StringRef Name = P.first; uint32_t Perm = P.second; if (auto *Sec = findSection(Name)) Sec->setPermissions(Perm); } } // Write section contents to a mmap'ed file. void Writer::writeSections() { // Record the section index that should be used when resolving a section // relocation against an absolute symbol. DefinedAbsolute::OutputSectionIndex = OutputSections.size() + 1; uint8_t *Buf = Buffer->getBufferStart(); for (OutputSection *Sec : OutputSections) { uint8_t *SecBuf = Buf + Sec->getFileOff(); // Fill gaps between functions in .text with INT3 instructions // instead of leaving as NUL bytes (which can be interpreted as // ADD instructions). if (Sec->getPermissions() & IMAGE_SCN_CNT_CODE) memset(SecBuf, 0xCC, Sec->getRawSize()); for_each(parallel::par, Sec->getChunks().begin(), Sec->getChunks().end(), [&](Chunk *C) { C->writeTo(SecBuf); }); } } // Sort .pdata section contents according to PE/COFF spec 5.5. void Writer::sortExceptionTable() { OutputSection *Sec = findSection(".pdata"); if (!Sec) return; // We assume .pdata contains function table entries only. 
uint8_t *Begin = Buffer->getBufferStart() + Sec->getFileOff(); uint8_t *End = Begin + Sec->getVirtualSize(); if (Config->Machine == AMD64) { struct Entry { ulittle32_t Begin, End, Unwind; }; sort(parallel::par, (Entry *)Begin, (Entry *)End, [](const Entry &A, const Entry &B) { return A.Begin < B.Begin; }); return; } if (Config->Machine == ARMNT) { struct Entry { ulittle32_t Begin, Unwind; }; sort(parallel::par, (Entry *)Begin, (Entry *)End, [](const Entry &A, const Entry &B) { return A.Begin < B.Begin; }); return; } errs() << "warning: don't know how to handle .pdata.\n"; } // Backfill the CVSignature in a PDB70 Debug Record. This backfilling allows us // to get reproducible builds. void Writer::writeBuildId() { // There is nothing to backfill if BuildId was not setup. if (BuildId == nullptr) return; assert(BuildId->DI->Signature.CVSignature == OMF::Signature::PDB70 && "only PDB 7.0 is supported"); assert(sizeof(BuildId->DI->PDB70.Signature) == 16 && "signature size mismatch"); // Compute an MD5 hash. ArrayRef Buf(Buffer->getBufferStart(), Buffer->getBufferEnd()); memcpy(BuildId->DI->PDB70.Signature, MD5::hash(Buf).data(), 16); // TODO(compnerd) track the Age BuildId->DI->PDB70.Age = 1; } OutputSection *Writer::findSection(StringRef Name) { for (OutputSection *Sec : OutputSections) if (Sec->getName() == Name) return Sec; return nullptr; } uint32_t Writer::getSizeOfInitializedData() { uint32_t Res = 0; for (OutputSection *S : OutputSections) if (S->getPermissions() & IMAGE_SCN_CNT_INITIALIZED_DATA) Res += S->getRawSize(); return Res; } // Returns an existing section or create a new one if not found. 
OutputSection *Writer::createSection(StringRef Name) { if (auto *Sec = findSection(Name)) return Sec; const auto DATA = IMAGE_SCN_CNT_INITIALIZED_DATA; const auto BSS = IMAGE_SCN_CNT_UNINITIALIZED_DATA; const auto CODE = IMAGE_SCN_CNT_CODE; const auto DISCARDABLE = IMAGE_SCN_MEM_DISCARDABLE; const auto R = IMAGE_SCN_MEM_READ; const auto W = IMAGE_SCN_MEM_WRITE; const auto X = IMAGE_SCN_MEM_EXECUTE; uint32_t Perms = StringSwitch(Name) .Case(".bss", BSS | R | W) .Case(".data", DATA | R | W) .Cases(".didat", ".edata", ".idata", ".rdata", DATA | R) .Case(".reloc", DATA | DISCARDABLE | R) .Case(".text", CODE | R | X) .Default(0); if (!Perms) llvm_unreachable("unknown section name"); auto Sec = make(Name); Sec->addPermissions(Perms); OutputSections.push_back(Sec); return Sec; } // Dest is .reloc section. Add contents to that section. void Writer::addBaserels(OutputSection *Dest) { std::vector V; for (OutputSection *Sec : OutputSections) { if (Sec == Dest) continue; // Collect all locations for base relocations. for (Chunk *C : Sec->getChunks()) C->getBaserels(&V); // Add the addresses to .reloc section. if (!V.empty()) addBaserelBlocks(Dest, V); V.clear(); } } // Add addresses to .reloc section. Note that addresses are grouped by page. 
void Writer::addBaserelBlocks(OutputSection *Dest, std::vector &V) { const uint32_t Mask = ~uint32_t(PageSize - 1); uint32_t Page = V[0].RVA & Mask; size_t I = 0, J = 1; for (size_t E = V.size(); J < E; ++J) { uint32_t P = V[J].RVA & Mask; if (P == Page) continue; Dest->addChunk(make(Page, &V[I], &V[0] + J)); I = J; Page = P; } if (I == J) return; Dest->addChunk(make(Page, &V[I], &V[0] + J)); } diff --git a/ELF/Arch/SPARCV9.cpp b/ELF/Arch/SPARCV9.cpp new file mode 100644 index 000000000000..1f977c1e9cf2 --- /dev/null +++ b/ELF/Arch/SPARCV9.cpp @@ -0,0 +1,149 @@ +//===- SPARCV9.cpp --------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Error.h" +#include "InputFiles.h" +#include "Symbols.h" +#include "SyntheticSections.h" +#include "Target.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::support::endian; +using namespace llvm::ELF; +using namespace lld; +using namespace lld::elf; + +namespace { +class SPARCV9 final : public TargetInfo { +public: + SPARCV9(); + RelExpr getRelExpr(uint32_t Type, const SymbolBody &S, + const uint8_t *Loc) const override; + void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr, + int32_t Index, unsigned RelOff) const override; + void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; +}; +} // namespace + +SPARCV9::SPARCV9() { + CopyRel = R_SPARC_COPY; + GotRel = R_SPARC_GLOB_DAT; + PltRel = R_SPARC_JMP_SLOT; + RelativeRel = R_SPARC_RELATIVE; + GotEntrySize = 8; + PltEntrySize = 32; + PltHeaderSize = 4 * PltEntrySize; + + PageSize = 8192; + DefaultMaxPageSize = 0x100000; + DefaultImageBase = 0x100000; +} + +RelExpr SPARCV9::getRelExpr(uint32_t Type, const SymbolBody &S, + const uint8_t *Loc) const { + switch (Type) { 
+ case R_SPARC_32: + case R_SPARC_UA32: + case R_SPARC_64: + case R_SPARC_UA64: + return R_ABS; + case R_SPARC_PC10: + case R_SPARC_PC22: + case R_SPARC_DISP32: + case R_SPARC_WDISP30: + return R_PC; + case R_SPARC_GOT10: + return R_GOT_OFF; + case R_SPARC_GOT22: + return R_GOT_OFF; + case R_SPARC_WPLT30: + return R_PLT_PC; + case R_SPARC_NONE: + return R_NONE; + default: + error(toString(S.File) + ": unknown relocation type: " + toString(Type)); + return R_HINT; + } +} + +void SPARCV9::relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const { + switch (Type) { + case R_SPARC_32: + case R_SPARC_UA32: + // V-word32 + checkUInt<32>(Loc, Val, Type); + write32be(Loc, Val); + break; + case R_SPARC_DISP32: + // V-disp32 + checkInt<32>(Loc, Val, Type); + write32be(Loc, Val); + break; + case R_SPARC_WDISP30: + case R_SPARC_WPLT30: + // V-disp30 + checkInt<32>(Loc, Val, Type); + write32be(Loc, (read32be(Loc) & ~0x3fffffff) | ((Val >> 2) & 0x3fffffff)); + break; + case R_SPARC_22: + // V-imm22 + checkUInt<22>(Loc, Val, Type); + write32be(Loc, (read32be(Loc) & ~0x003fffff) | (Val & 0x003fffff)); + break; + case R_SPARC_GOT22: + case R_SPARC_PC22: + // T-imm22 + write32be(Loc, (read32be(Loc) & ~0x003fffff) | ((Val >> 10) & 0x003fffff)); + break; + case R_SPARC_WDISP19: + // V-disp19 + checkInt<21>(Loc, Val, Type); + write32be(Loc, (read32be(Loc) & ~0x0007ffff) | ((Val >> 2) & 0x0007ffff)); + break; + case R_SPARC_GOT10: + case R_SPARC_PC10: + // T-simm10 + write32be(Loc, (read32be(Loc) & ~0x000003ff) | (Val & 0x000003ff)); + break; + case R_SPARC_64: + case R_SPARC_UA64: + case R_SPARC_GLOB_DAT: + // V-xword64 + write64be(Loc, Val); + break; + default: + error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type)); + } +} + +void SPARCV9::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const { + const uint8_t PltData[] = { + 0x03, 0x00, 0x00, 0x00, // sethi (. 
- .PLT0), %g1 + 0x30, 0x68, 0x00, 0x00, // ba,a %xcc, .PLT1 + 0x01, 0x00, 0x00, 0x00, // nop + 0x01, 0x00, 0x00, 0x00, // nop + 0x01, 0x00, 0x00, 0x00, // nop + 0x01, 0x00, 0x00, 0x00, // nop + 0x01, 0x00, 0x00, 0x00, // nop + 0x01, 0x00, 0x00, 0x00 // nop + }; + memcpy(Buf, PltData, sizeof(PltData)); + + uint64_t Off = PltHeaderSize + Index * PltEntrySize; + relocateOne(Buf, R_SPARC_22, Off); + relocateOne(Buf + 4, R_SPARC_WDISP19, -(Off + 4 - PltEntrySize)); +} + +TargetInfo *elf::getSPARCV9TargetInfo() { + static SPARCV9 Target; + return &Target; +} diff --git a/ELF/CMakeLists.txt b/ELF/CMakeLists.txt index b4bc215a77eb..77243bd494d1 100644 --- a/ELF/CMakeLists.txt +++ b/ELF/CMakeLists.txt @@ -1,74 +1,75 @@ set(LLVM_TARGET_DEFINITIONS Options.td) tablegen(LLVM Options.inc -gen-opt-parser-defs) add_public_tablegen_target(ELFOptionsTableGen) if(NOT LLD_BUILT_STANDALONE) set(tablegen_deps intrinsics_gen) endif() add_lld_library(lldELF Arch/AArch64.cpp Arch/AMDGPU.cpp Arch/ARM.cpp Arch/AVR.cpp Arch/Mips.cpp Arch/MipsArchTree.cpp Arch/PPC.cpp Arch/PPC64.cpp + Arch/SPARCV9.cpp Arch/X86.cpp Arch/X86_64.cpp Driver.cpp DriverUtils.cpp EhFrame.cpp Error.cpp Filesystem.cpp GdbIndex.cpp ICF.cpp InputFiles.cpp InputSection.cpp LTO.cpp LinkerScript.cpp MapFile.cpp MarkLive.cpp OutputSections.cpp Relocations.cpp ScriptLexer.cpp ScriptParser.cpp Strings.cpp SymbolTable.cpp Symbols.cpp SyntheticSections.cpp Target.cpp Thunks.cpp Writer.cpp LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} Analysis BinaryFormat BitReader BitWriter Codegen Core DebugInfoDWARF Demangle IPO Linker LTO Object Option Passes MC Support Target TransformUtils LINK_LIBS lldConfig lldCore ${LLVM_PTHREAD_LIB} DEPENDS ELFOptionsTableGen ${tablegen_deps} ) diff --git a/ELF/InputFiles.cpp b/ELF/InputFiles.cpp index 1ff0b4224e70..e07f24d665df 100644 --- a/ELF/InputFiles.cpp +++ b/ELF/InputFiles.cpp @@ -1,1113 +1,1113 @@ //===- InputFiles.cpp -----------------------------------------------------===// // // The LLVM 
Linker // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "InputFiles.h" #include "Error.h" #include "InputSection.h" #include "LinkerScript.h" #include "Memory.h" #include "SymbolTable.h" #include "Symbols.h" #include "SyntheticSections.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/LTO/LTO.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/Path.h" #include "llvm/Support/TarWriter.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::sys::fs; using namespace lld; using namespace lld::elf; TarWriter *elf::Tar; InputFile::InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {} namespace { // In ELF object file all section addresses are zero. If we have multiple // .text sections (when using -ffunction-section or comdat group) then // LLVM DWARF parser will not be able to parse .debug_line correctly, unless // we assign each section some unique address. This callback method assigns // each section an address equal to its offset in ELF object file. 
class ObjectInfo : public LoadedObjectInfo { public: uint64_t getSectionLoadAddress(const object::SectionRef &Sec) const override { return static_cast(Sec).getOffset(); } std::unique_ptr clone() const override { return std::unique_ptr(); } }; } Optional elf::readFile(StringRef Path) { log(Path); auto MBOrErr = MemoryBuffer::getFile(Path); if (auto EC = MBOrErr.getError()) { error("cannot open " + Path + ": " + EC.message()); return None; } std::unique_ptr &MB = *MBOrErr; MemoryBufferRef MBRef = MB->getMemBufferRef(); make>(std::move(MB)); // take MB ownership if (Tar) Tar->append(relativeToRoot(Path), MBRef.getBuffer()); return MBRef; } template void elf::ObjectFile::initializeDwarfLine() { std::unique_ptr Obj = check(object::ObjectFile::createObjectFile(this->MB), toString(this)); ObjectInfo ObjInfo; DWARFContextInMemory Dwarf(*Obj, &ObjInfo); - DwarfLine.reset(new DWARFDebugLine(&Dwarf.getLineSection().Relocs)); - DataExtractor LineData(Dwarf.getLineSection().Data, Config->IsLE, - Config->Wordsize); + DwarfLine.reset(new DWARFDebugLine); + DWARFDataExtractor LineData(Dwarf.getLineSection(), Config->IsLE, + Config->Wordsize); // The second parameter is offset in .debug_line section // for compilation unit (CU) of interest. We have only one // CU (object file), so offset is always 0. DwarfLine->getOrParseLineTable(LineData, 0); } // Returns source line information for a given offset // using DWARF debug info. template Optional elf::ObjectFile::getDILineInfo(InputSectionBase *S, uint64_t Offset) { if (!DwarfLine) initializeDwarfLine(); // The offset to CU is 0. const DWARFDebugLine::LineTable *Tbl = DwarfLine->getLineTable(0); if (!Tbl) return None; // Use fake address calcuated by adding section file offset and offset in // section. See comments for ObjectInfo class. 
DILineInfo Info; Tbl->getFileLineInfoForAddress( S->getOffsetInFile() + Offset, nullptr, DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, Info); if (Info.Line == 0) return None; return Info; } // Returns source line information for a given offset // using DWARF debug info. template std::string elf::ObjectFile::getLineInfo(InputSectionBase *S, uint64_t Offset) { if (Optional Info = getDILineInfo(S, Offset)) return Info->FileName + ":" + std::to_string(Info->Line); return ""; } // Returns "", "foo.a(bar.o)" or "baz.o". std::string lld::toString(const InputFile *F) { if (!F) return ""; if (F->ToStringCache.empty()) { if (F->ArchiveName.empty()) F->ToStringCache = F->getName(); else F->ToStringCache = (F->ArchiveName + "(" + F->getName() + ")").str(); } return F->ToStringCache; } template ELFFileBase::ELFFileBase(Kind K, MemoryBufferRef MB) : InputFile(K, MB) { if (ELFT::TargetEndianness == support::little) EKind = ELFT::Is64Bits ? ELF64LEKind : ELF32LEKind; else EKind = ELFT::Is64Bits ? 
ELF64BEKind : ELF32BEKind; EMachine = getObj().getHeader()->e_machine; OSABI = getObj().getHeader()->e_ident[llvm::ELF::EI_OSABI]; } template typename ELFT::SymRange ELFFileBase::getGlobalSymbols() { return makeArrayRef(Symbols.begin() + FirstNonLocal, Symbols.end()); } template uint32_t ELFFileBase::getSectionIndex(const Elf_Sym &Sym) const { return check(getObj().getSectionIndex(&Sym, Symbols, SymtabSHNDX), toString(this)); } template void ELFFileBase::initSymtab(ArrayRef Sections, const Elf_Shdr *Symtab) { FirstNonLocal = Symtab->sh_info; Symbols = check(getObj().symbols(Symtab), toString(this)); if (FirstNonLocal == 0 || FirstNonLocal > Symbols.size()) fatal(toString(this) + ": invalid sh_info in symbol table"); StringTable = check(getObj().getStringTableForSymtab(*Symtab, Sections), toString(this)); } template elf::ObjectFile::ObjectFile(MemoryBufferRef M, StringRef ArchiveName) : ELFFileBase(Base::ObjectKind, M) { this->ArchiveName = ArchiveName; } template ArrayRef elf::ObjectFile::getLocalSymbols() { if (this->SymbolBodies.empty()) return this->SymbolBodies; return makeArrayRef(this->SymbolBodies).slice(1, this->FirstNonLocal - 1); } template ArrayRef elf::ObjectFile::getSymbols() { if (this->SymbolBodies.empty()) return this->SymbolBodies; return makeArrayRef(this->SymbolBodies).slice(1); } template void elf::ObjectFile::parse(DenseSet &ComdatGroups) { // Read section and symbol tables. initializeSections(ComdatGroups); initializeSymbols(); } // Sections with SHT_GROUP and comdat bits define comdat section groups. // They are identified and deduplicated by group name. This function // returns a group name. template StringRef elf::ObjectFile::getShtGroupSignature(ArrayRef Sections, const Elf_Shdr &Sec) { // Group signatures are stored as symbol names in object files. // sh_info contains a symbol index, so we fetch a symbol and read its name. 
if (this->Symbols.empty()) this->initSymtab( Sections, check(object::getSection(Sections, Sec.sh_link), toString(this))); const Elf_Sym *Sym = check( object::getSymbol(this->Symbols, Sec.sh_info), toString(this)); StringRef Signature = check(Sym->getName(this->StringTable), toString(this)); // As a special case, if a symbol is a section symbol and has no name, // we use a section name as a signature. // // Such SHT_GROUP sections are invalid from the perspective of the ELF // standard, but GNU gold 1.14 (the neweset version as of July 2017) or // older produce such sections as outputs for the -r option, so we need // a bug-compatibility. if (Signature.empty() && Sym->getType() == STT_SECTION) return getSectionName(Sec); return Signature; } template ArrayRef::Elf_Word> elf::ObjectFile::getShtGroupEntries(const Elf_Shdr &Sec) { const ELFFile &Obj = this->getObj(); ArrayRef Entries = check( Obj.template getSectionContentsAsArray(&Sec), toString(this)); if (Entries.empty() || Entries[0] != GRP_COMDAT) fatal(toString(this) + ": unsupported SHT_GROUP format"); return Entries.slice(1); } template bool elf::ObjectFile::shouldMerge(const Elf_Shdr &Sec) { // We don't merge sections if -O0 (default is -O1). This makes sometimes // the linker significantly faster, although the output will be bigger. if (Config->Optimize == 0) return false; // Do not merge sections if generating a relocatable object. It makes // the code simpler because we do not need to update relocation addends // to reflect changes introduced by merging. Instead of that we write // such "merge" sections into separate OutputSections and keep SHF_MERGE // / SHF_STRINGS flags and sh_entsize value to be able to perform merging // later during a final linking. if (Config->Relocatable) return false; // A mergeable section with size 0 is useless because they don't have // any data to merge. A mergeable string section with size 0 can be // argued as invalid because it doesn't end with a null character. 
// We'll avoid a mess by handling them as if they were non-mergeable. if (Sec.sh_size == 0) return false; // Check for sh_entsize. The ELF spec is not clear about the zero // sh_entsize. It says that "the member [sh_entsize] contains 0 if // the section does not hold a table of fixed-size entries". We know // that Rust 1.13 produces a string mergeable section with a zero // sh_entsize. Here we just accept it rather than being picky about it. uint64_t EntSize = Sec.sh_entsize; if (EntSize == 0) return false; if (Sec.sh_size % EntSize) fatal(toString(this) + ": SHF_MERGE section size must be a multiple of sh_entsize"); uint64_t Flags = Sec.sh_flags; if (!(Flags & SHF_MERGE)) return false; if (Flags & SHF_WRITE) fatal(toString(this) + ": writable SHF_MERGE section is not supported"); // Don't try to merge if the alignment is larger than the sh_entsize and this // is not SHF_STRINGS. // // Since this is not a SHF_STRINGS, we would need to pad after every entity. // It would be equivalent for the producer of the .o to just set a larger // sh_entsize. if (Flags & SHF_STRINGS) return true; return Sec.sh_addralign <= EntSize; } template void elf::ObjectFile::initializeSections( DenseSet &ComdatGroups) { const ELFFile &Obj = this->getObj(); ArrayRef ObjSections = check(this->getObj().sections(), toString(this)); uint64_t Size = ObjSections.size(); this->Sections.resize(Size); this->SectionStringTable = check(Obj.getSectionStringTable(ObjSections), toString(this)); for (size_t I = 0, E = ObjSections.size(); I < E; I++) { if (this->Sections[I] == &InputSection::Discarded) continue; const Elf_Shdr &Sec = ObjSections[I]; // SHF_EXCLUDE'ed sections are discarded by the linker. However, // if -r is given, we'll let the final link discard such sections. // This is compatible with GNU. 
if ((Sec.sh_flags & SHF_EXCLUDE) && !Config->Relocatable) { this->Sections[I] = &InputSection::Discarded; continue; } switch (Sec.sh_type) { case SHT_GROUP: { // De-duplicate section groups by their signatures. StringRef Signature = getShtGroupSignature(ObjSections, Sec); bool IsNew = ComdatGroups.insert(CachedHashStringRef(Signature)).second; this->Sections[I] = &InputSection::Discarded; // If it is a new section group, we want to keep group members. // Group leader sections, which contain indices of group members, are // discarded because they are useless beyond this point. The only // exception is the -r option because in order to produce re-linkable // object files, we want to pass through basically everything. if (IsNew) { if (Config->Relocatable) this->Sections[I] = createInputSection(Sec); continue; } // Otherwise, discard group members. for (uint32_t SecIndex : getShtGroupEntries(Sec)) { if (SecIndex >= Size) fatal(toString(this) + ": invalid section index in group: " + Twine(SecIndex)); this->Sections[SecIndex] = &InputSection::Discarded; } break; } case SHT_SYMTAB: this->initSymtab(ObjSections, &Sec); break; case SHT_SYMTAB_SHNDX: this->SymtabSHNDX = check(Obj.getSHNDXTable(Sec, ObjSections), toString(this)); break; case SHT_STRTAB: case SHT_NULL: break; default: this->Sections[I] = createInputSection(Sec); } // .ARM.exidx sections have a reverse dependency on the InputSection they // have a SHF_LINK_ORDER dependency, this is identified by the sh_link. 
if (Sec.sh_flags & SHF_LINK_ORDER) { if (Sec.sh_link >= this->Sections.size()) fatal(toString(this) + ": invalid sh_link index: " + Twine(Sec.sh_link)); this->Sections[Sec.sh_link]->DependentSections.push_back( this->Sections[I]); } } } template InputSectionBase *elf::ObjectFile::getRelocTarget(const Elf_Shdr &Sec) { uint32_t Idx = Sec.sh_info; if (Idx >= this->Sections.size()) fatal(toString(this) + ": invalid relocated section index: " + Twine(Idx)); InputSectionBase *Target = this->Sections[Idx]; // Strictly speaking, a relocation section must be included in the // group of the section it relocates. However, LLVM 3.3 and earlier // would fail to do so, so we gracefully handle that case. if (Target == &InputSection::Discarded) return nullptr; if (!Target) fatal(toString(this) + ": unsupported relocation reference"); return Target; } // Create a regular InputSection class that has the same contents // as a given section. InputSectionBase *toRegularSection(MergeInputSection *Sec) { auto *Ret = make(Sec->Flags, Sec->Type, Sec->Alignment, Sec->Data, Sec->Name); Ret->File = Sec->File; return Ret; } template InputSectionBase * elf::ObjectFile::createInputSection(const Elf_Shdr &Sec) { StringRef Name = getSectionName(Sec); switch (Sec.sh_type) { case SHT_ARM_ATTRIBUTES: // FIXME: ARM meta-data section. Retain the first attribute section // we see. The eglibc ARM dynamic loaders require the presence of an // attribute section for dlopen to work. // In a full implementation we would merge all attribute sections. if (InX::ARMAttributes == nullptr) { InX::ARMAttributes = make(this, &Sec, Name); return InX::ARMAttributes; } return &InputSection::Discarded; case SHT_RELA: case SHT_REL: { // Find the relocation target section and associate this // section with it. Target can be discarded, for example // if it is a duplicated member of SHT_GROUP section, we // do not create or proccess relocatable sections then. 
InputSectionBase *Target = getRelocTarget(Sec); if (!Target) return nullptr; // This section contains relocation information. // If -r is given, we do not interpret or apply relocation // but just copy relocation sections to output. if (Config->Relocatable) return make(this, &Sec, Name); if (Target->FirstRelocation) fatal(toString(this) + ": multiple relocation sections to one section are not supported"); // Mergeable sections with relocations are tricky because relocations // need to be taken into account when comparing section contents for // merging. It's not worth supporting such mergeable sections because // they are rare and it'd complicates the internal design (we usually // have to determine if two sections are mergeable early in the link // process much before applying relocations). We simply handle mergeable // sections with relocations as non-mergeable. if (auto *MS = dyn_cast(Target)) { Target = toRegularSection(MS); this->Sections[Sec.sh_info] = Target; } size_t NumRelocations; if (Sec.sh_type == SHT_RELA) { ArrayRef Rels = check(this->getObj().relas(&Sec), toString(this)); Target->FirstRelocation = Rels.begin(); NumRelocations = Rels.size(); Target->AreRelocsRela = true; } else { ArrayRef Rels = check(this->getObj().rels(&Sec), toString(this)); Target->FirstRelocation = Rels.begin(); NumRelocations = Rels.size(); Target->AreRelocsRela = false; } assert(isUInt<31>(NumRelocations)); Target->NumRelocations = NumRelocations; // Relocation sections processed by the linker are usually removed // from the output, so returning `nullptr` for the normal case. // However, if -emit-relocs is given, we need to leave them in the output. // (Some post link analysis tools need this information.) if (Config->EmitRelocs) { InputSection *RelocSec = make(this, &Sec, Name); // We will not emit relocation section if target was discarded. 
Target->DependentSections.push_back(RelocSec); return RelocSec; } return nullptr; } } // The GNU linker uses .note.GNU-stack section as a marker indicating // that the code in the object file does not expect that the stack is // executable (in terms of NX bit). If all input files have the marker, // the GNU linker adds a PT_GNU_STACK segment to tells the loader to // make the stack non-executable. Most object files have this section as // of 2017. // // But making the stack non-executable is a norm today for security // reasons. Failure to do so may result in a serious security issue. // Therefore, we make LLD always add PT_GNU_STACK unless it is // explicitly told to do otherwise (by -z execstack). Because the stack // executable-ness is controlled solely by command line options, // .note.GNU-stack sections are simply ignored. if (Name == ".note.GNU-stack") return &InputSection::Discarded; // Split stacks is a feature to support a discontiguous stack. At least // as of 2017, it seems that the feature is not being used widely. // Only GNU gold supports that. We don't. For the details about that, // see https://gcc.gnu.org/wiki/SplitStacks if (Name == ".note.GNU-split-stack") { error(toString(this) + ": object file compiled with -fsplit-stack is not supported"); return &InputSection::Discarded; } if (Config->Strip != StripPolicy::None && Name.startswith(".debug")) return &InputSection::Discarded; // If -gdb-index is given, LLD creates .gdb_index section, and that // section serves the same purpose as .debug_gnu_pub{names,types} sections. // If that's the case, we want to eliminate .debug_gnu_pub{names,types} // because they are redundant and can waste large amount of disk space // (for example, they are about 400 MiB in total for a clang debug build.) if (Config->GdbIndex && (Name == ".debug_gnu_pubnames" || Name == ".debug_gnu_pubtypes")) return &InputSection::Discarded; // The linkonce feature is a sort of proto-comdat. 
Some glibc i386 object // files contain definitions of symbol "__x86.get_pc_thunk.bx" in linkonce // sections. Drop those sections to avoid duplicate symbol errors. // FIXME: This is glibc PR20543, we should remove this hack once that has been // fixed for a while. if (Name.startswith(".gnu.linkonce.")) return &InputSection::Discarded; // The linker merges EH (exception handling) frames and creates a // .eh_frame_hdr section for runtime. So we handle them with a special // class. For relocatable outputs, they are just passed through. if (Name == ".eh_frame" && !Config->Relocatable) return make(this, &Sec, Name); if (shouldMerge(Sec)) return make(this, &Sec, Name); return make(this, &Sec, Name); } template StringRef elf::ObjectFile::getSectionName(const Elf_Shdr &Sec) { return check(this->getObj().getSectionName(&Sec, SectionStringTable), toString(this)); } template void elf::ObjectFile::initializeSymbols() { SymbolBodies.reserve(this->Symbols.size()); for (const Elf_Sym &Sym : this->Symbols) SymbolBodies.push_back(createSymbolBody(&Sym)); } template InputSectionBase *elf::ObjectFile::getSection(const Elf_Sym &Sym) const { uint32_t Index = this->getSectionIndex(Sym); if (Index >= this->Sections.size()) fatal(toString(this) + ": invalid section index: " + Twine(Index)); InputSectionBase *S = this->Sections[Index]; // We found that GNU assembler 2.17.50 [FreeBSD] 2007-07-03 could // generate broken objects. STT_SECTION/STT_NOTYPE symbols can be // associated with SHT_REL[A]/SHT_SYMTAB/SHT_STRTAB sections. // In this case it is fine for section to be null here as we do not // allocate sections of these types. 
if (!S) { if (Index == 0 || Sym.getType() == STT_SECTION || Sym.getType() == STT_NOTYPE) return nullptr; fatal(toString(this) + ": invalid section index: " + Twine(Index)); } if (S == &InputSection::Discarded) return S; return S->Repl; } template SymbolBody *elf::ObjectFile::createSymbolBody(const Elf_Sym *Sym) { int Binding = Sym->getBinding(); InputSectionBase *Sec = getSection(*Sym); uint8_t StOther = Sym->st_other; uint8_t Type = Sym->getType(); uint64_t Value = Sym->st_value; uint64_t Size = Sym->st_size; if (Binding == STB_LOCAL) { if (Sym->getType() == STT_FILE) SourceFile = check(Sym->getName(this->StringTable), toString(this)); if (this->StringTable.size() <= Sym->st_name) fatal(toString(this) + ": invalid symbol name offset"); StringRefZ Name = this->StringTable.data() + Sym->st_name; if (Sym->st_shndx == SHN_UNDEF) return make(Name, /*IsLocal=*/true, StOther, Type, this); return make(Name, /*IsLocal=*/true, StOther, Type, Value, Size, Sec, this); } StringRef Name = check(Sym->getName(this->StringTable), toString(this)); switch (Sym->st_shndx) { case SHN_UNDEF: return elf::Symtab::X ->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type, /*CanOmitFromDynSym=*/false, this) ->body(); case SHN_COMMON: if (Value == 0 || Value >= UINT32_MAX) fatal(toString(this) + ": common symbol '" + Name + "' has invalid alignment: " + Twine(Value)); return elf::Symtab::X ->addCommon(Name, Size, Value, Binding, StOther, Type, this) ->body(); } switch (Binding) { default: fatal(toString(this) + ": unexpected binding: " + Twine(Binding)); case STB_GLOBAL: case STB_WEAK: case STB_GNU_UNIQUE: if (Sec == &InputSection::Discarded) return elf::Symtab::X ->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type, /*CanOmitFromDynSym=*/false, this) ->body(); return elf::Symtab::X ->addRegular(Name, StOther, Type, Value, Size, Binding, Sec, this) ->body(); } } ArchiveFile::ArchiveFile(std::unique_ptr &&File) : InputFile(ArchiveKind, File->getMemoryBufferRef()), 
File(std::move(File)) {} template void ArchiveFile::parse() { Symbols.reserve(File->getNumberOfSymbols()); for (const Archive::Symbol &Sym : File->symbols()) Symbols.push_back(Symtab::X->addLazyArchive(this, Sym)); } // Returns a buffer pointing to a member file containing a given symbol. std::pair ArchiveFile::getMember(const Archive::Symbol *Sym) { Archive::Child C = check(Sym->getMember(), toString(this) + ": could not get the member for symbol " + Sym->getName()); if (!Seen.insert(C.getChildOffset()).second) return {MemoryBufferRef(), 0}; MemoryBufferRef Ret = check(C.getMemoryBufferRef(), toString(this) + ": could not get the buffer for the member defining symbol " + Sym->getName()); if (C.getParent()->isThin() && Tar) Tar->append(relativeToRoot(check(C.getFullName(), toString(this))), Ret.getBuffer()); if (C.getParent()->isThin()) return {Ret, 0}; return {Ret, C.getChildOffset()}; } template SharedFile::SharedFile(MemoryBufferRef M, StringRef DefaultSoName) : ELFFileBase(Base::SharedKind, M), SoName(DefaultSoName), AsNeeded(Config->AsNeeded) {} template const typename ELFT::Shdr * SharedFile::getSection(const Elf_Sym &Sym) const { return check( this->getObj().getSection(&Sym, this->Symbols, this->SymtabSHNDX), toString(this)); } // Partially parse the shared object file so that we can call // getSoName on this object. template void SharedFile::parseSoName() { const Elf_Shdr *DynamicSec = nullptr; const ELFFile Obj = this->getObj(); ArrayRef Sections = check(Obj.sections(), toString(this)); // Search for .dynsym, .dynamic, .symtab, .gnu.version and .gnu.version_d. 
for (const Elf_Shdr &Sec : Sections) { switch (Sec.sh_type) { default: continue; case SHT_DYNSYM: this->initSymtab(Sections, &Sec); break; case SHT_DYNAMIC: DynamicSec = &Sec; break; case SHT_SYMTAB_SHNDX: this->SymtabSHNDX = check(Obj.getSHNDXTable(Sec, Sections), toString(this)); break; case SHT_GNU_versym: this->VersymSec = &Sec; break; case SHT_GNU_verdef: this->VerdefSec = &Sec; break; } } if (this->VersymSec && this->Symbols.empty()) error("SHT_GNU_versym should be associated with symbol table"); // Search for a DT_SONAME tag to initialize this->SoName. if (!DynamicSec) return; ArrayRef Arr = check(Obj.template getSectionContentsAsArray(DynamicSec), toString(this)); for (const Elf_Dyn &Dyn : Arr) { if (Dyn.d_tag == DT_SONAME) { uint64_t Val = Dyn.getVal(); if (Val >= this->StringTable.size()) fatal(toString(this) + ": invalid DT_SONAME entry"); SoName = this->StringTable.data() + Val; return; } } } // Parse the version definitions in the object file if present. Returns a vector // whose nth element contains a pointer to the Elf_Verdef for version identifier // n. Version identifiers that are not definitions map to nullptr. The array // always has at least length 1. template std::vector SharedFile::parseVerdefs(const Elf_Versym *&Versym) { std::vector Verdefs(1); // We only need to process symbol versions for this DSO if it has both a // versym and a verdef section, which indicates that the DSO contains symbol // version definitions. if (!VersymSec || !VerdefSec) return Verdefs; // The location of the first global versym entry. const char *Base = this->MB.getBuffer().data(); Versym = reinterpret_cast(Base + VersymSec->sh_offset) + this->FirstNonLocal; // We cannot determine the largest verdef identifier without inspecting // every Elf_Verdef, but both bfd and gold assign verdef identifiers // sequentially starting from 1, so we predict that the largest identifier // will be VerdefCount. 
unsigned VerdefCount = VerdefSec->sh_info; Verdefs.resize(VerdefCount + 1); // Build the Verdefs array by following the chain of Elf_Verdef objects // from the start of the .gnu.version_d section. const char *Verdef = Base + VerdefSec->sh_offset; for (unsigned I = 0; I != VerdefCount; ++I) { auto *CurVerdef = reinterpret_cast(Verdef); Verdef += CurVerdef->vd_next; unsigned VerdefIndex = CurVerdef->vd_ndx; if (Verdefs.size() <= VerdefIndex) Verdefs.resize(VerdefIndex + 1); Verdefs[VerdefIndex] = CurVerdef; } return Verdefs; } // Fully parse the shared object file. This must be called after parseSoName(). template void SharedFile::parseRest() { // Create mapping from version identifiers to Elf_Verdef entries. const Elf_Versym *Versym = nullptr; std::vector Verdefs = parseVerdefs(Versym); Elf_Sym_Range Syms = this->getGlobalSymbols(); for (const Elf_Sym &Sym : Syms) { unsigned VersymIndex = 0; if (Versym) { VersymIndex = Versym->vs_index; ++Versym; } bool Hidden = VersymIndex & VERSYM_HIDDEN; VersymIndex = VersymIndex & ~VERSYM_HIDDEN; StringRef Name = check(Sym.getName(this->StringTable), toString(this)); if (Sym.isUndefined()) { Undefs.push_back(Name); continue; } // Ignore local symbols. if (Versym && VersymIndex == VER_NDX_LOCAL) continue; const Elf_Verdef *V = VersymIndex == VER_NDX_GLOBAL ? nullptr : Verdefs[VersymIndex]; if (!Hidden) elf::Symtab::X->addShared(this, Name, Sym, V); // Also add the symbol with the versioned name to handle undefined symbols // with explicit versions. if (V) { StringRef VerName = this->StringTable.data() + V->getAux()->vda_name; Name = Saver.save(Name + "@" + VerName); elf::Symtab::X->addShared(this, Name, Sym, V); } } } static ELFKind getBitcodeELFKind(const Triple &T) { if (T.isLittleEndian()) return T.isArch64Bit() ? ELF64LEKind : ELF32LEKind; return T.isArch64Bit() ? 
ELF64BEKind : ELF32BEKind; } static uint8_t getBitcodeMachineKind(StringRef Path, const Triple &T) { switch (T.getArch()) { case Triple::aarch64: return EM_AARCH64; case Triple::arm: case Triple::thumb: return EM_ARM; case Triple::avr: return EM_AVR; case Triple::mips: case Triple::mipsel: case Triple::mips64: case Triple::mips64el: return EM_MIPS; case Triple::ppc: return EM_PPC; case Triple::ppc64: return EM_PPC64; case Triple::x86: return T.isOSIAMCU() ? EM_IAMCU : EM_386; case Triple::x86_64: return EM_X86_64; default: fatal(Path + ": could not infer e_machine from bitcode target triple " + T.str()); } } BitcodeFile::BitcodeFile(MemoryBufferRef MB, StringRef ArchiveName, uint64_t OffsetInArchive) : InputFile(BitcodeKind, MB) { this->ArchiveName = ArchiveName; // Here we pass a new MemoryBufferRef which is identified by ArchiveName // (the fully resolved path of the archive) + member name + offset of the // member in the archive. // ThinLTO uses the MemoryBufferRef identifier to access its internal // data structures and if two archives define two members with the same name, // this causes a collision which result in only one of the objects being // taken into consideration at LTO time (which very likely causes undefined // symbols later in the link stage). 
MemoryBufferRef MBRef(MB.getBuffer(), Saver.save(ArchiveName + MB.getBufferIdentifier() + utostr(OffsetInArchive))); Obj = check(lto::InputFile::create(MBRef), toString(this)); Triple T(Obj->getTargetTriple()); EKind = getBitcodeELFKind(T); EMachine = getBitcodeMachineKind(MB.getBufferIdentifier(), T); } static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) { switch (GvVisibility) { case GlobalValue::DefaultVisibility: return STV_DEFAULT; case GlobalValue::HiddenVisibility: return STV_HIDDEN; case GlobalValue::ProtectedVisibility: return STV_PROTECTED; } llvm_unreachable("unknown visibility"); } template static Symbol *createBitcodeSymbol(const std::vector &KeptComdats, const lto::InputFile::Symbol &ObjSym, BitcodeFile *F) { StringRef NameRef = Saver.save(ObjSym.getName()); uint32_t Binding = ObjSym.isWeak() ? STB_WEAK : STB_GLOBAL; uint8_t Type = ObjSym.isTLS() ? STT_TLS : STT_NOTYPE; uint8_t Visibility = mapVisibility(ObjSym.getVisibility()); bool CanOmitFromDynSym = ObjSym.canBeOmittedFromSymbolTable(); int C = ObjSym.getComdatIndex(); if (C != -1 && !KeptComdats[C]) return Symtab::X->addUndefined(NameRef, /*IsLocal=*/false, Binding, Visibility, Type, CanOmitFromDynSym, F); if (ObjSym.isUndefined()) return Symtab::X->addUndefined(NameRef, /*IsLocal=*/false, Binding, Visibility, Type, CanOmitFromDynSym, F); if (ObjSym.isCommon()) return Symtab::X->addCommon(NameRef, ObjSym.getCommonSize(), ObjSym.getCommonAlignment(), Binding, Visibility, STT_OBJECT, F); return Symtab::X->addBitcode(NameRef, Binding, Visibility, Type, CanOmitFromDynSym, F); } template void BitcodeFile::parse(DenseSet &ComdatGroups) { std::vector KeptComdats; for (StringRef S : Obj->getComdatTable()) KeptComdats.push_back(ComdatGroups.insert(CachedHashStringRef(S)).second); for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) Symbols.push_back(createBitcodeSymbol(KeptComdats, ObjSym, this)); } static ELFKind getELFKind(MemoryBufferRef MB) { unsigned char Size; unsigned 
char Endian; std::tie(Size, Endian) = getElfArchType(MB.getBuffer()); if (Endian != ELFDATA2LSB && Endian != ELFDATA2MSB) fatal(MB.getBufferIdentifier() + ": invalid data encoding"); if (Size != ELFCLASS32 && Size != ELFCLASS64) fatal(MB.getBufferIdentifier() + ": invalid file class"); size_t BufSize = MB.getBuffer().size(); if ((Size == ELFCLASS32 && BufSize < sizeof(Elf32_Ehdr)) || (Size == ELFCLASS64 && BufSize < sizeof(Elf64_Ehdr))) fatal(MB.getBufferIdentifier() + ": file is too short"); if (Size == ELFCLASS32) return (Endian == ELFDATA2LSB) ? ELF32LEKind : ELF32BEKind; return (Endian == ELFDATA2LSB) ? ELF64LEKind : ELF64BEKind; } template void BinaryFile::parse() { ArrayRef Data = toArrayRef(MB.getBuffer()); auto *Section = make(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, 8, Data, ".data"); Sections.push_back(Section); // For each input file foo that is embedded to a result as a binary // blob, we define _binary_foo_{start,end,size} symbols, so that // user programs can access blobs by name. Non-alphanumeric // characters in a filename are replaced with underscore. 
std::string S = "_binary_" + MB.getBufferIdentifier().str(); for (size_t I = 0; I < S.size(); ++I) if (!isalnum(S[I])) S[I] = '_'; elf::Symtab::X->addRegular(Saver.save(S + "_start"), STV_DEFAULT, STT_OBJECT, 0, 0, STB_GLOBAL, Section, nullptr); elf::Symtab::X->addRegular(Saver.save(S + "_end"), STV_DEFAULT, STT_OBJECT, Data.size(), 0, STB_GLOBAL, Section, nullptr); elf::Symtab::X->addRegular(Saver.save(S + "_size"), STV_DEFAULT, STT_OBJECT, Data.size(), 0, STB_GLOBAL, nullptr, nullptr); } static bool isBitcode(MemoryBufferRef MB) { using namespace sys::fs; return identify_magic(MB.getBuffer()) == file_magic::bitcode; } InputFile *elf::createObjectFile(MemoryBufferRef MB, StringRef ArchiveName, uint64_t OffsetInArchive) { if (isBitcode(MB)) return make(MB, ArchiveName, OffsetInArchive); switch (getELFKind(MB)) { case ELF32LEKind: return make>(MB, ArchiveName); case ELF32BEKind: return make>(MB, ArchiveName); case ELF64LEKind: return make>(MB, ArchiveName); case ELF64BEKind: return make>(MB, ArchiveName); default: llvm_unreachable("getELFKind"); } } InputFile *elf::createSharedFile(MemoryBufferRef MB, StringRef DefaultSoName) { switch (getELFKind(MB)) { case ELF32LEKind: return make>(MB, DefaultSoName); case ELF32BEKind: return make>(MB, DefaultSoName); case ELF64LEKind: return make>(MB, DefaultSoName); case ELF64BEKind: return make>(MB, DefaultSoName); default: llvm_unreachable("getELFKind"); } } MemoryBufferRef LazyObjectFile::getBuffer() { if (Seen) return MemoryBufferRef(); Seen = true; return MB; } InputFile *LazyObjectFile::fetch() { MemoryBufferRef MBRef = getBuffer(); if (MBRef.getBuffer().empty()) return nullptr; return createObjectFile(MBRef, ArchiveName, OffsetInArchive); } template void LazyObjectFile::parse() { for (StringRef Sym : getSymbols()) Symtab::X->addLazyObject(Sym, *this); } template std::vector LazyObjectFile::getElfSymbols() { typedef typename ELFT::Shdr Elf_Shdr; typedef typename ELFT::Sym Elf_Sym; typedef typename ELFT::SymRange 
Elf_Sym_Range; const ELFFile Obj(this->MB.getBuffer()); ArrayRef Sections = check(Obj.sections(), toString(this)); for (const Elf_Shdr &Sec : Sections) { if (Sec.sh_type != SHT_SYMTAB) continue; Elf_Sym_Range Syms = check(Obj.symbols(&Sec), toString(this)); uint32_t FirstNonLocal = Sec.sh_info; StringRef StringTable = check(Obj.getStringTableForSymtab(Sec, Sections), toString(this)); std::vector V; for (const Elf_Sym &Sym : Syms.slice(FirstNonLocal)) if (Sym.st_shndx != SHN_UNDEF) V.push_back(check(Sym.getName(StringTable), toString(this))); return V; } return {}; } std::vector LazyObjectFile::getBitcodeSymbols() { std::unique_ptr Obj = check(lto::InputFile::create(this->MB), toString(this)); std::vector V; for (const lto::InputFile::Symbol &Sym : Obj->symbols()) if (!Sym.isUndefined()) V.push_back(Saver.save(Sym.getName())); return V; } // Returns a vector of globally-visible defined symbol names. std::vector LazyObjectFile::getSymbols() { if (isBitcode(this->MB)) return getBitcodeSymbols(); switch (getELFKind(this->MB)) { case ELF32LEKind: return getElfSymbols(); case ELF32BEKind: return getElfSymbols(); case ELF64LEKind: return getElfSymbols(); case ELF64BEKind: return getElfSymbols(); default: llvm_unreachable("getELFKind"); } } template void ArchiveFile::parse(); template void ArchiveFile::parse(); template void ArchiveFile::parse(); template void ArchiveFile::parse(); template void BitcodeFile::parse(DenseSet &); template void BitcodeFile::parse(DenseSet &); template void BitcodeFile::parse(DenseSet &); template void BitcodeFile::parse(DenseSet &); template void LazyObjectFile::parse(); template void LazyObjectFile::parse(); template void LazyObjectFile::parse(); template void LazyObjectFile::parse(); template class elf::ELFFileBase; template class elf::ELFFileBase; template class elf::ELFFileBase; template class elf::ELFFileBase; template class elf::ObjectFile; template class elf::ObjectFile; template class elf::ObjectFile; template class elf::ObjectFile; 
template class elf::SharedFile; template class elf::SharedFile; template class elf::SharedFile; template class elf::SharedFile; template void BinaryFile::parse(); template void BinaryFile::parse(); template void BinaryFile::parse(); template void BinaryFile::parse(); diff --git a/ELF/SymbolTable.cpp b/ELF/SymbolTable.cpp index ab8802c86d8e..d75b89f17527 100644 --- a/ELF/SymbolTable.cpp +++ b/ELF/SymbolTable.cpp @@ -1,759 +1,769 @@ //===- SymbolTable.cpp ----------------------------------------------------===// // // The LLVM Linker // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // Symbol table is a bag of all known symbols. We put all symbols of // all input files to the symbol table. The symbol table is basically // a hash table with the logic to resolve symbol name conflicts using // the symbol types. // //===----------------------------------------------------------------------===// #include "SymbolTable.h" #include "Config.h" #include "Error.h" #include "LinkerScript.h" #include "Memory.h" #include "Symbols.h" #include "llvm/ADT/STLExtras.h" using namespace llvm; using namespace llvm::object; using namespace llvm::ELF; using namespace lld; using namespace lld::elf; // All input object files must be for the same architecture // (e.g. it does not make sense to link x86 object files with // MIPS object files.) This function checks for that error. 
template static bool isCompatible(InputFile *F) { if (!isa>(F) && !isa(F)) return true; if (F->EKind == Config->EKind && F->EMachine == Config->EMachine) { if (Config->EMachine != EM_MIPS) return true; if (isMipsN32Abi(F) == Config->MipsN32Abi) return true; } if (!Config->Emulation.empty()) error(toString(F) + " is incompatible with " + Config->Emulation); else error(toString(F) + " is incompatible with " + toString(Config->FirstElf)); return false; } // Add symbols in File to the symbol table. template void SymbolTable::addFile(InputFile *File) { if (!Config->FirstElf && isa>(File)) Config->FirstElf = File; if (!isCompatible(File)) return; // Binary file if (auto *F = dyn_cast(File)) { BinaryFiles.push_back(F); F->parse(); return; } // .a file if (auto *F = dyn_cast(File)) { F->parse(); return; } // Lazy object file if (auto *F = dyn_cast(File)) { F->parse(); return; } if (Config->Trace) message(toString(File)); // .so file if (auto *F = dyn_cast>(File)) { // DSOs are uniquified not by filename but by soname. F->parseSoName(); if (ErrorCount || !SoNames.insert(F->SoName).second) return; SharedFiles.push_back(F); F->parseRest(); return; } // LLVM bitcode file if (auto *F = dyn_cast(File)) { BitcodeFiles.push_back(F); F->parse(ComdatGroups); return; } // Regular object file auto *F = cast>(File); ObjectFiles.push_back(F); F->parse(ComdatGroups); } // This function is where all the optimizations of link-time // optimization happens. When LTO is in use, some input files are // not in native object file format but in the LLVM bitcode format. // This function compiles bitcode files into a few big native files // using LLVM functions and replaces bitcode symbols with the results. // Because all bitcode files that consist of a program are passed // to the compiler at once, it can do whole-program optimization. template void SymbolTable::addCombinedLTOObject() { if (BitcodeFiles.empty()) return; // Compile bitcode files and replace bitcode symbols. 
LTO.reset(new BitcodeCompiler); for (BitcodeFile *F : BitcodeFiles) LTO->add(*F); for (InputFile *File : LTO->compile()) { ObjectFile *Obj = cast>(File); DenseSet DummyGroups; Obj->parse(DummyGroups); ObjectFiles.push_back(Obj); } } template DefinedRegular *SymbolTable::addAbsolute(StringRef Name, uint8_t Visibility, uint8_t Binding) { Symbol *Sym = addRegular(Name, Visibility, STT_NOTYPE, 0, 0, Binding, nullptr, nullptr); return cast(Sym->body()); } // Add Name as an "ignored" symbol. An ignored symbol is a regular // linker-synthesized defined symbol, but is only defined if needed. template DefinedRegular *SymbolTable::addIgnored(StringRef Name, uint8_t Visibility) { SymbolBody *S = find(Name); if (!S || S->isInCurrentDSO()) return nullptr; return addAbsolute(Name, Visibility); } // Set a flag for --trace-symbol so that we can print out a log message // if a new symbol with the same name is inserted into the symbol table. template void SymbolTable::trace(StringRef Name) { Symtab.insert({CachedHashStringRef(Name), {-1, true}}); } // Rename SYM as __wrap_SYM. The original symbol is preserved as __real_SYM. // Used to implement --wrap. template void SymbolTable::addSymbolWrap(StringRef Name) { SymbolBody *B = find(Name); if (!B) return; Symbol *Sym = B->symbol(); Symbol *Real = addUndefined(Saver.save("__real_" + Name)); Symbol *Wrap = addUndefined(Saver.save("__wrap_" + Name)); // Tell LTO not to eliminate this symbol Wrap->IsUsedInRegularObj = true; Config->RenamedSymbols[Real] = {Sym, Real->Binding}; Config->RenamedSymbols[Sym] = {Wrap, Sym->Binding}; } // Creates alias for symbol. Used to implement --defsym=ALIAS=SYM. 
template void SymbolTable::addSymbolAlias(StringRef Alias, StringRef Name) { SymbolBody *B = find(Name); if (!B) { error("-defsym: undefined symbol: " + Name); return; } Symbol *Sym = B->symbol(); Symbol *AliasSym = addUndefined(Alias); // Tell LTO not to eliminate this symbol Sym->IsUsedInRegularObj = true; Config->RenamedSymbols[AliasSym] = {Sym, AliasSym->Binding}; } // Apply symbol renames created by -wrap and -defsym. The renames are created // before LTO in addSymbolWrap() and addSymbolAlias() to have a chance to inform // LTO (if LTO is running) not to include these symbols in IPO. Now that the // symbols are finalized, we can perform the replacement. template void SymbolTable::applySymbolRenames() { for (auto &KV : Config->RenamedSymbols) { Symbol *Dst = KV.first; Symbol *Src = KV.second.Target; + Dst->body()->copy(Src->body()); Dst->Binding = KV.second.OriginalBinding; - - // We rename symbols by replacing the old symbol's SymbolBody with - // the new symbol's SymbolBody. The only attribute we want to keep - // is the symbol name, so that two symbols don't have the same name. - StringRef S = Dst->body()->getName(); - memcpy(Dst->Body.buffer, Src->Body.buffer, sizeof(Symbol::Body)); - Dst->body()->setName(S); } } static uint8_t getMinVisibility(uint8_t VA, uint8_t VB) { if (VA == STV_DEFAULT) return VB; if (VB == STV_DEFAULT) return VA; return std::min(VA, VB); } // Find an existing symbol or create and insert a new one. 
template std::pair SymbolTable::insert(StringRef Name) { auto P = Symtab.insert( {CachedHashStringRef(Name), SymIndex((int)SymVector.size(), false)}); SymIndex &V = P.first->second; bool IsNew = P.second; if (V.Idx == -1) { IsNew = true; V = SymIndex((int)SymVector.size(), true); } Symbol *Sym; if (IsNew) { Sym = make(); Sym->InVersionScript = false; Sym->Binding = STB_WEAK; Sym->Visibility = STV_DEFAULT; Sym->IsUsedInRegularObj = false; Sym->ExportDynamic = false; Sym->Traced = V.Traced; Sym->VersionId = Config->DefaultSymbolVersion; SymVector.push_back(Sym); } else { Sym = SymVector[V.Idx]; } return {Sym, IsNew}; } // Find an existing symbol or create and insert a new one, then apply the given // attributes. template std::pair SymbolTable::insert(StringRef Name, uint8_t Type, uint8_t Visibility, bool CanOmitFromDynSym, InputFile *File) { bool IsUsedInRegularObj = !File || File->kind() == InputFile::ObjectKind; Symbol *S; bool WasInserted; std::tie(S, WasInserted) = insert(Name); // Merge in the new symbol's visibility. 
S->Visibility = getMinVisibility(S->Visibility, Visibility); if (!CanOmitFromDynSym && (Config->Shared || Config->ExportDynamic)) S->ExportDynamic = true; if (IsUsedInRegularObj) S->IsUsedInRegularObj = true; if (!WasInserted && S->body()->Type != SymbolBody::UnknownType && ((Type == STT_TLS) != S->body()->isTls())) { error("TLS attribute mismatch: " + toString(*S->body()) + "\n>>> defined in " + toString(S->body()->File) + "\n>>> defined in " + toString(File)); } return {S, WasInserted}; } template Symbol *SymbolTable::addUndefined(StringRef Name) { return addUndefined(Name, /*IsLocal=*/false, STB_GLOBAL, STV_DEFAULT, /*Type*/ 0, /*CanOmitFromDynSym*/ false, /*File*/ nullptr); } static uint8_t getVisibility(uint8_t StOther) { return StOther & 3; } template Symbol *SymbolTable::addUndefined(StringRef Name, bool IsLocal, uint8_t Binding, uint8_t StOther, uint8_t Type, bool CanOmitFromDynSym, InputFile *File) { Symbol *S; bool WasInserted; uint8_t Visibility = getVisibility(StOther); std::tie(S, WasInserted) = insert(Name, Type, Visibility, CanOmitFromDynSym, File); // An undefined symbol with non default visibility must be satisfied // in the same DSO. if (WasInserted || (isa(S->body()) && Visibility != STV_DEFAULT)) { S->Binding = Binding; replaceBody(S, Name, IsLocal, StOther, Type, File); return S; } if (Binding != STB_WEAK) { SymbolBody *B = S->body(); if (B->isShared() || B->isLazy() || B->isUndefined()) S->Binding = Binding; if (auto *SS = dyn_cast(B)) cast>(SS->File)->IsUsed = true; } if (auto *L = dyn_cast(S->body())) { // An undefined weak will not fetch archive members, but we have to remember // its type. See also comment in addLazyArchive. if (S->isWeak()) L->Type = Type; else if (InputFile *F = L->fetch()) addFile(F); } return S; } // We have a new defined symbol with the specified binding. Return 1 if the new // symbol should win, -1 if the new symbol should lose, or 0 if both symbols are // strong defined symbols. 
static int compareDefined(Symbol *S, bool WasInserted, uint8_t Binding) { if (WasInserted) return 1; SymbolBody *Body = S->body(); if (Body->isLazy() || !Body->isInCurrentDSO()) return 1; if (Binding == STB_WEAK) return -1; if (S->isWeak()) return 1; return 0; } // We have a new non-common defined symbol with the specified binding. Return 1 // if the new symbol should win, -1 if the new symbol should lose, or 0 if there // is a conflict. If the new symbol wins, also update the binding. template static int compareDefinedNonCommon(Symbol *S, bool WasInserted, uint8_t Binding, bool IsAbsolute, typename ELFT::uint Value) { if (int Cmp = compareDefined(S, WasInserted, Binding)) { if (Cmp > 0) S->Binding = Binding; return Cmp; } SymbolBody *B = S->body(); if (isa(B)) { // Non-common symbols take precedence over common symbols. if (Config->WarnCommon) warn("common " + S->body()->getName() + " is overridden"); return 1; } else if (auto *R = dyn_cast(B)) { if (R->Section == nullptr && Binding == STB_GLOBAL && IsAbsolute && R->Value == Value) return -1; } return 0; } template Symbol *SymbolTable::addCommon(StringRef N, uint64_t Size, uint32_t Alignment, uint8_t Binding, uint8_t StOther, uint8_t Type, InputFile *File) { Symbol *S; bool WasInserted; std::tie(S, WasInserted) = insert(N, Type, getVisibility(StOther), /*CanOmitFromDynSym*/ false, File); int Cmp = compareDefined(S, WasInserted, Binding); if (Cmp > 0) { S->Binding = Binding; replaceBody(S, N, Size, Alignment, StOther, Type, File); } else if (Cmp == 0) { auto *C = dyn_cast(S->body()); if (!C) { // Non-common symbols take precedence over common symbols. 
if (Config->WarnCommon) warn("common " + S->body()->getName() + " is overridden"); return S; } if (Config->WarnCommon) warn("multiple common of " + S->body()->getName()); Alignment = C->Alignment = std::max(C->Alignment, Alignment); if (Size > C->Size) replaceBody(S, N, Size, Alignment, StOther, Type, File); } return S; } static void warnOrError(const Twine &Msg) { if (Config->AllowMultipleDefinition) warn(Msg); else error(Msg); } static void reportDuplicate(SymbolBody *Sym, InputFile *NewFile) { warnOrError("duplicate symbol: " + toString(*Sym) + "\n>>> defined in " + toString(Sym->File) + "\n>>> defined in " + toString(NewFile)); } template static void reportDuplicate(SymbolBody *Sym, InputSectionBase *ErrSec, typename ELFT::uint ErrOffset) { DefinedRegular *D = dyn_cast(Sym); if (!D || !D->Section || !ErrSec) { reportDuplicate(Sym, ErrSec ? ErrSec->getFile() : nullptr); return; } // Construct and print an error message in the form of: // // ld.lld: error: duplicate symbol: foo // >>> defined at bar.c:30 // >>> bar.o (/home/alice/src/bar.o) // >>> defined at baz.c:563 // >>> baz.o in archive libbaz.a auto *Sec1 = cast(D->Section); std::string Src1 = Sec1->getSrcMsg(D->Value); std::string Obj1 = Sec1->getObjMsg(D->Value); std::string Src2 = ErrSec->getSrcMsg(ErrOffset); std::string Obj2 = ErrSec->getObjMsg(ErrOffset); std::string Msg = "duplicate symbol: " + toString(*Sym) + "\n>>> defined at "; if (!Src1.empty()) Msg += Src1 + "\n>>> "; Msg += Obj1 + "\n>>> defined at "; if (!Src2.empty()) Msg += Src2 + "\n>>> "; Msg += Obj2; warnOrError(Msg); } template Symbol *SymbolTable::addRegular(StringRef Name, uint8_t StOther, uint8_t Type, uint64_t Value, uint64_t Size, uint8_t Binding, SectionBase *Section, InputFile *File) { Symbol *S; bool WasInserted; std::tie(S, WasInserted) = insert(Name, Type, getVisibility(StOther), /*CanOmitFromDynSym*/ false, File); int Cmp = compareDefinedNonCommon(S, WasInserted, Binding, Section == nullptr, Value); if (Cmp > 0) 
replaceBody(S, Name, /*IsLocal=*/false, StOther, Type, Value, Size, Section, File); else if (Cmp == 0) reportDuplicate(S->body(), dyn_cast_or_null(Section), Value); return S; } template void SymbolTable::addShared(SharedFile *File, StringRef Name, const Elf_Sym &Sym, const typename ELFT::Verdef *Verdef) { // DSO symbols do not affect visibility in the output, so we pass STV_DEFAULT // as the visibility, which will leave the visibility in the symbol table // unchanged. Symbol *S; bool WasInserted; std::tie(S, WasInserted) = insert(Name, Sym.getType(), STV_DEFAULT, /*CanOmitFromDynSym*/ true, File); // Make sure we preempt DSO symbols with default visibility. if (Sym.getVisibility() == STV_DEFAULT) S->ExportDynamic = true; SymbolBody *Body = S->body(); // An undefined symbol with non default visibility must be satisfied // in the same DSO. if (WasInserted || (isa(Body) && Body->getVisibility() == STV_DEFAULT)) { replaceBody(S, File, Name, Sym.st_other, Sym.getType(), &Sym, Verdef); if (!S->isWeak()) File->IsUsed = true; } } template Symbol *SymbolTable::addBitcode(StringRef Name, uint8_t Binding, uint8_t StOther, uint8_t Type, bool CanOmitFromDynSym, BitcodeFile *F) { Symbol *S; bool WasInserted; std::tie(S, WasInserted) = insert(Name, Type, getVisibility(StOther), CanOmitFromDynSym, F); int Cmp = compareDefinedNonCommon(S, WasInserted, Binding, /*IsAbs*/ false, /*Value*/ 0); if (Cmp > 0) replaceBody(S, Name, /*IsLocal=*/false, StOther, Type, 0, 0, nullptr, F); else if (Cmp == 0) reportDuplicate(S->body(), F); return S; } template SymbolBody *SymbolTable::find(StringRef Name) { auto It = Symtab.find(CachedHashStringRef(Name)); if (It == Symtab.end()) return nullptr; SymIndex V = It->second; if (V.Idx == -1) return nullptr; return SymVector[V.Idx]->body(); } template SymbolBody *SymbolTable::findInCurrentDSO(StringRef Name) { if (SymbolBody *S = find(Name)) if (S->isInCurrentDSO()) return S; return nullptr; } template Symbol *SymbolTable::addLazyArchive(ArchiveFile 
*F, const object::Archive::Symbol Sym) { Symbol *S; bool WasInserted; StringRef Name = Sym.getName(); std::tie(S, WasInserted) = insert(Name); if (WasInserted) { replaceBody(S, *F, Sym, SymbolBody::UnknownType); return S; } if (!S->body()->isUndefined()) return S; // Weak undefined symbols should not fetch members from archives. If we were // to keep old symbol we would not know that an archive member was available // if a strong undefined symbol shows up afterwards in the link. If a strong // undefined symbol never shows up, this lazy symbol will get to the end of // the link and must be treated as the weak undefined one. We already marked // this symbol as used when we added it to the symbol table, but we also need // to preserve its type. FIXME: Move the Type field to Symbol. if (S->isWeak()) { replaceBody(S, *F, Sym, S->body()->Type); return S; } std::pair MBInfo = F->getMember(&Sym); if (!MBInfo.first.getBuffer().empty()) addFile(createObjectFile(MBInfo.first, F->getName(), MBInfo.second)); return S; } template void SymbolTable::addLazyObject(StringRef Name, LazyObjectFile &Obj) { Symbol *S; bool WasInserted; std::tie(S, WasInserted) = insert(Name); if (WasInserted) { replaceBody(S, Name, Obj, SymbolBody::UnknownType); return; } if (!S->body()->isUndefined()) return; // See comment for addLazyArchive above. if (S->isWeak()) replaceBody(S, Name, Obj, S->body()->Type); else if (InputFile *F = Obj.fetch()) addFile(F); } // Process undefined (-u) flags by loading lazy symbols named by those flags. template void SymbolTable::scanUndefinedFlags() { for (StringRef S : Config->Undefined) if (auto *L = dyn_cast_or_null(find(S))) if (InputFile *File = L->fetch()) addFile(File); } // This function takes care of the case in which shared libraries depend on // the user program (not the other way, which is usual). Shared libraries // may have undefined symbols, expecting that the user program provides // the definitions for them. An example is BSD's __progname symbol. 
// We need to put such symbols to the main program's .dynsym so that // shared libraries can find them. // Except this, we ignore undefined symbols in DSOs. template void SymbolTable::scanShlibUndefined() { for (SharedFile *File : SharedFiles) { for (StringRef U : File->getUndefinedSymbols()) { SymbolBody *Sym = find(U); if (!Sym || !Sym->isDefined()) continue; Sym->symbol()->ExportDynamic = true; // If -dynamic-list is given, the default version is set to // VER_NDX_LOCAL, which prevents a symbol to be exported via .dynsym. // Set to VER_NDX_GLOBAL so the symbol will be handled as if it were // specified by -dynamic-list. Sym->symbol()->VersionId = VER_NDX_GLOBAL; } } } // Initialize DemangledSyms with a map from demangled symbols to symbol // objects. Used to handle "extern C++" directive in version scripts. // // The map will contain all demangled symbols. That can be very large, // and in LLD we generally want to avoid do anything for each symbol. // Then, why are we doing this? Here's why. // // Users can use "extern C++ {}" directive to match against demangled // C++ symbols. For example, you can write a pattern such as // "llvm::*::foo(int, ?)". Obviously, there's no way to handle this // other than trying to match a pattern against all demangled symbols. // So, if "extern C++" feature is used, we need to demangle all known // symbols. 
template StringMap> &SymbolTable::getDemangledSyms() { if (!DemangledSyms) { DemangledSyms.emplace(); for (Symbol *Sym : SymVector) { SymbolBody *B = Sym->body(); if (B->isUndefined()) continue; if (Optional S = demangle(B->getName())) (*DemangledSyms)[*S].push_back(B); else (*DemangledSyms)[B->getName()].push_back(B); } } return *DemangledSyms; } template std::vector SymbolTable::findByVersion(SymbolVersion Ver) { if (Ver.IsExternCpp) return getDemangledSyms().lookup(Ver.Name); if (SymbolBody *B = find(Ver.Name)) if (!B->isUndefined()) return {B}; return {}; } template std::vector SymbolTable::findAllByVersion(SymbolVersion Ver) { std::vector Res; StringMatcher M(Ver.Name); if (Ver.IsExternCpp) { for (auto &P : getDemangledSyms()) if (M.match(P.first())) Res.insert(Res.end(), P.second.begin(), P.second.end()); return Res; } for (Symbol *Sym : SymVector) { SymbolBody *B = Sym->body(); if (!B->isUndefined() && M.match(B->getName())) Res.push_back(B); } return Res; } // If there's only one anonymous version definition in a version // script file, the script does not actually define any symbol version, // but just specifies symbols visibilities. template void SymbolTable::handleAnonymousVersion() { for (SymbolVersion &Ver : Config->VersionScriptGlobals) assignExactVersion(Ver, VER_NDX_GLOBAL, "global"); for (SymbolVersion &Ver : Config->VersionScriptGlobals) assignWildcardVersion(Ver, VER_NDX_GLOBAL); for (SymbolVersion &Ver : Config->VersionScriptLocals) assignExactVersion(Ver, VER_NDX_LOCAL, "local"); for (SymbolVersion &Ver : Config->VersionScriptLocals) assignWildcardVersion(Ver, VER_NDX_LOCAL); } // Set symbol versions to symbols. This function handles patterns // containing no wildcard characters. template void SymbolTable::assignExactVersion(SymbolVersion Ver, uint16_t VersionId, StringRef VersionName) { if (Ver.HasWildcard) return; // Get a list of symbols which we need to assign the version to. 
std::vector Syms = findByVersion(Ver); if (Syms.empty()) { if (Config->NoUndefinedVersion) error("version script assignment of '" + VersionName + "' to symbol '" + Ver.Name + "' failed: symbol not defined"); return; } // Assign the version. for (SymbolBody *B : Syms) { Symbol *Sym = B->symbol(); if (Sym->InVersionScript) warn("duplicate symbol '" + Ver.Name + "' in version script"); Sym->VersionId = VersionId; Sym->InVersionScript = true; } } template void SymbolTable::assignWildcardVersion(SymbolVersion Ver, uint16_t VersionId) { if (!Ver.HasWildcard) return; std::vector Syms = findAllByVersion(Ver); // Exact matching takes precendence over fuzzy matching, // so we set a version to a symbol only if no version has been assigned // to the symbol. This behavior is compatible with GNU. for (SymbolBody *B : Syms) if (B->symbol()->VersionId == Config->DefaultSymbolVersion) B->symbol()->VersionId = VersionId; } +static bool isDefaultVersion(SymbolBody *B) { + return B->isInCurrentDSO() && B->getName().find("@@") != StringRef::npos; +} + // This function processes version scripts by updating VersionId // member of symbols. template void SymbolTable::scanVersionScript() { // Symbol themselves might know their versions because symbols // can contain versions in the form of @. - // Let them parse their names. - if (!Config->VersionDefinitions.empty()) - for (Symbol *Sym : SymVector) - Sym->body()->parseSymbolVersion(); + // Let them parse and update their names to exclude version suffix. + for (Symbol *Sym : SymVector) { + SymbolBody *Body = Sym->body(); + bool IsDefault = isDefaultVersion(Body); + Body->parseSymbolVersion(); + + if (!IsDefault) + continue; + + // @@ means the symbol is the default version. If that's the + // case, the symbol is not used only to resolve of version + // but also undefined unversioned symbols with name . + SymbolBody *S = find(Body->getName()); + if (S && S->isUndefined()) + S->copy(Body); + } // Handle edge cases first. 
handleAnonymousVersion(); if (Config->VersionDefinitions.empty()) return; // Now we have version definitions, so we need to set version ids to symbols. // Each version definition has a glob pattern, and all symbols that match // with the pattern get that version. // First, we assign versions to exact matching symbols, // i.e. version definitions not containing any glob meta-characters. for (VersionDefinition &V : Config->VersionDefinitions) for (SymbolVersion &Ver : V.Globals) assignExactVersion(Ver, V.Id, V.Name); // Next, we assign versions to fuzzy matching symbols, // i.e. version definitions containing glob meta-characters. // Note that because the last match takes precedence over previous matches, // we iterate over the definitions in the reverse order. for (VersionDefinition &V : llvm::reverse(Config->VersionDefinitions)) for (SymbolVersion &Ver : V.Globals) assignWildcardVersion(Ver, V.Id); } template class elf::SymbolTable; template class elf::SymbolTable; template class elf::SymbolTable; template class elf::SymbolTable; diff --git a/ELF/SymbolTable.h b/ELF/SymbolTable.h index 316d9c9bf373..4ba101fa5d50 100644 --- a/ELF/SymbolTable.h +++ b/ELF/SymbolTable.h @@ -1,147 +1,147 @@ //===- SymbolTable.h --------------------------------------------*- C++ -*-===// // // The LLVM Linker // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #ifndef LLD_ELF_SYMBOL_TABLE_H #define LLD_ELF_SYMBOL_TABLE_H #include "InputFiles.h" #include "LTO.h" #include "Strings.h" #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseMap.h" namespace lld { namespace elf { -class Lazy; + struct Symbol; // SymbolTable is a bucket of all known symbols, including defined, // undefined, or lazy symbols (the last one is symbols in archive // files whose archive members are not yet loaded). 
// // We put all symbols of all files to a SymbolTable, and the // SymbolTable selects the "best" symbols if there are name // conflicts. For example, obviously, a defined symbol is better than // an undefined symbol. Or, if there's a conflict between a lazy and a // undefined, it'll read an archive member to read a real definition // to replace the lazy symbol. The logic is implemented in the // add*() functions, which are called by input files as they are parsed. There // is one add* function per symbol type. template class SymbolTable { typedef typename ELFT::Sym Elf_Sym; public: void addFile(InputFile *File); void addCombinedLTOObject(); void addSymbolAlias(StringRef Alias, StringRef Name); void addSymbolWrap(StringRef Name); void applySymbolRenames(); ArrayRef getSymbols() const { return SymVector; } ArrayRef *> getObjectFiles() const { return ObjectFiles; } ArrayRef getBinaryFiles() const { return BinaryFiles; } ArrayRef *> getSharedFiles() const { return SharedFiles; } DefinedRegular *addAbsolute(StringRef Name, uint8_t Visibility = llvm::ELF::STV_HIDDEN, uint8_t Binding = llvm::ELF::STB_GLOBAL); DefinedRegular *addIgnored(StringRef Name, uint8_t Visibility = llvm::ELF::STV_HIDDEN); Symbol *addUndefined(StringRef Name); Symbol *addUndefined(StringRef Name, bool IsLocal, uint8_t Binding, uint8_t StOther, uint8_t Type, bool CanOmitFromDynSym, InputFile *File); Symbol *addRegular(StringRef Name, uint8_t StOther, uint8_t Type, uint64_t Value, uint64_t Size, uint8_t Binding, SectionBase *Section, InputFile *File); void addShared(SharedFile *F, StringRef Name, const Elf_Sym &Sym, const typename ELFT::Verdef *Verdef); Symbol *addLazyArchive(ArchiveFile *F, const llvm::object::Archive::Symbol S); void addLazyObject(StringRef Name, LazyObjectFile &Obj); Symbol *addBitcode(StringRef Name, uint8_t Binding, uint8_t StOther, uint8_t Type, bool CanOmitFromDynSym, BitcodeFile *File); Symbol *addCommon(StringRef N, uint64_t Size, uint32_t Alignment, uint8_t Binding, uint8_t 
StOther, uint8_t Type, InputFile *File); std::pair insert(StringRef Name); std::pair insert(StringRef Name, uint8_t Type, uint8_t Visibility, bool CanOmitFromDynSym, InputFile *File); void scanUndefinedFlags(); void scanShlibUndefined(); void scanVersionScript(); SymbolBody *find(StringRef Name); SymbolBody *findInCurrentDSO(StringRef Name); void trace(StringRef Name); private: std::vector findByVersion(SymbolVersion Ver); std::vector findAllByVersion(SymbolVersion Ver); llvm::StringMap> &getDemangledSyms(); void handleAnonymousVersion(); void assignExactVersion(SymbolVersion Ver, uint16_t VersionId, StringRef VersionName); void assignWildcardVersion(SymbolVersion Ver, uint16_t VersionId); struct SymIndex { SymIndex(int Idx, bool Traced) : Idx(Idx), Traced(Traced) {} int Idx : 31; unsigned Traced : 1; }; // The order the global symbols are in is not defined. We can use an arbitrary // order, but it has to be reproducible. That is true even when cross linking. // The default hashing of StringRef produces different results on 32 and 64 // bit systems so we use a map to a vector. That is arbitrary, deterministic // but a bit inefficient. // FIXME: Experiment with passing in a custom hashing or sorting the symbols // once symbol resolution is finished. llvm::DenseMap Symtab; std::vector SymVector; // Comdat groups define "link once" sections. If two comdat groups have the // same name, only one of them is linked, and the other is ignored. This set // is used to uniquify them. llvm::DenseSet ComdatGroups; std::vector *> ObjectFiles; std::vector *> SharedFiles; std::vector BitcodeFiles; std::vector BinaryFiles; // Set of .so files to not link the same shared object file more than once. llvm::DenseSet SoNames; // A map from demangled symbol names to their symbol objects. // This mapping is 1:N because two symbols with different versions // can have the same name. We use this map to handle "extern C++ {}" // directive in version scripts. 
llvm::Optional>> DemangledSyms; // For LTO. std::unique_ptr LTO; }; template struct Symtab { static SymbolTable *X; }; template SymbolTable *Symtab::X; } // namespace elf } // namespace lld #endif diff --git a/ELF/Symbols.cpp b/ELF/Symbols.cpp index 5dce71a32c9c..e8cd662c69ac 100644 --- a/ELF/Symbols.cpp +++ b/ELF/Symbols.cpp @@ -1,386 +1,401 @@ //===- Symbols.cpp --------------------------------------------------------===// // // The LLVM Linker // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "Symbols.h" #include "Error.h" #include "InputFiles.h" #include "InputSection.h" #include "OutputSections.h" #include "Strings.h" #include "SyntheticSections.h" #include "Target.h" #include "Writer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Path.h" #include using namespace llvm; using namespace llvm::object; using namespace llvm::ELF; using namespace lld; using namespace lld::elf; DefinedRegular *ElfSym::Bss; DefinedRegular *ElfSym::Etext1; DefinedRegular *ElfSym::Etext2; DefinedRegular *ElfSym::Edata1; DefinedRegular *ElfSym::Edata2; DefinedRegular *ElfSym::End1; DefinedRegular *ElfSym::End2; DefinedRegular *ElfSym::GlobalOffsetTable; DefinedRegular *ElfSym::MipsGp; DefinedRegular *ElfSym::MipsGpDisp; DefinedRegular *ElfSym::MipsLocalGp; static uint64_t getSymVA(const SymbolBody &Body, int64_t &Addend) { switch (Body.kind()) { case SymbolBody::DefinedRegularKind: { auto &D = cast(Body); SectionBase *IS = D.Section; if (auto *ISB = dyn_cast_or_null(IS)) IS = ISB->Repl; // According to the ELF spec reference to a local symbol from outside // the group are not allowed. Unfortunately .eh_frame breaks that rule // and must be treated specially. For now we just replace the symbol with // 0. if (IS == &InputSection::Discarded) return 0; // This is an absolute symbol. 
if (!IS) return D.Value; uint64_t Offset = D.Value; // An object in an SHF_MERGE section might be referenced via a // section symbol (as a hack for reducing the number of local // symbols). // Depending on the addend, the reference via a section symbol // refers to a different object in the merge section. // Since the objects in the merge section are not necessarily // contiguous in the output, the addend can thus affect the final // VA in a non-linear way. // To make this work, we incorporate the addend into the section // offset (and zero out the addend for later processing) so that // we find the right object in the section. if (D.isSection()) { Offset += Addend; Addend = 0; } const OutputSection *OutSec = IS->getOutputSection(); // In the typical case, this is actually very simple and boils // down to adding together 3 numbers: // 1. The address of the output section. // 2. The offset of the input section within the output section. // 3. The offset within the input section (this addition happens // inside InputSection::getOffset). // // If you understand the data structures involved with this next // line (and how they get built), then you have a pretty good // understanding of the linker. uint64_t VA = (OutSec ? 
OutSec->Addr : 0) + IS->getOffset(Offset); if (D.isTls() && !Config->Relocatable) { if (!Out::TlsPhdr) fatal(toString(D.File) + " has a STT_TLS symbol but doesn't have a PT_TLS section"); return VA - Out::TlsPhdr->p_vaddr; } return VA; } case SymbolBody::DefinedCommonKind: if (!Config->DefineCommon) return 0; return InX::Common->getParent()->Addr + InX::Common->OutSecOff + cast(Body).Offset; case SymbolBody::SharedKind: { auto &SS = cast(Body); if (SS.NeedsCopy) return SS.CopyRelSec->getParent()->Addr + SS.CopyRelSec->OutSecOff + SS.CopyRelSecOff; if (SS.NeedsPltAddr) return Body.getPltVA(); return 0; } case SymbolBody::UndefinedKind: return 0; case SymbolBody::LazyArchiveKind: case SymbolBody::LazyObjectKind: assert(Body.symbol()->IsUsedInRegularObj && "lazy symbol reached writer"); return 0; } llvm_unreachable("invalid symbol kind"); } SymbolBody::SymbolBody(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type) : SymbolKind(K), NeedsCopy(false), NeedsPltAddr(false), IsLocal(IsLocal), IsInGlobalMipsGot(false), Is32BitMipsGot(false), IsInIplt(false), IsInIgot(false), Type(Type), StOther(StOther), Name(Name) {} // Returns true if a symbol can be replaced at load-time by a symbol // with the same name defined in other ELF executable or DSO. bool SymbolBody::isPreemptible() const { if (isLocal()) return false; // Shared symbols resolve to the definition in the DSO. The exceptions are // symbols with copy relocations (which resolve to .bss) or preempt plt // entries (which resolve to that plt entry). if (isShared()) return !NeedsCopy && !NeedsPltAddr; // That's all that can be preempted in a non-DSO. if (!Config->Shared) return false; // Only symbols that appear in dynsym can be preempted. if (!symbol()->includeInDynsym()) return false; // Only default visibility symbols can be preempted. if (symbol()->Visibility != STV_DEFAULT) return false; // -Bsymbolic means that definitions are not preempted. 
if (Config->Bsymbolic || (Config->BsymbolicFunctions && isFunc())) return !isDefined(); return true; } +// Overwrites all attributes except symbol name with Other's so that +// this symbol becomes an alias to Other. This is useful for handling +// some options such as --wrap. +// +// The reason why we want to keep the symbol name is because, if we +// copy symbol names, we'll end up having symbol tables in resulting +// executables or DSOs containing two or more identical symbols, which +// is just inconvenient. +void SymbolBody::copy(SymbolBody *Other) { + StringRef S = Name; + memcpy(symbol()->Body.buffer, Other->symbol()->Body.buffer, + sizeof(Symbol::Body)); + Name = S; +} + uint64_t SymbolBody::getVA(int64_t Addend) const { uint64_t OutVA = getSymVA(*this, Addend); return OutVA + Addend; } uint64_t SymbolBody::getGotVA() const { return InX::Got->getVA() + getGotOffset(); } uint64_t SymbolBody::getGotOffset() const { return GotIndex * Target->GotEntrySize; } uint64_t SymbolBody::getGotPltVA() const { if (this->IsInIgot) return InX::IgotPlt->getVA() + getGotPltOffset(); return InX::GotPlt->getVA() + getGotPltOffset(); } uint64_t SymbolBody::getGotPltOffset() const { return GotPltIndex * Target->GotPltEntrySize; } uint64_t SymbolBody::getPltVA() const { if (this->IsInIplt) return InX::Iplt->getVA() + PltIndex * Target->PltEntrySize; return InX::Plt->getVA() + Target->PltHeaderSize + PltIndex * Target->PltEntrySize; } template typename ELFT::uint SymbolBody::getSize() const { if (const auto *C = dyn_cast(this)) return C->Size; if (const auto *DR = dyn_cast(this)) return DR->Size; if (const auto *S = dyn_cast(this)) return S->getSize(); return 0; } OutputSection *SymbolBody::getOutputSection() const { if (auto *S = dyn_cast(this)) { if (S->Section) return S->Section->getOutputSection(); return nullptr; } if (auto *S = dyn_cast(this)) { if (S->NeedsCopy) return S->CopyRelSec->getParent(); return nullptr; } if (isa(this)) { if (Config->DefineCommon) return 
InX::Common->getParent(); return nullptr; } return nullptr; } // If a symbol name contains '@', the characters after that is // a symbol version name. This function parses that. void SymbolBody::parseSymbolVersion() { StringRef S = getName(); size_t Pos = S.find('@'); if (Pos == 0 || Pos == StringRef::npos) return; StringRef Verstr = S.substr(Pos + 1); if (Verstr.empty()) return; // Truncate the symbol name so that it doesn't include the version string. Name = {S.data(), Pos}; // If this is not in this DSO, it is not a definition. if (!isInCurrentDSO()) return; // '@@' in a symbol name means the default version. // It is usually the most recent one. bool IsDefault = (Verstr[0] == '@'); if (IsDefault) Verstr = Verstr.substr(1); for (VersionDefinition &Ver : Config->VersionDefinitions) { if (Ver.Name != Verstr) continue; if (IsDefault) symbol()->VersionId = Ver.Id; else symbol()->VersionId = Ver.Id | VERSYM_HIDDEN; return; } // It is an error if the specified version is not defined. error(toString(File) + ": symbol " + S + " has undefined version " + Verstr); } Defined::Defined(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type) : SymbolBody(K, Name, IsLocal, StOther, Type) {} template bool DefinedRegular::isMipsPIC() const { typedef typename ELFT::Ehdr Elf_Ehdr; if (!Section || !isFunc()) return false; auto *Sec = cast(Section); const Elf_Ehdr *Hdr = Sec->template getFile()->getObj().getHeader(); return (this->StOther & STO_MIPS_MIPS16) == STO_MIPS_PIC || (Hdr->e_flags & EF_MIPS_PIC); } Undefined::Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type, InputFile *File) : SymbolBody(SymbolBody::UndefinedKind, Name, IsLocal, StOther, Type) { this->File = File; } DefinedCommon::DefinedCommon(StringRef Name, uint64_t Size, uint32_t Alignment, uint8_t StOther, uint8_t Type, InputFile *File) : Defined(SymbolBody::DefinedCommonKind, Name, /*IsLocal=*/false, StOther, Type), Alignment(Alignment), Size(Size) { this->File = File; } // If a 
shared symbol is referred via a copy relocation, its alignment // becomes part of the ABI. This function returns a symbol alignment. // Because symbols don't have alignment attributes, we need to infer that. template uint32_t SharedSymbol::getAlignment() const { auto *File = cast>(this->File); uint32_t SecAlign = File->getSection(getSym())->sh_addralign; uint64_t SymValue = getSym().st_value; uint32_t SymAlign = uint32_t(1) << countTrailingZeros(SymValue); return std::min(SecAlign, SymAlign); } InputFile *Lazy::fetch() { if (auto *S = dyn_cast(this)) return S->fetch(); return cast(this)->fetch(); } LazyArchive::LazyArchive(ArchiveFile &File, const llvm::object::Archive::Symbol S, uint8_t Type) : Lazy(LazyArchiveKind, S.getName(), Type), Sym(S) { this->File = &File; } LazyObject::LazyObject(StringRef Name, LazyObjectFile &File, uint8_t Type) : Lazy(LazyObjectKind, Name, Type) { this->File = &File; } InputFile *LazyArchive::fetch() { std::pair MBInfo = file()->getMember(&Sym); // getMember returns an empty buffer if the member was already // read from the library. if (MBInfo.first.getBuffer().empty()) return nullptr; return createObjectFile(MBInfo.first, file()->getName(), MBInfo.second); } InputFile *LazyObject::fetch() { return file()->fetch(); } uint8_t Symbol::computeBinding() const { if (Config->Relocatable) return Binding; if (Visibility != STV_DEFAULT && Visibility != STV_PROTECTED) return STB_LOCAL; if (VersionId == VER_NDX_LOCAL && body()->isInCurrentDSO()) return STB_LOCAL; if (Config->NoGnuUnique && Binding == STB_GNU_UNIQUE) return STB_GLOBAL; return Binding; } bool Symbol::includeInDynsym() const { if (computeBinding() == STB_LOCAL) return false; return ExportDynamic || body()->isShared() || (body()->isUndefined() && Config->Shared); } // Print out a log message for --trace-symbol. 
void elf::printTraceSymbol(Symbol *Sym) { SymbolBody *B = Sym->body(); std::string S; if (B->isUndefined()) S = ": reference to "; else if (B->isCommon()) S = ": common definition of "; else S = ": definition of "; message(toString(B->File) + S + B->getName()); } // Returns a symbol for an error message. std::string lld::toString(const SymbolBody &B) { if (Config->Demangle) if (Optional S = demangle(B.getName())) return *S; return B.getName(); } template uint32_t SymbolBody::template getSize() const; template uint32_t SymbolBody::template getSize() const; template uint64_t SymbolBody::template getSize() const; template uint64_t SymbolBody::template getSize() const; template bool DefinedRegular::template isMipsPIC() const; template bool DefinedRegular::template isMipsPIC() const; template bool DefinedRegular::template isMipsPIC() const; template bool DefinedRegular::template isMipsPIC() const; template uint32_t SharedSymbol::template getAlignment() const; template uint32_t SharedSymbol::template getAlignment() const; template uint32_t SharedSymbol::template getAlignment() const; template uint32_t SharedSymbol::template getAlignment() const; diff --git a/ELF/Symbols.h b/ELF/Symbols.h index 406fd8e0f57b..773e1ad9588a 100644 --- a/ELF/Symbols.h +++ b/ELF/Symbols.h @@ -1,412 +1,412 @@ //===- Symbols.h ------------------------------------------------*- C++ -*-===// // // The LLVM Linker // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // All symbols are handled as SymbolBodies regardless of their types. // This file defines various types of SymbolBodies. 
// //===----------------------------------------------------------------------===// #ifndef LLD_ELF_SYMBOLS_H #define LLD_ELF_SYMBOLS_H #include "InputSection.h" #include "Strings.h" #include "lld/Core/LLVM.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ELF.h" namespace lld { namespace elf { class ArchiveFile; class BitcodeFile; class InputFile; class LazyObjectFile; template class ObjectFile; class OutputSection; template class SharedFile; struct Symbol; // The base class for real symbol classes. class SymbolBody { public: enum Kind { DefinedFirst, DefinedRegularKind = DefinedFirst, SharedKind, DefinedCommonKind, DefinedLast = DefinedCommonKind, UndefinedKind, LazyArchiveKind, LazyObjectKind, }; SymbolBody(Kind K) : SymbolKind(K) {} Symbol *symbol(); const Symbol *symbol() const { return const_cast(this)->symbol(); } Kind kind() const { return static_cast(SymbolKind); } bool isUndefined() const { return SymbolKind == UndefinedKind; } bool isDefined() const { return SymbolKind <= DefinedLast; } bool isCommon() const { return SymbolKind == DefinedCommonKind; } bool isLazy() const { return SymbolKind == LazyArchiveKind || SymbolKind == LazyObjectKind; } bool isShared() const { return SymbolKind == SharedKind; } bool isInCurrentDSO() const { return !isUndefined() && !isShared(); } bool isLocal() const { return IsLocal; } bool isPreemptible() const; StringRef getName() const { return Name; } - void setName(StringRef S) { Name = S; } uint8_t getVisibility() const { return StOther & 0x3; } void parseSymbolVersion(); + void copy(SymbolBody *Other); bool isInGot() const { return GotIndex != -1U; } bool isInPlt() const { return PltIndex != -1U; } uint64_t getVA(int64_t Addend = 0) const; uint64_t getGotOffset() const; uint64_t getGotVA() const; uint64_t getGotPltOffset() const; uint64_t getGotPltVA() const; uint64_t getPltVA() const; template typename ELFT::uint getSize() const; OutputSection *getOutputSection() const; // The file from which this symbol was 
created. InputFile *File = nullptr; uint32_t DynsymIndex = 0; uint32_t GotIndex = -1; uint32_t GotPltIndex = -1; uint32_t PltIndex = -1; uint32_t GlobalDynIndex = -1; protected: SymbolBody(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type); const unsigned SymbolKind : 8; public: // True if the linker has to generate a copy relocation. // For SharedSymbol only. unsigned NeedsCopy : 1; // True the symbol should point to its PLT entry. // For SharedSymbol only. unsigned NeedsPltAddr : 1; // True if this is a local symbol. unsigned IsLocal : 1; // True if this symbol has an entry in the global part of MIPS GOT. unsigned IsInGlobalMipsGot : 1; // True if this symbol is referenced by 32-bit GOT relocations. unsigned Is32BitMipsGot : 1; // True if this symbol is in the Iplt sub-section of the Plt. unsigned IsInIplt : 1; // True if this symbol is in the Igot sub-section of the .got.plt or .got. unsigned IsInIgot : 1; // The following fields have the same meaning as the ELF symbol attributes. uint8_t Type; // symbol type uint8_t StOther; // st_other field value // The Type field may also have this value. It means that we have not yet seen // a non-Lazy symbol with this name, so we don't know what its type is. The // Type field is normally set to this value for Lazy symbols unless we saw a // weak undefined symbol first, in which case we need to remember the original // symbol's type in order to check for TLS mismatches. enum { UnknownType = 255 }; bool isSection() const { return Type == llvm::ELF::STT_SECTION; } bool isTls() const { return Type == llvm::ELF::STT_TLS; } bool isFunc() const { return Type == llvm::ELF::STT_FUNC; } bool isGnuIFunc() const { return Type == llvm::ELF::STT_GNU_IFUNC; } bool isObject() const { return Type == llvm::ELF::STT_OBJECT; } bool isFile() const { return Type == llvm::ELF::STT_FILE; } protected: StringRefZ Name; }; // The base class for any defined symbols. 
class Defined : public SymbolBody { public: Defined(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type); static bool classof(const SymbolBody *S) { return S->isDefined(); } }; class DefinedCommon : public Defined { public: DefinedCommon(StringRef N, uint64_t Size, uint32_t Alignment, uint8_t StOther, uint8_t Type, InputFile *File); static bool classof(const SymbolBody *S) { return S->kind() == SymbolBody::DefinedCommonKind; } // The output offset of this common symbol in the output bss. Computed by the // writer. uint64_t Offset; // The maximum alignment we have seen for this symbol. uint32_t Alignment; uint64_t Size; }; // Regular defined symbols read from object file symbol tables. class DefinedRegular : public Defined { public: DefinedRegular(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type, uint64_t Value, uint64_t Size, SectionBase *Section, InputFile *File) : Defined(SymbolBody::DefinedRegularKind, Name, IsLocal, StOther, Type), Value(Value), Size(Size), Section(Section) { this->File = File; } // Return true if the symbol is a PIC function. template bool isMipsPIC() const; static bool classof(const SymbolBody *S) { return S->kind() == SymbolBody::DefinedRegularKind; } uint64_t Value; uint64_t Size; SectionBase *Section; }; class Undefined : public SymbolBody { public: Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type, InputFile *F); static bool classof(const SymbolBody *S) { return S->kind() == UndefinedKind; } }; class SharedSymbol : public Defined { public: static bool classof(const SymbolBody *S) { return S->kind() == SymbolBody::SharedKind; } SharedSymbol(InputFile *File, StringRef Name, uint8_t StOther, uint8_t Type, const void *ElfSym, const void *Verdef) : Defined(SymbolBody::SharedKind, Name, /*IsLocal=*/false, StOther, Type), Verdef(Verdef), ElfSym(ElfSym) { // IFuncs defined in DSOs are treated as functions by the static linker. 
if (isGnuIFunc()) Type = llvm::ELF::STT_FUNC; this->File = File; } template uint64_t getShndx() const { return getSym().st_shndx; } template uint64_t getValue() const { return getSym().st_value; } template uint64_t getSize() const { return getSym().st_size; } template uint32_t getAlignment() const; // This field is a pointer to the symbol's version definition. const void *Verdef; // CopyRelSec and CopyRelSecOff are significant only when NeedsCopy is true. InputSection *CopyRelSec; uint64_t CopyRelSecOff; private: template const typename ELFT::Sym &getSym() const { return *(const typename ELFT::Sym *)ElfSym; } const void *ElfSym; }; // This class represents a symbol defined in an archive file. It is // created from an archive file header, and it knows how to load an // object file from an archive to replace itself with a defined // symbol. If the resolver finds both Undefined and Lazy for // the same name, it will ask the Lazy to load a file. class Lazy : public SymbolBody { public: static bool classof(const SymbolBody *S) { return S->isLazy(); } // Returns an object file for this symbol, or a nullptr if the file // was already returned. InputFile *fetch(); protected: Lazy(SymbolBody::Kind K, StringRef Name, uint8_t Type) : SymbolBody(K, Name, /*IsLocal=*/false, llvm::ELF::STV_DEFAULT, Type) {} }; // LazyArchive symbols represents symbols in archive files. class LazyArchive : public Lazy { public: LazyArchive(ArchiveFile &File, const llvm::object::Archive::Symbol S, uint8_t Type); static bool classof(const SymbolBody *S) { return S->kind() == LazyArchiveKind; } ArchiveFile *file() { return (ArchiveFile *)this->File; } InputFile *fetch(); private: const llvm::object::Archive::Symbol Sym; }; // LazyObject symbols represents symbols in object files between // --start-lib and --end-lib options. 
class LazyObject : public Lazy { public: LazyObject(StringRef Name, LazyObjectFile &File, uint8_t Type); static bool classof(const SymbolBody *S) { return S->kind() == LazyObjectKind; } LazyObjectFile *file() { return (LazyObjectFile *)this->File; } InputFile *fetch(); }; // Some linker-generated symbols need to be created as // DefinedRegular symbols. struct ElfSym { // __bss_start static DefinedRegular *Bss; // etext and _etext static DefinedRegular *Etext1; static DefinedRegular *Etext2; // edata and _edata static DefinedRegular *Edata1; static DefinedRegular *Edata2; // end and _end static DefinedRegular *End1; static DefinedRegular *End2; // The _GLOBAL_OFFSET_TABLE_ symbol is defined by target convention to // be at some offset from the base of the .got section, usually 0 or // the end of the .got. static DefinedRegular *GlobalOffsetTable; // _gp, _gp_disp and __gnu_local_gp symbols. Only for MIPS. static DefinedRegular *MipsGp; static DefinedRegular *MipsGpDisp; static DefinedRegular *MipsLocalGp; }; // A real symbol object, SymbolBody, is usually stored within a Symbol. There's // always one Symbol for each symbol name. The resolver updates the SymbolBody // stored in the Body field of this object as it resolves symbols. Symbol also // holds computed properties of symbol names. struct Symbol { // Symbol binding. This is on the Symbol to track changes during resolution. // In particular: // An undefined weak is still weak when it resolves to a shared library. // An undefined weak will not fetch archive members, but we have to remember // it is weak. uint8_t Binding; // Version definition index. uint16_t VersionId; // Symbol visibility. This is the computed minimum visibility of all // observed non-DSO symbols. unsigned Visibility : 2; // True if the symbol was used for linking and thus need to be added to the // output file's symbol table. 
This is true for all symbols except for // unreferenced DSO symbols and bitcode symbols that are unreferenced except // by other bitcode objects. unsigned IsUsedInRegularObj : 1; // If this flag is true and the symbol has protected or default visibility, it // will appear in .dynsym. This flag is set by interposable DSO symbols in // executables, by most symbols in DSOs and executables built with // --export-dynamic, and by dynamic lists. unsigned ExportDynamic : 1; // True if this symbol is specified by --trace-symbol option. unsigned Traced : 1; // This symbol version was found in a version script. unsigned InVersionScript : 1; bool includeInDynsym() const; uint8_t computeBinding() const; bool isWeak() const { return Binding == llvm::ELF::STB_WEAK; } // This field is used to store the Symbol's SymbolBody. This instantiation of // AlignedCharArrayUnion gives us a struct with a char array field that is // large and aligned enough to store any derived class of SymbolBody. llvm::AlignedCharArrayUnion Body; SymbolBody *body() { return reinterpret_cast(Body.buffer); } const SymbolBody *body() const { return const_cast(this)->body(); } }; void printTraceSymbol(Symbol *Sym); template void replaceBody(Symbol *S, ArgT &&... Arg) { static_assert(sizeof(T) <= sizeof(S->Body), "Body too small"); static_assert(alignof(T) <= alignof(decltype(S->Body)), "Body not aligned enough"); assert(static_cast(static_cast(nullptr)) == nullptr && "Not a SymbolBody"); new (S->Body.buffer) T(std::forward(Arg)...); // Print out a log message if --trace-symbol was specified. // This is for debugging. 
if (S->Traced) printTraceSymbol(S); } inline Symbol *SymbolBody::symbol() { assert(!isLocal()); return reinterpret_cast(reinterpret_cast(this) - offsetof(Symbol, Body)); } } // namespace elf std::string toString(const elf::SymbolBody &B); } // namespace lld #endif diff --git a/ELF/SyntheticSections.cpp b/ELF/SyntheticSections.cpp index cb1494d427a0..995d05692ee2 100644 --- a/ELF/SyntheticSections.cpp +++ b/ELF/SyntheticSections.cpp @@ -1,2400 +1,2421 @@ //===- SyntheticSections.cpp ----------------------------------------------===// // // The LLVM Linker // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file contains linker-synthesized sections. Currently, // synthetic sections are created either output sections or input sections, // but we are rewriting code so that all synthetic sections are created as // input sections. // //===----------------------------------------------------------------------===// #include "SyntheticSections.h" #include "Config.h" #include "Error.h" #include "InputFiles.h" #include "LinkerScript.h" #include "Memory.h" #include "OutputSections.h" #include "Strings.h" #include "SymbolTable.h" #include "Target.h" #include "Threads.h" #include "Writer.h" #include "lld/Config/Version.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h" #include "llvm/Object/Decompressor.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/Endian.h" #include "llvm/Support/MD5.h" #include "llvm/Support/RandomNumberGenerator.h" #include "llvm/Support/SHA1.h" #include "llvm/Support/xxhash.h" #include using namespace llvm; using namespace llvm::dwarf; using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::support; using namespace llvm::support::endian; using namespace lld; using namespace lld::elf; uint64_t SyntheticSection::getVA() 
const { if (OutputSection *Sec = getParent()) return Sec->Addr + OutSecOff; return 0; } template static std::vector getCommonSymbols() { std::vector V; for (Symbol *S : Symtab::X->getSymbols()) if (auto *B = dyn_cast(S->body())) V.push_back(B); return V; } // Find all common symbols and allocate space for them. template InputSection *elf::createCommonSection() { if (!Config->DefineCommon) return nullptr; // Sort the common symbols by alignment as an heuristic to pack them better. std::vector Syms = getCommonSymbols(); if (Syms.empty()) return nullptr; std::stable_sort(Syms.begin(), Syms.end(), [](const DefinedCommon *A, const DefinedCommon *B) { return A->Alignment > B->Alignment; }); BssSection *Sec = make("COMMON"); for (DefinedCommon *Sym : Syms) Sym->Offset = Sec->reserveSpace(Sym->Size, Sym->Alignment); return Sec; } // Returns an LLD version string. static ArrayRef getVersion() { // Check LLD_VERSION first for ease of testing. // You can get consitent output by using the environment variable. // This is only for testing. StringRef S = getenv("LLD_VERSION"); if (S.empty()) S = Saver.save(Twine("Linker: ") + getLLDVersion()); // +1 to include the terminating '\0'. return {(const uint8_t *)S.data(), S.size() + 1}; } // Creates a .comment section containing LLD version info. // With this feature, you can identify LLD-generated binaries easily // by "readelf --string-dump .comment ". // The returned object is a mergeable string section. template MergeInputSection *elf::createCommentSection() { typename ELFT::Shdr Hdr = {}; Hdr.sh_flags = SHF_MERGE | SHF_STRINGS; Hdr.sh_type = SHT_PROGBITS; Hdr.sh_entsize = 1; Hdr.sh_addralign = 1; auto *Ret = make((ObjectFile *)nullptr, &Hdr, ".comment"); Ret->Data = getVersion(); Ret->splitIntoPieces(); return Ret; } // .MIPS.abiflags section. 
template MipsAbiFlagsSection::MipsAbiFlagsSection(Elf_Mips_ABIFlags Flags) : SyntheticSection(SHF_ALLOC, SHT_MIPS_ABIFLAGS, 8, ".MIPS.abiflags"), Flags(Flags) { this->Entsize = sizeof(Elf_Mips_ABIFlags); } template void MipsAbiFlagsSection::writeTo(uint8_t *Buf) { memcpy(Buf, &Flags, sizeof(Flags)); } template MipsAbiFlagsSection *MipsAbiFlagsSection::create() { Elf_Mips_ABIFlags Flags = {}; bool Create = false; for (InputSectionBase *Sec : InputSections) { if (Sec->Type != SHT_MIPS_ABIFLAGS) continue; Sec->Live = false; Create = true; std::string Filename = toString(Sec->getFile()); const size_t Size = Sec->Data.size(); // Older version of BFD (such as the default FreeBSD linker) concatenate // .MIPS.abiflags instead of merging. To allow for this case (or potential // zero padding) we ignore everything after the first Elf_Mips_ABIFlags if (Size < sizeof(Elf_Mips_ABIFlags)) { error(Filename + ": invalid size of .MIPS.abiflags section: got " + Twine(Size) + " instead of " + Twine(sizeof(Elf_Mips_ABIFlags))); return nullptr; } auto *S = reinterpret_cast(Sec->Data.data()); if (S->version != 0) { error(Filename + ": unexpected .MIPS.abiflags version " + Twine(S->version)); return nullptr; } // LLD checks ISA compatibility in getMipsEFlags(). Here we just // select the highest number of ISA/Rev/Ext. Flags.isa_level = std::max(Flags.isa_level, S->isa_level); Flags.isa_rev = std::max(Flags.isa_rev, S->isa_rev); Flags.isa_ext = std::max(Flags.isa_ext, S->isa_ext); Flags.gpr_size = std::max(Flags.gpr_size, S->gpr_size); Flags.cpr1_size = std::max(Flags.cpr1_size, S->cpr1_size); Flags.cpr2_size = std::max(Flags.cpr2_size, S->cpr2_size); Flags.ases |= S->ases; Flags.flags1 |= S->flags1; Flags.flags2 |= S->flags2; Flags.fp_abi = elf::getMipsFpAbiFlag(Flags.fp_abi, S->fp_abi, Filename); }; if (Create) return make>(Flags); return nullptr; } // .MIPS.options section. 
template MipsOptionsSection::MipsOptionsSection(Elf_Mips_RegInfo Reginfo) : SyntheticSection(SHF_ALLOC, SHT_MIPS_OPTIONS, 8, ".MIPS.options"), Reginfo(Reginfo) { this->Entsize = sizeof(Elf_Mips_Options) + sizeof(Elf_Mips_RegInfo); } template void MipsOptionsSection::writeTo(uint8_t *Buf) { auto *Options = reinterpret_cast(Buf); Options->kind = ODK_REGINFO; Options->size = getSize(); if (!Config->Relocatable) Reginfo.ri_gp_value = InX::MipsGot->getGp(); memcpy(Buf + sizeof(Elf_Mips_Options), &Reginfo, sizeof(Reginfo)); } template MipsOptionsSection *MipsOptionsSection::create() { // N64 ABI only. if (!ELFT::Is64Bits) return nullptr; Elf_Mips_RegInfo Reginfo = {}; bool Create = false; for (InputSectionBase *Sec : InputSections) { if (Sec->Type != SHT_MIPS_OPTIONS) continue; Sec->Live = false; Create = true; std::string Filename = toString(Sec->getFile()); ArrayRef D = Sec->Data; while (!D.empty()) { if (D.size() < sizeof(Elf_Mips_Options)) { error(Filename + ": invalid size of .MIPS.options section"); break; } auto *Opt = reinterpret_cast(D.data()); if (Opt->kind == ODK_REGINFO) { if (Config->Relocatable && Opt->getRegInfo().ri_gp_value) error(Filename + ": unsupported non-zero ri_gp_value"); Reginfo.ri_gprmask |= Opt->getRegInfo().ri_gprmask; Sec->getFile()->MipsGp0 = Opt->getRegInfo().ri_gp_value; break; } if (!Opt->size) fatal(Filename + ": zero option descriptor size"); D = D.slice(Opt->size); } }; if (Create) return make>(Reginfo); return nullptr; } // MIPS .reginfo section. template MipsReginfoSection::MipsReginfoSection(Elf_Mips_RegInfo Reginfo) : SyntheticSection(SHF_ALLOC, SHT_MIPS_REGINFO, 4, ".reginfo"), Reginfo(Reginfo) { this->Entsize = sizeof(Elf_Mips_RegInfo); } template void MipsReginfoSection::writeTo(uint8_t *Buf) { if (!Config->Relocatable) Reginfo.ri_gp_value = InX::MipsGot->getGp(); memcpy(Buf, &Reginfo, sizeof(Reginfo)); } template MipsReginfoSection *MipsReginfoSection::create() { // Section should be alive for O32 and N32 ABIs only. 
if (ELFT::Is64Bits) return nullptr; Elf_Mips_RegInfo Reginfo = {}; bool Create = false; for (InputSectionBase *Sec : InputSections) { if (Sec->Type != SHT_MIPS_REGINFO) continue; Sec->Live = false; Create = true; if (Sec->Data.size() != sizeof(Elf_Mips_RegInfo)) { error(toString(Sec->getFile()) + ": invalid size of .reginfo section"); return nullptr; } auto *R = reinterpret_cast(Sec->Data.data()); if (Config->Relocatable && R->ri_gp_value) error(toString(Sec->getFile()) + ": unsupported non-zero ri_gp_value"); Reginfo.ri_gprmask |= R->ri_gprmask; Sec->getFile()->MipsGp0 = R->ri_gp_value; }; if (Create) return make>(Reginfo); return nullptr; } InputSection *elf::createInterpSection() { // StringSaver guarantees that the returned string ends with '\0'. StringRef S = Saver.save(Config->DynamicLinker); ArrayRef Contents = {(const uint8_t *)S.data(), S.size() + 1}; auto *Sec = make(SHF_ALLOC, SHT_PROGBITS, 1, Contents, ".interp"); Sec->Live = true; return Sec; } SymbolBody *elf::addSyntheticLocal(StringRef Name, uint8_t Type, uint64_t Value, uint64_t Size, InputSectionBase *Section) { auto *S = make(Name, /*IsLocal*/ true, STV_DEFAULT, Type, Value, Size, Section, nullptr); if (InX::SymTab) InX::SymTab->addSymbol(S); return S; } static size_t getHashSize() { switch (Config->BuildId) { case BuildIdKind::Fast: return 8; case BuildIdKind::Md5: case BuildIdKind::Uuid: return 16; case BuildIdKind::Sha1: return 20; case BuildIdKind::Hexstring: return Config->BuildIdVector.size(); default: llvm_unreachable("unknown BuildIdKind"); } } BuildIdSection::BuildIdSection() : SyntheticSection(SHF_ALLOC, SHT_NOTE, 1, ".note.gnu.build-id"), HashSize(getHashSize()) {} void BuildIdSection::writeTo(uint8_t *Buf) { endianness E = Config->Endianness; write32(Buf, 4, E); // Name size write32(Buf + 4, HashSize, E); // Content size write32(Buf + 8, NT_GNU_BUILD_ID, E); // Type memcpy(Buf + 12, "GNU", 4); // Name string HashBuf = Buf + 16; } // Split one uint8 array into small pieces of uint8 
arrays. static std::vector> split(ArrayRef Arr, size_t ChunkSize) { std::vector> Ret; while (Arr.size() > ChunkSize) { Ret.push_back(Arr.take_front(ChunkSize)); Arr = Arr.drop_front(ChunkSize); } if (!Arr.empty()) Ret.push_back(Arr); return Ret; } // Computes a hash value of Data using a given hash function. // In order to utilize multiple cores, we first split data into 1MB // chunks, compute a hash for each chunk, and then compute a hash value // of the hash values. void BuildIdSection::computeHash( llvm::ArrayRef Data, std::function Arr)> HashFn) { std::vector> Chunks = split(Data, 1024 * 1024); std::vector Hashes(Chunks.size() * HashSize); // Compute hash values. parallelForEachN(0, Chunks.size(), [&](size_t I) { HashFn(Hashes.data() + I * HashSize, Chunks[I]); }); // Write to the final output buffer. HashFn(HashBuf, Hashes); } BssSection::BssSection(StringRef Name) : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_NOBITS, 0, Name) {} size_t BssSection::reserveSpace(uint64_t Size, uint32_t Alignment) { if (OutputSection *Sec = getParent()) Sec->updateAlignment(Alignment); this->Size = alignTo(this->Size, Alignment) + Size; this->Alignment = std::max(this->Alignment, Alignment); return this->Size - Size; } void BuildIdSection::writeBuildId(ArrayRef Buf) { switch (Config->BuildId) { case BuildIdKind::Fast: computeHash(Buf, [](uint8_t *Dest, ArrayRef Arr) { write64le(Dest, xxHash64(toStringRef(Arr))); }); break; case BuildIdKind::Md5: computeHash(Buf, [](uint8_t *Dest, ArrayRef Arr) { memcpy(Dest, MD5::hash(Arr).data(), 16); }); break; case BuildIdKind::Sha1: computeHash(Buf, [](uint8_t *Dest, ArrayRef Arr) { memcpy(Dest, SHA1::hash(Arr).data(), 20); }); break; case BuildIdKind::Uuid: if (getRandomBytes(HashBuf, HashSize)) error("entropy source failure"); break; case BuildIdKind::Hexstring: memcpy(HashBuf, Config->BuildIdVector.data(), Config->BuildIdVector.size()); break; default: llvm_unreachable("unknown BuildIdKind"); } } template EhFrameSection::EhFrameSection() 
: SyntheticSection(SHF_ALLOC, SHT_PROGBITS, 1, ".eh_frame") {} // Search for an existing CIE record or create a new one. // CIE records from input object files are uniquified by their contents // and where their relocations point to. template template CieRecord *EhFrameSection::addCie(EhSectionPiece &Piece, ArrayRef Rels) { auto *Sec = cast(Piece.ID); const endianness E = ELFT::TargetEndianness; if (read32(Piece.data().data() + 4) != 0) fatal(toString(Sec) + ": CIE expected at beginning of .eh_frame"); SymbolBody *Personality = nullptr; unsigned FirstRelI = Piece.FirstRelocation; if (FirstRelI != (unsigned)-1) Personality = &Sec->template getFile()->getRelocTargetSym(Rels[FirstRelI]); // Search for an existing CIE by CIE contents/relocation target pair. CieRecord *Cie = &CieMap[{Piece.data(), Personality}]; // If not found, create a new one. if (Cie->Piece == nullptr) { Cie->Piece = &Piece; Cies.push_back(Cie); } return Cie; } // There is one FDE per function. Returns true if a given FDE // points to a live function. template template bool EhFrameSection::isFdeLive(EhSectionPiece &Piece, ArrayRef Rels) { auto *Sec = cast(Piece.ID); unsigned FirstRelI = Piece.FirstRelocation; if (FirstRelI == (unsigned)-1) return false; const RelTy &Rel = Rels[FirstRelI]; SymbolBody &B = Sec->template getFile()->getRelocTargetSym(Rel); auto *D = dyn_cast(&B); if (!D || !D->Section) return false; auto *Target = cast(cast(D->Section)->Repl); return Target && Target->Live; } // .eh_frame is a sequence of CIE or FDE records. In general, there // is one CIE record per input object file which is followed by // a list of FDEs. This function searches an existing CIE or create a new // one and associates FDEs to the CIE. template template void EhFrameSection::addSectionAux(EhInputSection *Sec, ArrayRef Rels) { const endianness E = ELFT::TargetEndianness; DenseMap OffsetToCie; for (EhSectionPiece &Piece : Sec->Pieces) { // The empty record is the end marker. 
if (Piece.size() == 4) return; size_t Offset = Piece.InputOff; uint32_t ID = read32(Piece.data().data() + 4); if (ID == 0) { OffsetToCie[Offset] = addCie(Piece, Rels); continue; } uint32_t CieOffset = Offset + 4 - ID; CieRecord *Cie = OffsetToCie[CieOffset]; if (!Cie) fatal(toString(Sec) + ": invalid CIE reference"); if (!isFdeLive(Piece, Rels)) continue; Cie->FdePieces.push_back(&Piece); NumFdes++; } } template void EhFrameSection::addSection(InputSectionBase *C) { auto *Sec = cast(C); Sec->Parent = this; updateAlignment(Sec->Alignment); Sections.push_back(Sec); for (auto *DS : Sec->DependentSections) DependentSections.push_back(DS); // .eh_frame is a sequence of CIE or FDE records. This function // splits it into pieces so that we can call // SplitInputSection::getSectionPiece on the section. Sec->split(); if (Sec->Pieces.empty()) return; if (Sec->NumRelocations) { if (Sec->AreRelocsRela) addSectionAux(Sec, Sec->template relas()); else addSectionAux(Sec, Sec->template rels()); return; } addSectionAux(Sec, makeArrayRef(nullptr, nullptr)); } template static void writeCieFde(uint8_t *Buf, ArrayRef D) { memcpy(Buf, D.data(), D.size()); // Fix the size field. -4 since size does not include the size field itself. const endianness E = ELFT::TargetEndianness; write32(Buf, alignTo(D.size(), sizeof(typename ELFT::uint)) - 4); } template void EhFrameSection::finalizeContents() { if (this->Size) return; // Already finalized. size_t Off = 0; for (CieRecord *Cie : Cies) { Cie->Piece->OutputOff = Off; Off += alignTo(Cie->Piece->size(), Config->Wordsize); for (EhSectionPiece *Fde : Cie->FdePieces) { Fde->OutputOff = Off; Off += alignTo(Fde->size(), Config->Wordsize); } } // The LSB standard does not allow a .eh_frame section with zero // Call Frame Information records. Therefore add a CIE record length // 0 as a terminator if this .eh_frame section is empty. 
if (Off == 0) Off = 4; this->Size = Off; } template static uint64_t readFdeAddr(uint8_t *Buf, int Size) { const endianness E = ELFT::TargetEndianness; switch (Size) { case DW_EH_PE_udata2: return read16(Buf); case DW_EH_PE_udata4: return read32(Buf); case DW_EH_PE_udata8: return read64(Buf); case DW_EH_PE_absptr: if (ELFT::Is64Bits) return read64(Buf); return read32(Buf); } fatal("unknown FDE size encoding"); } // Returns the VA to which a given FDE (on a mmap'ed buffer) is applied to. // We need it to create .eh_frame_hdr section. template uint64_t EhFrameSection::getFdePc(uint8_t *Buf, size_t FdeOff, uint8_t Enc) { // The starting address to which this FDE applies is // stored at FDE + 8 byte. size_t Off = FdeOff + 8; uint64_t Addr = readFdeAddr(Buf + Off, Enc & 0x7); if ((Enc & 0x70) == DW_EH_PE_absptr) return Addr; if ((Enc & 0x70) == DW_EH_PE_pcrel) return Addr + getParent()->Addr + Off; fatal("unknown FDE size relative encoding"); } template void EhFrameSection::writeTo(uint8_t *Buf) { const endianness E = ELFT::TargetEndianness; for (CieRecord *Cie : Cies) { size_t CieOffset = Cie->Piece->OutputOff; writeCieFde(Buf + CieOffset, Cie->Piece->data()); for (EhSectionPiece *Fde : Cie->FdePieces) { size_t Off = Fde->OutputOff; writeCieFde(Buf + Off, Fde->data()); // FDE's second word should have the offset to an associated CIE. // Write it. write32(Buf + Off + 4, Off + 4 - CieOffset); } } for (EhInputSection *S : Sections) S->relocateAlloc(Buf, nullptr); // Construct .eh_frame_hdr. .eh_frame_hdr is a binary search table // to get a FDE from an address to which FDE is applied. So here // we obtain two addresses and pass them to EhFrameHdr object. 
if (In::EhFrameHdr) { for (CieRecord *Cie : Cies) { uint8_t Enc = getFdeEncoding(Cie->Piece); for (SectionPiece *Fde : Cie->FdePieces) { uint64_t Pc = getFdePc(Buf, Fde->OutputOff, Enc); uint64_t FdeVA = getParent()->Addr + Fde->OutputOff; In::EhFrameHdr->addFde(Pc, FdeVA); } } } } GotSection::GotSection() : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, Target->GotEntrySize, ".got") {} void GotSection::addEntry(SymbolBody &Sym) { Sym.GotIndex = NumEntries; ++NumEntries; } bool GotSection::addDynTlsEntry(SymbolBody &Sym) { if (Sym.GlobalDynIndex != -1U) return false; Sym.GlobalDynIndex = NumEntries; // Global Dynamic TLS entries take two GOT slots. NumEntries += 2; return true; } // Reserves TLS entries for a TLS module ID and a TLS block offset. // In total it takes two GOT slots. bool GotSection::addTlsIndex() { if (TlsIndexOff != uint32_t(-1)) return false; TlsIndexOff = NumEntries * Config->Wordsize; NumEntries += 2; return true; } uint64_t GotSection::getGlobalDynAddr(const SymbolBody &B) const { return this->getVA() + B.GlobalDynIndex * Config->Wordsize; } uint64_t GotSection::getGlobalDynOffset(const SymbolBody &B) const { return B.GlobalDynIndex * Config->Wordsize; } void GotSection::finalizeContents() { Size = NumEntries * Config->Wordsize; } bool GotSection::empty() const { // If we have a relocation that is relative to GOT (such as GOTOFFREL), // we need to emit a GOT even if it's empty. 
return NumEntries == 0 && !HasGotOffRel; } void GotSection::writeTo(uint8_t *Buf) { relocateAlloc(Buf, Buf + Size); } MipsGotSection::MipsGotSection() : SyntheticSection(SHF_ALLOC | SHF_WRITE | SHF_MIPS_GPREL, SHT_PROGBITS, 16, ".got") {} void MipsGotSection::addEntry(SymbolBody &Sym, int64_t Addend, RelExpr Expr) { // For "true" local symbols which can be referenced from the same module // only compiler creates two instructions for address loading: // // lw $8, 0($gp) # R_MIPS_GOT16 // addi $8, $8, 0 # R_MIPS_LO16 // // The first instruction loads high 16 bits of the symbol address while // the second adds an offset. That allows to reduce number of required // GOT entries because only one global offset table entry is necessary // for every 64 KBytes of local data. So for local symbols we need to // allocate number of GOT entries to hold all required "page" addresses. // // All global symbols (hidden and regular) considered by compiler uniformly. // It always generates a single `lw` instruction and R_MIPS_GOT16 relocation // to load address of the symbol. So for each such symbol we need to // allocate dedicated GOT entry to store its address. // // If a symbol is preemptible we need help of dynamic linker to get its // final address. The corresponding GOT entries are allocated in the // "global" part of GOT. Entries for non preemptible global symbol allocated // in the "local" part of GOT. // // See "Global Offset Table" in Chapter 5: // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf if (Expr == R_MIPS_GOT_LOCAL_PAGE) { // At this point we do not know final symbol value so to reduce number // of allocated GOT entries do the following trick. Save all output // sections referenced by GOT relocations. Then later in the `finalize` // method calculate number of "pages" required to cover all saved output // section and allocate appropriate number of GOT entries. 
PageIndexMap.insert({Sym.getOutputSection(), 0}); return; } if (Sym.isTls()) { // GOT entries created for MIPS TLS relocations behave like // almost GOT entries from other ABIs. They go to the end // of the global offset table. Sym.GotIndex = TlsEntries.size(); TlsEntries.push_back(&Sym); return; } auto AddEntry = [&](SymbolBody &S, uint64_t A, GotEntries &Items) { if (S.isInGot() && !A) return; size_t NewIndex = Items.size(); if (!EntryIndexMap.insert({{&S, A}, NewIndex}).second) return; Items.emplace_back(&S, A); if (!A) S.GotIndex = NewIndex; }; if (Sym.isPreemptible()) { // Ignore addends for preemptible symbols. They got single GOT entry anyway. AddEntry(Sym, 0, GlobalEntries); Sym.IsInGlobalMipsGot = true; } else if (Expr == R_MIPS_GOT_OFF32) { AddEntry(Sym, Addend, LocalEntries32); Sym.Is32BitMipsGot = true; } else { // Hold local GOT entries accessed via a 16-bit index separately. // That allows to write them in the beginning of the GOT and keep // their indexes as less as possible to escape relocation's overflow. AddEntry(Sym, Addend, LocalEntries); } } bool MipsGotSection::addDynTlsEntry(SymbolBody &Sym) { if (Sym.GlobalDynIndex != -1U) return false; Sym.GlobalDynIndex = TlsEntries.size(); // Global Dynamic TLS entries take two GOT slots. TlsEntries.push_back(nullptr); TlsEntries.push_back(&Sym); return true; } // Reserves TLS entries for a TLS module ID and a TLS block offset. // In total it takes two GOT slots. 
bool MipsGotSection::addTlsIndex() { if (TlsIndexOff != uint32_t(-1)) return false; TlsIndexOff = TlsEntries.size() * Config->Wordsize; TlsEntries.push_back(nullptr); TlsEntries.push_back(nullptr); return true; } /* Rounds Addr to the nearest multiple of 0x10000: adds 0x8000, then clears the low 16 bits. */ static uint64_t getMipsPageAddr(uint64_t Addr) { return (Addr + 0x8000) & ~0xffff; } /* Number of page entries reserved for a section of Size bytes; updateAllocSize treats it as a worst-case upper bound. */ static uint64_t getMipsPageCount(uint64_t Size) { return (Size + 0xfffe) / 0xffff + 1; } /* Returns the offset of the page entry used for B plus Addend: the header entries are skipped, then the first page index of the output section plus the page delta is scaled by Config->Wordsize. */ uint64_t MipsGotSection::getPageEntryOffset(const SymbolBody &B, int64_t Addend) const { const OutputSection *OutSec = B.getOutputSection(); uint64_t SecAddr = getMipsPageAddr(OutSec->Addr); uint64_t SymAddr = getMipsPageAddr(B.getVA(Addend)); /* NOTE(review): SymAddr - SecAddr is a multiple of 0x10000 but is divided by 0xffff; the quotient equals the page delta only while that delta is below 0xffff - confirm this is intended. */ uint64_t Index = PageIndexMap.lookup(OutSec) + (SymAddr - SecAddr) / 0xffff; assert(Index < PageEntriesNum); return (HeaderEntriesNum + Index) * Config->Wordsize; } uint64_t MipsGotSection::getBodyEntryOffset(const SymbolBody &B, int64_t Addend) const { // Calculate offset of the GOT entries block: TLS, global, local. uint64_t Index = HeaderEntriesNum + PageEntriesNum; if (B.isTls()) Index += LocalEntries.size() + LocalEntries32.size() + GlobalEntries.size(); else if (B.IsInGlobalMipsGot) Index += LocalEntries.size() + LocalEntries32.size(); else if (B.Is32BitMipsGot) Index += LocalEntries.size(); // Calculate offset of the GOT entry in the block. if (B.isInGot()) Index += B.GotIndex; else { auto It = EntryIndexMap.find({&B, Addend}); assert(It != EntryIndexMap.end()); Index += It->second; } return Index * Config->Wordsize; } /* Offset of the TLS block: it follows the local entries and the global entries. */ uint64_t MipsGotSection::getTlsOffset() const { return (getLocalEntriesNum() + GlobalEntries.size()) * Config->Wordsize; } uint64_t MipsGotSection::getGlobalDynOffset(const SymbolBody &B) const { return B.GlobalDynIndex * Config->Wordsize; } const SymbolBody *MipsGotSection::getFirstGlobalEntry() const { return GlobalEntries.empty() ? 
nullptr : GlobalEntries.front().first; } /* Counts header, page, 16-bit local and 32-bit local entries; global and TLS entries are excluded. */ unsigned MipsGotSection::getLocalEntriesNum() const { return HeaderEntriesNum + PageEntriesNum + LocalEntries.size() + LocalEntries32.size(); } void MipsGotSection::finalizeContents() { updateAllocSize(); } void MipsGotSection::updateAllocSize() { PageEntriesNum = 0; for (std::pair &P : PageIndexMap) { // For each output section referenced by GOT page relocations calculate // and save into PageIndexMap an upper bound of MIPS GOT entries required // to store page addresses of local symbols. We assume the worst case - // each 64kb page of the output section has at least one GOT relocation // against it. And take into account the case when the section intersects // page boundaries. P.second = PageEntriesNum; PageEntriesNum += getMipsPageCount(P.first->Size); } Size = (getLocalEntriesNum() + GlobalEntries.size() + TlsEntries.size()) * Config->Wordsize; } bool MipsGotSection::empty() const { // We add the .got section to the result for dynamic MIPS target because // its address and properties are mentioned in the .dynamic section. return Config->Relocatable; } uint64_t MipsGotSection::getGp() const { return ElfSym::MipsGp->getVA(0); } /* Reads a 64-bit value when Config->Is64 is set, otherwise a 32-bit value, using Config->Endianness. */ static uint64_t readUint(uint8_t *Buf) { if (Config->Is64) return read64(Buf, Config->Endianness); return read32(Buf, Config->Endianness); } /* Writes Val as a 64-bit value when Config->Is64 is set, otherwise as a 32-bit value, using Config->Endianness. */ static void writeUint(uint8_t *Buf, uint64_t Val) { if (Config->Is64) write64(Buf, Val, Config->Endianness); else write32(Buf, Val, Config->Endianness); } void MipsGotSection::writeTo(uint8_t *Buf) { // Set the MSB of the second GOT slot. This is not required by any // MIPS ABI documentation, though. // // There is a comment in glibc saying that "The MSB of got[1] of a // gnu object is set to identify gnu objects," and in GNU gold it // says "the second entry will be used by some runtime loaders". // But how this field is being used is unclear. 
// // We are not really willing to mimic other linkers' behaviors // without understanding why they do that, but because all files // generated by GNU tools have this special GOT value, and because // we've been doing this for years, it is probably a safe bet to // keep doing this for now. We really need to revisit this to see // if we had to do this. writeUint(Buf + Config->Wordsize, (uint64_t)1 << (Config->Wordsize * 8 - 1)); Buf += HeaderEntriesNum * Config->Wordsize; // Write 'page address' entries to the local part of the GOT. for (std::pair &L : PageIndexMap) { size_t PageCount = getMipsPageCount(L.first->Size); uint64_t FirstPageAddr = getMipsPageAddr(L.first->Addr); for (size_t PI = 0; PI < PageCount; ++PI) { uint8_t *Entry = Buf + (L.second + PI) * Config->Wordsize; writeUint(Entry, FirstPageAddr + PI * 0x10000); } } Buf += PageEntriesNum * Config->Wordsize; /* AddEntry writes one symbol VA (plus addend) and advances Buf by one word. */ auto AddEntry = [&](const GotEntry &SA) { uint8_t *Entry = Buf; Buf += Config->Wordsize; const SymbolBody *Body = SA.first; uint64_t VA = Body->getVA(SA.second); writeUint(Entry, VA); }; /* Entries are emitted in the order: 16-bit local, 32-bit local, global. */ std::for_each(std::begin(LocalEntries), std::end(LocalEntries), AddEntry); std::for_each(std::begin(LocalEntries32), std::end(LocalEntries32), AddEntry); std::for_each(std::begin(GlobalEntries), std::end(GlobalEntries), AddEntry); // Initialize TLS-related GOT entries. If the entry has a corresponding // dynamic relocation, leave it initialized by zero. Write down adjusted // TLS symbol's values otherwise. To calculate the adjustments use offsets // for thread-local storage. 
// https://www.linux-mips.org/wiki/NPTL if (TlsIndexOff != -1U && !Config->Pic) writeUint(Buf + TlsIndexOff, 1); for (const SymbolBody *B : TlsEntries) { if (!B || B->isPreemptible()) continue; uint64_t VA = B->getVA(); if (B->GotIndex != -1U) { uint8_t *Entry = Buf + B->GotIndex * Config->Wordsize; writeUint(Entry, VA - 0x7000); } if (B->GlobalDynIndex != -1U) { uint8_t *Entry = Buf + B->GlobalDynIndex * Config->Wordsize; writeUint(Entry, 1); Entry += Config->Wordsize; writeUint(Entry, VA - 0x8000); } } } GotPltSection::GotPltSection() : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, Target->GotPltEntrySize, ".got.plt") {} /* GotPltIndex counts the target-specific header entries, so the first added symbol gets index GotPltHeaderEntriesNum. */ void GotPltSection::addEntry(SymbolBody &Sym) { Sym.GotPltIndex = Target->GotPltHeaderEntriesNum + Entries.size(); Entries.push_back(&Sym); } size_t GotPltSection::getSize() const { return (Target->GotPltHeaderEntriesNum + Entries.size()) * Target->GotPltEntrySize; } /* NOTE(review): the entry loop below advances Buf by Config->Wordsize although getSize() and the header skip use Target->GotPltEntrySize - confirm these are always equal. The same pattern appears in IgotPltSection::writeTo. */ void GotPltSection::writeTo(uint8_t *Buf) { Target->writeGotPltHeader(Buf); Buf += Target->GotPltHeaderEntriesNum * Target->GotPltEntrySize; for (const SymbolBody *B : Entries) { Target->writeGotPlt(Buf, *B); Buf += Config->Wordsize; } } // On ARM the IgotPltSection is part of the GotSection, on other Targets it is // part of the .got.plt IgotPltSection::IgotPltSection() : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, Target->GotPltEntrySize, Config->EMachine == EM_ARM ? ".got" : ".got.plt") {} void IgotPltSection::addEntry(SymbolBody &Sym) { Sym.IsInIgot = true; Sym.GotPltIndex = Entries.size(); Entries.push_back(&Sym); } size_t IgotPltSection::getSize() const { return Entries.size() * Target->GotPltEntrySize; } void IgotPltSection::writeTo(uint8_t *Buf) { for (const SymbolBody *B : Entries) { Target->writeIgotPlt(Buf, *B); Buf += Config->Wordsize; } } StringTableSection::StringTableSection(StringRef Name, bool Dynamic) : SyntheticSection(Dynamic ? (uint64_t)SHF_ALLOC : 0, SHT_STRTAB, 1, Name), Dynamic(Dynamic) { // ELF string tables start with a NUL byte. 
addString(""); } // Adds a string to the string table. If HashIt is true we hash and check for // duplicates. It is optional because the names of global symbols are already // uniqued and hashing them again has a big cost for a small value: uniquing // them with some other string that happens to be the same. /* Returns the offset of S in the table; Size grows by the string length plus one for the terminator. */ unsigned StringTableSection::addString(StringRef S, bool HashIt) { if (HashIt) { auto R = StringMap.insert(std::make_pair(S, this->Size)); if (!R.second) return R.first->second; } unsigned Ret = this->Size; this->Size = this->Size + S.size() + 1; Strings.push_back(S); return Ret; } /* Only the string bytes are copied; the byte after each string is skipped rather than written, so this presumably relies on a zero-filled output buffer - TODO confirm. */ void StringTableSection::writeTo(uint8_t *Buf) { for (StringRef S : Strings) { memcpy(Buf, S.data(), S.size()); Buf += S.size() + 1; } } // Returns the number of version definition entries. Because the first entry // is for the version definition itself, it is the number of versioned symbols // plus one. Note that we don't support multiple versions yet. static unsigned getVerDefNum() { return Config->VersionDefinitions.size() + 1; } template DynamicSection::DynamicSection() : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_DYNAMIC, Config->Wordsize, ".dynamic") { this->Entsize = ELFT::Is64Bits ? 16 : 8; // .dynamic section is not writable on MIPS and on Fuchsia OS // which passes -z rodynamic. // See "Special Section" in Chapter 4 in the following document: // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf if (Config->EMachine == EM_MIPS || Config->ZRodynamic) this->Flags = SHF_ALLOC; addEntries(); } // There are some dynamic entries that don't depend on other sections. // Such entries can be set early. template void DynamicSection::addEntries() { // Add strings to .dynstr early so that .dynstr's size will be // fixed early. for (StringRef S : Config->AuxiliaryList) add({DT_AUXILIARY, InX::DynStrTab->addString(S)}); if (!Config->Rpath.empty()) add({Config->EnableNewDtags ? 
DT_RUNPATH : DT_RPATH, InX::DynStrTab->addString(Config->Rpath)}); for (SharedFile *F : Symtab::X->getSharedFiles()) if (F->isNeeded()) add({DT_NEEDED, InX::DynStrTab->addString(F->SoName)}); if (!Config->SoName.empty()) add({DT_SONAME, InX::DynStrTab->addString(Config->SoName)}); // Set DT_FLAGS and DT_FLAGS_1. uint32_t DtFlags = 0; uint32_t DtFlags1 = 0; if (Config->Bsymbolic) DtFlags |= DF_SYMBOLIC; if (Config->ZNodelete) DtFlags1 |= DF_1_NODELETE; if (Config->ZNodlopen) DtFlags1 |= DF_1_NOOPEN; if (Config->ZNow) { DtFlags |= DF_BIND_NOW; DtFlags1 |= DF_1_NOW; } if (Config->ZOrigin) { DtFlags |= DF_ORIGIN; DtFlags1 |= DF_1_ORIGIN; } if (DtFlags) add({DT_FLAGS, DtFlags}); if (DtFlags1) add({DT_FLAGS_1, DtFlags1}); // DT_DEBUG is a pointer to debug information used by debuggers at runtime. We // need it for each process, so we don't write it for DSOs. The loader writes // the pointer into this entry. // // DT_DEBUG is the only .dynamic entry that needs to be written to. Some // systems (currently only Fuchsia OS) provide other means to give the // debugger this information. Such systems may choose to make .dynamic read-only. // If the target is such a system (used -z rodynamic) don't write DT_DEBUG. if (!Config->Shared && !Config->Relocatable && !Config->ZRodynamic) add({DT_DEBUG, (uint64_t)0}); } // Add remaining entries to complete .dynamic contents. template void DynamicSection::finalizeContents() { if (this->Size) return; // Already finalized. this->Link = InX::DynStrTab->getParent()->SectionIndex; if (In::RelaDyn->getParent()->Size > 0) { bool IsRela = Config->IsRela; add({IsRela ? DT_RELA : DT_REL, In::RelaDyn}); add({IsRela ? DT_RELASZ : DT_RELSZ, In::RelaDyn->getParent()->Size}); add({IsRela ? DT_RELAENT : DT_RELENT, uint64_t(IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel))}); // MIPS dynamic loader does not support RELCOUNT tag. // The problem is in the tight relation between dynamic // relocations and GOT. So do not emit this tag on MIPS. 
if (Config->EMachine != EM_MIPS) { size_t NumRelativeRels = In::RelaDyn->getRelativeRelocCount(); if (Config->ZCombreloc && NumRelativeRels) add({IsRela ? DT_RELACOUNT : DT_RELCOUNT, NumRelativeRels}); } } if (In::RelaPlt->getParent()->Size > 0) { add({DT_JMPREL, In::RelaPlt}); add({DT_PLTRELSZ, In::RelaPlt->getParent()->Size}); - add({Config->EMachine == EM_MIPS ? DT_MIPS_PLTGOT : DT_PLTGOT, - InX::GotPlt}); + switch (Config->EMachine) { + case EM_MIPS: + add({DT_MIPS_PLTGOT, In::GotPlt}); + break; + case EM_SPARCV9: + add({DT_PLTGOT, In::Plt}); + break; + default: + add({DT_PLTGOT, In::GotPlt}); + break; + } add({DT_PLTREL, uint64_t(Config->IsRela ? DT_RELA : DT_REL)}); } add({DT_SYMTAB, InX::DynSymTab}); add({DT_SYMENT, sizeof(Elf_Sym)}); add({DT_STRTAB, InX::DynStrTab}); add({DT_STRSZ, InX::DynStrTab->getSize()}); if (!Config->ZText) add({DT_TEXTREL, (uint64_t)0}); if (InX::GnuHashTab) add({DT_GNU_HASH, InX::GnuHashTab}); if (In::HashTab) add({DT_HASH, In::HashTab}); if (Out::PreinitArray) { add({DT_PREINIT_ARRAY, Out::PreinitArray}); add({DT_PREINIT_ARRAYSZ, Out::PreinitArray, Entry::SecSize}); } if (Out::InitArray) { add({DT_INIT_ARRAY, Out::InitArray}); add({DT_INIT_ARRAYSZ, Out::InitArray, Entry::SecSize}); } if (Out::FiniArray) { add({DT_FINI_ARRAY, Out::FiniArray}); add({DT_FINI_ARRAYSZ, Out::FiniArray, Entry::SecSize}); } if (SymbolBody *B = Symtab::X->findInCurrentDSO(Config->Init)) add({DT_INIT, B}); if (SymbolBody *B = Symtab::X->findInCurrentDSO(Config->Fini)) add({DT_FINI, B}); bool HasVerNeed = In::VerNeed->getNeedNum() != 0; if (HasVerNeed || In::VerDef) add({DT_VERSYM, In::VerSym}); if (In::VerDef) { add({DT_VERDEF, In::VerDef}); add({DT_VERDEFNUM, getVerDefNum()}); } if (HasVerNeed) { add({DT_VERNEED, In::VerNeed}); add({DT_VERNEEDNUM, In::VerNeed->getNeedNum()}); } if (Config->EMachine == EM_MIPS) { add({DT_MIPS_RLD_VERSION, 1}); add({DT_MIPS_FLAGS, RHF_NOTPOT}); add({DT_MIPS_BASE_ADDRESS, Config->ImageBase}); add({DT_MIPS_SYMTABNO, 
InX::DynSymTab->getNumSymbols()}); add({DT_MIPS_LOCAL_GOTNO, InX::MipsGot->getLocalEntriesNum()}); if (const SymbolBody *B = InX::MipsGot->getFirstGlobalEntry()) add({DT_MIPS_GOTSYM, B->DynsymIndex}); else add({DT_MIPS_GOTSYM, InX::DynSymTab->getNumSymbols()}); add({DT_PLTGOT, InX::MipsGot}); if (InX::MipsRldMap) add({DT_MIPS_RLD_MAP, InX::MipsRldMap}); } getParent()->Link = this->Link; // +1 for DT_NULL this->Size = (Entries.size() + 1) * this->Entsize; } /* Serializes each Entry, resolving its value according to Kind. The loop itself writes no DT_NULL entry even though Size reserves room for one. */ template void DynamicSection::writeTo(uint8_t *Buf) { auto *P = reinterpret_cast(Buf); for (const Entry &E : Entries) { P->d_tag = E.Tag; switch (E.Kind) { case Entry::SecAddr: P->d_un.d_ptr = E.OutSec->Addr; break; case Entry::InSecAddr: P->d_un.d_ptr = E.InSec->getParent()->Addr + E.InSec->OutSecOff; break; case Entry::SecSize: P->d_un.d_val = E.OutSec->Size; break; case Entry::SymAddr: P->d_un.d_ptr = E.Sym->getVA(); break; case Entry::PlainInt: P->d_un.d_val = E.Val; break; } ++P; } } uint64_t DynamicReloc::getOffset() const { return InputSec->getOutputSection()->Addr + InputSec->getOffset(OffsetInSec); } /* With UseSymVA set the addend is the symbol VA computed with Addend; otherwise it is the raw Addend. */ int64_t DynamicReloc::getAddend() const { if (UseSymVA) return Sym->getVA(Addend); return Addend; } /* Returns 0 when there is no symbol or when UseSymVA is set (the symbol value is then carried by the addend). */ uint32_t DynamicReloc::getSymIndex() const { if (Sym && !UseSymVA) return Sym->DynsymIndex; return 0; } template RelocationSection::RelocationSection(StringRef Name, bool Sort) : SyntheticSection(SHF_ALLOC, Config->IsRela ? SHT_RELA : SHT_REL, Config->Wordsize, Name), Sort(Sort) { this->Entsize = Config->IsRela ? 
sizeof(Elf_Rela) : sizeof(Elf_Rel); } /* Appends Reloc and keeps a running count of relocations whose type is Target->RelativeRel. */ template void RelocationSection::addReloc(const DynamicReloc &Reloc) { if (Reloc.Type == Target->RelativeRel) ++NumRelativeRelocs; Relocs.push_back(Reloc); } /* Ordering used when sorting: relative relocations come first, the rest are ordered by symbol index. */ template static bool compRelocations(const RelTy &A, const RelTy &B) { bool AIsRel = A.getType(Config->IsMips64EL) == Target->RelativeRel; bool BIsRel = B.getType(Config->IsMips64EL) == Target->RelativeRel; if (AIsRel != BIsRel) return AIsRel; return A.getSymbol(Config->IsMips64EL) < B.getSymbol(Config->IsMips64EL); } template void RelocationSection::writeTo(uint8_t *Buf) { uint8_t *BufBegin = Buf; for (const DynamicReloc &Rel : Relocs) { auto *P = reinterpret_cast(Buf); Buf += Config->IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel); if (Config->IsRela) P->r_addend = Rel.getAddend(); P->r_offset = Rel.getOffset(); if (Config->EMachine == EM_MIPS && Rel.getInputSec() == InX::MipsGot) // Dynamic relocations against the MIPS GOT section deal with TLS entries // allocated at the end of the GOT. We need to adjust the offset to take // into account 'local' and 'global' GOT entries. P->r_offset += InX::MipsGot->getTlsOffset(); P->setSymbolAndType(Rel.getSymIndex(), Rel.Type, Config->IsMips64EL); } if (Sort) { if (Config->IsRela) std::stable_sort((Elf_Rela *)BufBegin, (Elf_Rela *)BufBegin + Relocs.size(), compRelocations); else std::stable_sort((Elf_Rel *)BufBegin, (Elf_Rel *)BufBegin + Relocs.size(), compRelocations); } } template unsigned RelocationSection::getRelocOffset() { return this->Entsize * Relocs.size(); } template void RelocationSection::finalizeContents() { this->Link = InX::DynSymTab ? InX::DynSymTab->getParent()->SectionIndex : InX::SymTab->getParent()->SectionIndex; // Set required output section properties. getParent()->Link = this->Link; } SymbolTableBaseSection::SymbolTableBaseSection(StringTableSection &StrTabSec) : SyntheticSection(StrTabSec.isDynamic() ? (uint64_t)SHF_ALLOC : 0, StrTabSec.isDynamic() ? SHT_DYNSYM : SHT_SYMTAB, Config->Wordsize, StrTabSec.isDynamic() ? 
".dynsym" : ".symtab"), StrTabSec(StrTabSec) {} // Orders symbols according to their positions in the GOT, // in compliance with MIPS ABI rules. // See "Global Offset Table" in Chapter 5 in the following document // for detailed description: // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf static bool sortMipsSymbols(const SymbolTableEntry &L, const SymbolTableEntry &R) { // Sort entries related to non-local preemptible symbols by GOT indexes. // All other entries go to the first part of GOT in arbitrary order. bool LIsInLocalGot = !L.Symbol->IsInGlobalMipsGot; bool RIsInLocalGot = !R.Symbol->IsInGlobalMipsGot; if (LIsInLocalGot || RIsInLocalGot) return !RIsInLocalGot; return L.Symbol->GotIndex < R.Symbol->GotIndex; } // Finalize a symbol table. The ELF spec requires that all local // symbols precede global symbols, so we sort symbol entries in this // function. (For .dynsym, we don't do that because symbols for // dynamic linking are inherently all globals.) /* NOTE(review): in this revision the local-before-global partition is performed in postThunkContents; this function only sets Link and handles the .dynsym case. */ void SymbolTableBaseSection::finalizeContents() { getParent()->Link = StrTabSec.getParent()->SectionIndex; // If it is a .dynsym, there should be no local symbols, but we need // to do a few things for the dynamic linker. if (this->Type == SHT_DYNSYM) { // Section's Info field has the index of the first non-local symbol. // Because the first symbol entry is a null entry, 1 is the first. getParent()->Info = 1; if (InX::GnuHashTab) { // NB: It also sorts Symbols to meet the GNU hash table requirements. InX::GnuHashTab->addSymbols(Symbols); } else if (Config->EMachine == EM_MIPS) { std::stable_sort(Symbols.begin(), Symbols.end(), sortMipsSymbols); } size_t I = 0; for (const SymbolTableEntry &S : Symbols) S.Symbol->DynsymIndex = ++I; return; } } void SymbolTableBaseSection::postThunkContents() { if (this->Type == SHT_DYNSYM) return; // Move all local symbols before global symbols. 
auto It = std::stable_partition( Symbols.begin(), Symbols.end(), [](const SymbolTableEntry &S) { return S.Symbol->isLocal() || S.Symbol->symbol()->computeBinding() == STB_LOCAL; }); size_t NumLocals = It - Symbols.begin(); getParent()->Info = NumLocals + 1; } void SymbolTableBaseSection::addSymbol(SymbolBody *B) { // Adding a local symbol to a .dynsym is a bug. assert(this->Type != SHT_DYNSYM || !B->isLocal()); bool HashIt = B->isLocal(); Symbols.push_back({B, StrTabSec.addString(B->getName(), HashIt)}); } /* Returns the 1-based position of Body in Symbols, or 0 when it is not present. */ size_t SymbolTableBaseSection::getSymbolIndex(SymbolBody *Body) { auto I = llvm::find_if(Symbols, [&](const SymbolTableEntry &E) { if (E.Symbol == Body) return true; // This is used for -r, so we have to handle multiple section // symbols being combined. if (Body->Type == STT_SECTION && E.Symbol->Type == STT_SECTION) return Body->getOutputSection() == E.Symbol->getOutputSection(); return false; }); if (I == Symbols.end()) return 0; return I - Symbols.begin() + 1; } template SymbolTableSection::SymbolTableSection(StringTableSection &StrTabSec) : SymbolTableBaseSection(StrTabSec) { this->Entsize = sizeof(Elf_Sym); } // Write the internal symbol table contents to the output symbol table. template void SymbolTableSection::writeTo(uint8_t *Buf) { // The first entry is a null entry as per the ELF spec. Buf += sizeof(Elf_Sym); auto *ESym = reinterpret_cast(Buf); for (SymbolTableEntry &Ent : Symbols) { SymbolBody *Body = Ent.Symbol; // Set st_info and st_other. if (Body->isLocal()) { ESym->setBindingAndType(STB_LOCAL, Body->Type); } else { ESym->setBindingAndType(Body->symbol()->computeBinding(), Body->Type); ESym->setVisibility(Body->symbol()->Visibility); } ESym->st_name = Ent.StrTabOffset; - ESym->st_size = Body->getSize(); // Set a section index. 
if (const OutputSection *OutSec = Body->getOutputSection()) ESym->st_shndx = OutSec->SectionIndex; else if (isa(Body)) ESym->st_shndx = SHN_ABS; else if (isa(Body)) ESym->st_shndx = SHN_COMMON; + // Copy symbol size if it is a defined symbol. st_size is not significant + // for undefined symbols, so whether copying it or not is up to us if that's + // the case. We'll leave it as zero because by not setting a value, we can + // get the exact same outputs for two sets of input files that differ only + // in undefined symbol size in DSOs. + if (ESym->st_shndx != SHN_UNDEF) + ESym->st_size = Body->getSize(); + // st_value is usually an address of a symbol, but that has a // special meaning for uninstantiated common symbols (this can // occur if -r is given). if (!Config->DefineCommon && isa(Body)) ESym->st_value = cast(Body)->Alignment; else ESym->st_value = Body->getVA(); ++ESym; } // On MIPS we need to mark a symbol which has a PLT entry and requires // pointer equality with the STO_MIPS_PLT flag. That is necessary to help the // dynamic linker distinguish such symbols from MIPS lazy-binding stubs. // https://sourceware.org/ml/binutils/2008-07/txt00000.txt if (Config->EMachine == EM_MIPS) { auto *ESym = reinterpret_cast(Buf); for (SymbolTableEntry &Ent : Symbols) { SymbolBody *Body = Ent.Symbol; if (Body->isInPlt() && Body->NeedsPltAddr) ESym->st_other |= STO_MIPS_PLT; if (Config->Relocatable) if (auto *D = dyn_cast(Body)) if (D->isMipsPIC()) ESym->st_other |= STO_MIPS_PIC; ++ESym; } } } // .hash and .gnu.hash sections contain on-disk hash tables that map // symbol names to their dynamic symbol table indices. Their purpose // is to help the dynamic linker resolve symbols quickly. If ELF files // don't have them, the dynamic linker has to do linear search on all // dynamic symbols, which makes programs slower. Therefore, a .hash // section is added to a DSO by default. A .gnu.hash is added if you // give the -hash-style=gnu or -hash-style=both option. 
// // The Unix semantics of resolving dynamic symbols is somewhat expensive. // Each ELF file has a list of DSOs that the ELF file depends on and a // list of dynamic symbols that need to be resolved from any of the // DSOs. That means resolving all dynamic symbols takes O(m)*O(n) // where m is the number of DSOs and n is the number of dynamic // symbols. For modern large programs, both m and n are large. So // making each step faster by using hash tables substantially // improves time to load programs. // // (Note that this is not the only way to design the shared library. // For instance, the Windows DLL takes a different approach. On // Windows, each dynamic symbol has a name of DLL from which the symbol // has to be resolved. That makes the cost of symbol resolution O(n). // This disables some hacky techniques you can use on Unix such as // LD_PRELOAD, but this is arguably better semantics than the Unix ones.) // // Due to historical reasons, we have two different hash tables, .hash // and .gnu.hash. They are for the same purpose, and .gnu.hash is a new // and better version of .hash. .hash is just an on-disk hash table, but // .gnu.hash has a bloom filter in addition to a hash table to skip // DSOs very quickly. If you are sure that your dynamic linker knows // about .gnu.hash, you want to specify -hash-style=gnu. Otherwise, a // safe bet is to specify -hash-style=both for backward compatibility. GnuHashTableSection::GnuHashTableSection() : SyntheticSection(SHF_ALLOC, SHT_GNU_HASH, Config->Wordsize, ".gnu.hash") { } void GnuHashTableSection::finalizeContents() { getParent()->Link = InX::DynSymTab->getParent()->SectionIndex; // Computes bloom filter size in word size. We want to allocate 8 // bits for each symbol. It must be a power of two. 
if (Symbols.empty()) MaskWords = 1; else MaskWords = NextPowerOf2((Symbols.size() - 1) / Config->Wordsize); Size = 16; // Header Size += Config->Wordsize * MaskWords; // Bloom filter Size += NBuckets * 4; // Hash buckets Size += Symbols.size() * 4; // Hash values } /* Layout written below: a 16-byte header (NBuckets, getNumSymbols() minus the hashed symbol count, MaskWords, getShift2()), then the bloom filter words, then the buckets and hash values. */ void GnuHashTableSection::writeTo(uint8_t *Buf) { // Write a header. write32(Buf, NBuckets, Config->Endianness); write32(Buf + 4, InX::DynSymTab->getNumSymbols() - Symbols.size(), Config->Endianness); write32(Buf + 8, MaskWords, Config->Endianness); write32(Buf + 12, getShift2(), Config->Endianness); Buf += 16; // Write a bloom filter and a hash table. writeBloomFilter(Buf); Buf += Config->Wordsize * MaskWords; writeHashTable(Buf); } // This function writes a 2-bit bloom filter. This bloom filter alone // usually filters out 80% or more of all symbol lookups [1]. // The dynamic linker uses the hash table only when a symbol is not // filtered out by a bloom filter. // // [1] Ulrich Drepper (2011), "How To Write Shared Libraries" (Ver. 4.1.2), // p.9, https://www.akkadia.org/drepper/dsohowto.pdf /* Each symbol sets two bits in one filter word; the word is picked by (Hash / C) masked with MaskWords - 1, which relies on MaskWords being a power of two. */ void GnuHashTableSection::writeBloomFilter(uint8_t *Buf) { const unsigned C = Config->Wordsize * 8; for (const Entry &Sym : Symbols) { size_t I = (Sym.Hash / C) & (MaskWords - 1); uint64_t Val = readUint(Buf + I * Config->Wordsize); Val |= uint64_t(1) << (Sym.Hash % C); Val |= uint64_t(1) << ((Sym.Hash >> getShift2()) % C); writeUint(Buf + I * Config->Wordsize, Val); } } void GnuHashTableSection::writeHashTable(uint8_t *Buf) { // Group symbols by hash value. std::vector> Syms(NBuckets); for (const Entry &Ent : Symbols) Syms[Ent.Hash % NBuckets].push_back(Ent); // Write hash buckets. Hash buckets contain indices in the following // hash value table. /* NOTE(review): the value stored per bucket is the DynsymIndex of the first symbol in that bucket; empty buckets are left untouched. */ uint32_t *Buckets = reinterpret_cast(Buf); for (size_t I = 0; I < NBuckets; ++I) if (!Syms[I].empty()) write32(Buckets + I, Syms[I][0].Body->DynsymIndex, Config->Endianness); // Write a hash value table. 
It represents a sequence of chains that // share the same hash modulo value. The last element of each chain // is terminated by LSB 1. uint32_t *Values = Buckets + NBuckets; size_t I = 0; for (std::vector &Vec : Syms) { if (Vec.empty()) continue; for (const Entry &Ent : makeArrayRef(Vec).drop_back()) write32(Values + I++, Ent.Hash & ~1, Config->Endianness); write32(Values + I++, Vec.back().Hash | 1, Config->Endianness); } } /* Name hash used for .gnu.hash: starts at 5381 and computes H = H * 33 + C for each byte. */ static uint32_t hashGnu(StringRef Name) { uint32_t H = 5381; for (uint8_t C : Name) H = (H << 5) + H + C; return H; } // Returns a number of hash buckets to accommodate the given number of elements. // We want to choose a moderate number that is not too small (which // causes too many hash collisions) and not too large (which wastes // disk space.) // // We return a prime number because it (is believed to) achieve good // hash distribution. /* Returns the largest listed value that does not exceed NumSymbols, or 0 when NumSymbols is 0. */ static size_t getBucketSize(size_t NumSymbols) { // List of largest prime numbers that are not greater than 2^n + 1. for (size_t N : {131071, 65521, 32749, 16381, 8191, 4093, 2039, 1021, 509, 251, 127, 61, 31, 13, 7, 3, 1}) if (N <= NumSymbols) return N; return 0; } // Add symbols to this symbol hash table. Note that this function // destructively sorts a given vector -- which is needed because // GNU-style hash table places some sorting requirements. void GnuHashTableSection::addSymbols(std::vector &V) { // We cannot use 'auto' for Mid because GCC 6.1 cannot deduce // its type correctly. 
std::vector::iterator Mid = std::stable_partition(V.begin(), V.end(), [](const SymbolTableEntry &S) { return S.Symbol->isUndefined(); }); /* Undefined symbols stay at the front of V and are not hashed; nothing else happens when every symbol is undefined. */ if (Mid == V.end()) return; for (SymbolTableEntry &Ent : llvm::make_range(Mid, V.end())) { SymbolBody *B = Ent.Symbol; Symbols.push_back({B, Ent.StrTabOffset, hashGnu(B->getName())}); } NBuckets = getBucketSize(Symbols.size()); /* Order the hashed symbols by bucket number, then rewrite the tail of V in that order. */ std::stable_sort(Symbols.begin(), Symbols.end(), [&](const Entry &L, const Entry &R) { return L.Hash % NBuckets < R.Hash % NBuckets; }); V.erase(Mid, V.end()); for (const Entry &Ent : Symbols) V.push_back({Ent.Body, Ent.StrTabOffset}); } template HashTableSection::HashTableSection() : SyntheticSection(SHF_ALLOC, SHT_HASH, 4, ".hash") { this->Entsize = 4; } template void HashTableSection::finalizeContents() { getParent()->Link = InX::DynSymTab->getParent()->SectionIndex; unsigned NumEntries = 2; // nbucket and nchain. NumEntries += InX::DynSymTab->getNumSymbols(); // The chain entries. // Create as many buckets as there are symbols. // FIXME: This is simplistic. We can try to optimize it, but implementing // support for SHT_GNU_HASH is probably even more profitable. NumEntries += InX::DynSymTab->getNumSymbols(); this->Size = NumEntries * 4; } template void HashTableSection::writeTo(uint8_t *Buf) { // A 32-bit integer type in the target endianness. 
typedef typename ELFT::Word Elf_Word; unsigned NumSymbols = InX::DynSymTab->getNumSymbols(); auto *P = reinterpret_cast(Buf); *P++ = NumSymbols; // nbucket *P++ = NumSymbols; // nchain Elf_Word *Buckets = P; Elf_Word *Chains = P + NumSymbols; /* Each symbol is pushed onto the front of its bucket chain: the previous head moves into Chains[I] and I becomes the new head. */ for (const SymbolTableEntry &S : InX::DynSymTab->getSymbols()) { SymbolBody *Body = S.Symbol; StringRef Name = Body->getName(); unsigned I = Body->DynsymIndex; uint32_t Hash = hashSysV(Name) % NumSymbols; Chains[I] = Buckets[Hash]; Buckets[Hash] = I; } } PltSection::PltSection(size_t S) : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, ".plt"), - HeaderSize(S) {} + HeaderSize(S) { + // The PLT needs to be writable on SPARC as the dynamic linker will + // modify the instructions in the PLT entries. + if (Config->EMachine == EM_SPARCV9) + this->Flags |= SHF_WRITE; +} void PltSection::writeTo(uint8_t *Buf) { // At beginning of PLT but not the IPLT, we have code to call the dynamic // linker to resolve dynsyms at runtime. Write such code. if (HeaderSize != 0) Target->writePltHeader(Buf); size_t Off = HeaderSize; // The IPlt is immediately after the Plt, account for this in RelOff unsigned PltOff = getPltRelocOff(); for (auto &I : Entries) { const SymbolBody *B = I.first; unsigned RelOff = I.second + PltOff; uint64_t Got = B->getGotPltVA(); uint64_t Plt = this->getVA() + Off; Target->writePlt(Buf + Off, Got, Plt, B->PltIndex, RelOff); Off += Target->PltEntrySize; } } /* A zero HeaderSize identifies the IPLT: its entries take their relocation offset from In::RelaIplt and set Sym.IsInIplt. */ template void PltSection::addEntry(SymbolBody &Sym) { Sym.PltIndex = Entries.size(); RelocationSection *PltRelocSection = In::RelaPlt; if (HeaderSize == 0) { PltRelocSection = In::RelaIplt; Sym.IsInIplt = true; } unsigned RelOff = PltRelocSection->getRelocOffset(); Entries.push_back(std::make_pair(&Sym, RelOff)); } size_t PltSection::getSize() const { return HeaderSize + Entries.size() * Target->PltEntrySize; } // Some architectures need additional symbols in the PLT section. 
For // example ARM uses mapping symbols to aid disassembly void PltSection::addSymbols() { // The PLT may have symbols defined for the Header, the IPLT has no header if (HeaderSize != 0) Target->addPltHeaderSymbols(this); size_t Off = HeaderSize; for (size_t I = 0; I < Entries.size(); ++I) { Target->addPltSymbols(this, Off); Off += Target->PltEntrySize; } } /* Returns the size of InX::Plt when this section has no header, otherwise 0. */ unsigned PltSection::getPltRelocOff() const { return (HeaderSize == 0) ? InX::Plt->getSize() : 0; } GdbIndexSection::GdbIndexSection() : SyntheticSection(0, SHT_PROGBITS, 1, ".gdb_index"), StringPool(llvm::StringTableBuilder::ELF) {} // Iterative hash function for symbol's name is described in .gdb_index format // specification. Note that we use one for version 5 to 7 here, it is different // for version 4. static uint32_t hash(StringRef Str) { uint32_t R = 0; for (uint8_t C : Str) R = R * 67 + tolower(C) - 113; return R; } /* Produces one entry per compile unit: its offset shifted by Sec->OutSecOff, and its length plus 4. */ static std::vector readCuList(DWARFContext &Dwarf, InputSection *Sec) { std::vector Ret; for (std::unique_ptr &CU : Dwarf.compile_units()) Ret.push_back({Sec->OutSecOff + CU->getOffset(), CU->getLength() + 4}); return Ret; } /* Collects the address ranges of every compile unit, tagging each with the index of its unit within this context; ranges in missing, discarded or dead sections are skipped. */ static std::vector readAddressArea(DWARFContext &Dwarf, InputSection *Sec) { std::vector Ret; uint32_t CurrentCu = 0; for (std::unique_ptr &CU : Dwarf.compile_units()) { DWARFAddressRangesVector Ranges; CU->collectAddressRanges(Ranges); ArrayRef Sections = Sec->File->getSections(); for (DWARFAddressRange &R : Ranges) { InputSectionBase *S = Sections[R.SectionIndex]; if (!S || S == &InputSection::Discarded || !S->Live) continue; // Range list with zero size has no effect. 
if (R.LowPC == R.HighPC) continue; Ret.push_back({cast(S), R.LowPC, R.HighPC, CurrentCu}); } ++CurrentCu; } return Ret; } /* Gathers name and descriptor-bits pairs from the GNU pubnames and pubtypes sections, in that order. */ static std::vector readPubNamesAndTypes(DWARFContext &Dwarf, bool IsLE) { StringRef Data[] = {Dwarf.getGnuPubNamesSection(), Dwarf.getGnuPubTypesSection()}; std::vector Ret; for (StringRef D : Data) { DWARFDebugPubTable PubTable(D, IsLE, true); for (const DWARFDebugPubTable::Set &Set : PubTable.getData()) for (const DWARFDebugPubTable::Entry &Ent : Set.Entries) Ret.push_back({Ent.Name, Ent.Descriptor.toBits()}); } return Ret; } /* Returns the input sections named .debug_info that have a parent output section. */ static std::vector getDebugInfoSections() { std::vector Ret; for (InputSectionBase *S : InputSections) if (InputSection *IS = dyn_cast(S)) if (IS->getParent() && IS->Name == ".debug_info") Ret.push_back(IS); return Ret; } /* Reads one chunk per .debug_info section, then rebases the per-chunk CU indices by the running CuId and fills the string pool, symbol table and CU vectors. */ void GdbIndexSection::buildIndex() { std::vector V = getDebugInfoSections(); if (V.empty()) return; for (InputSection *Sec : V) Chunks.push_back(readDwarf(Sec)); uint32_t CuId = 0; for (GdbIndexChunk &D : Chunks) { for (AddressEntry &E : D.AddressArea) E.CuIndex += CuId; // Populate constant pool area. 
for (NameTypeEntry &NameType : D.NamesAndTypes) { uint32_t Hash = hash(NameType.Name); size_t Offset = StringPool.add(NameType.Name); bool IsNew; GdbSymbol *Sym; std::tie(IsNew, Sym) = SymbolTable.add(Hash, Offset); if (IsNew) { Sym->CuVectorIndex = CuVectors.size(); CuVectors.resize(CuVectors.size() + 1); } CuVectors[Sym->CuVectorIndex].insert(CuId | (NameType.Type << 24)); } CuId += D.CompilationUnits.size(); } } /* Builds the chunk for one section by re-opening the memory buffer of its file as an object file; reports an error and returns an empty chunk when that fails. */ GdbIndexChunk GdbIndexSection::readDwarf(InputSection *Sec) { Expected> Obj = object::ObjectFile::createObjectFile(Sec->File->MB); if (!Obj) { error(toString(Sec->File) + ": error creating DWARF context"); return {}; } DWARFContextInMemory Dwarf(*Obj.get()); GdbIndexChunk Ret; Ret.CompilationUnits = readCuList(Dwarf, Sec); Ret.AddressArea = readAddressArea(Dwarf, Sec); Ret.NamesAndTypes = readPubNamesAndTypes(Dwarf, Config->IsLE); return Ret; } /* Total number of compilation-unit entries across all chunks. */ static size_t getCuSize(std::vector &C) { size_t Ret = 0; for (GdbIndexChunk &D : C) Ret += D.CompilationUnits.size(); return Ret; } /* Total number of address-area entries across all chunks. */ static size_t getAddressAreaSize(std::vector &C) { size_t Ret = 0; for (GdbIndexChunk &D : C) Ret += D.AddressArea.size(); return Ret; } void GdbIndexSection::finalizeContents() { if (Finalized) return; Finalized = true; buildIndex(); SymbolTable.finalizeContents(); // GdbIndex header consist from version fields // and 5 more fields with different kinds of offsets. 
CuTypesOffset = CuListOffset + getCuSize(Chunks) * CompilationUnitSize; SymTabOffset = CuTypesOffset + getAddressAreaSize(Chunks) * AddressEntrySize; ConstantPoolOffset = SymTabOffset + SymbolTable.getCapacity() * SymTabEntrySize; for (std::set &CuVec : CuVectors) { CuVectorsOffset.push_back(CuVectorsSize); CuVectorsSize += OffsetTypeSize * (CuVec.size() + 1); } StringPoolOffset = ConstantPoolOffset + CuVectorsSize; StringPool.finalizeInOrder(); } size_t GdbIndexSection::getSize() const { const_cast(this)->finalizeContents(); return StringPoolOffset + StringPool.getSize(); } void GdbIndexSection::writeTo(uint8_t *Buf) { write32le(Buf, 7); // Write version. write32le(Buf + 4, CuListOffset); // CU list offset. write32le(Buf + 8, CuTypesOffset); // Types CU list offset. write32le(Buf + 12, CuTypesOffset); // Address area offset. write32le(Buf + 16, SymTabOffset); // Symbol table offset. write32le(Buf + 20, ConstantPoolOffset); // Constant pool offset. Buf += 24; // Write the CU list. for (GdbIndexChunk &D : Chunks) { for (CompilationUnitEntry &Cu : D.CompilationUnits) { write64le(Buf, Cu.CuOffset); write64le(Buf + 8, Cu.CuLength); Buf += 16; } } // Write the address area. for (GdbIndexChunk &D : Chunks) { for (AddressEntry &E : D.AddressArea) { uint64_t BaseAddr = E.Section->getParent()->Addr + E.Section->getOffset(0); write64le(Buf, BaseAddr + E.LowAddress); write64le(Buf + 8, BaseAddr + E.HighAddress); write32le(Buf + 16, E.CuIndex); Buf += 20; } } // Write the symbol table. for (size_t I = 0; I < SymbolTable.getCapacity(); ++I) { GdbSymbol *Sym = SymbolTable.getSymbol(I); if (Sym) { size_t NameOffset = Sym->NameOffset + StringPoolOffset - ConstantPoolOffset; size_t CuVectorOffset = CuVectorsOffset[Sym->CuVectorIndex]; write32le(Buf, NameOffset); write32le(Buf + 4, CuVectorOffset); } Buf += 8; } // Write the CU vectors into the constant pool. 
for (std::set &CuVec : CuVectors) { write32le(Buf, CuVec.size()); Buf += 4; for (uint32_t Val : CuVec) { write32le(Buf, Val); Buf += 4; } } StringPool.write(Buf); } bool GdbIndexSection::empty() const { return !Out::DebugInfo; } template EhFrameHeader::EhFrameHeader() : SyntheticSection(SHF_ALLOC, SHT_PROGBITS, 1, ".eh_frame_hdr") {} // .eh_frame_hdr contains a binary search table of pointers to FDEs. // Each entry of the search table consists of two values, // the starting PC from where FDEs covers, and the FDE's address. // It is sorted by PC. template void EhFrameHeader::writeTo(uint8_t *Buf) { const endianness E = ELFT::TargetEndianness; // Sort the FDE list by their PC and uniqueify. Usually there is only // one FDE for a PC (i.e. function), but if ICF merges two functions // into one, there can be more than one FDEs pointing to the address. auto Less = [](const FdeData &A, const FdeData &B) { return A.Pc < B.Pc; }; std::stable_sort(Fdes.begin(), Fdes.end(), Less); auto Eq = [](const FdeData &A, const FdeData &B) { return A.Pc == B.Pc; }; Fdes.erase(std::unique(Fdes.begin(), Fdes.end(), Eq), Fdes.end()); Buf[0] = 1; Buf[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4; Buf[2] = DW_EH_PE_udata4; Buf[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4; write32(Buf + 4, In::EhFrame->getParent()->Addr - this->getVA() - 4); write32(Buf + 8, Fdes.size()); Buf += 12; uint64_t VA = this->getVA(); for (FdeData &Fde : Fdes) { write32(Buf, Fde.Pc - VA); write32(Buf + 4, Fde.FdeVA - VA); Buf += 8; } } template size_t EhFrameHeader::getSize() const { // .eh_frame_hdr has a 12 bytes header followed by an array of FDEs. 
return 12 + In::EhFrame->NumFdes * 8; } template void EhFrameHeader::addFde(uint32_t Pc, uint32_t FdeVA) { Fdes.push_back({Pc, FdeVA}); } template bool EhFrameHeader::empty() const { return In::EhFrame->empty(); } template VersionDefinitionSection::VersionDefinitionSection() : SyntheticSection(SHF_ALLOC, SHT_GNU_verdef, sizeof(uint32_t), ".gnu.version_d") {} static StringRef getFileDefName() { if (!Config->SoName.empty()) return Config->SoName; return Config->OutputFile; } template void VersionDefinitionSection::finalizeContents() { FileDefNameOff = InX::DynStrTab->addString(getFileDefName()); for (VersionDefinition &V : Config->VersionDefinitions) V.NameOff = InX::DynStrTab->addString(V.Name); getParent()->Link = InX::DynStrTab->getParent()->SectionIndex; // sh_info should be set to the number of definitions. This fact is missed in // documentation, but confirmed by binutils community: // https://sourceware.org/ml/binutils/2014-11/msg00355.html getParent()->Info = getVerDefNum(); } template void VersionDefinitionSection::writeOne(uint8_t *Buf, uint32_t Index, StringRef Name, size_t NameOff) { auto *Verdef = reinterpret_cast(Buf); Verdef->vd_version = 1; Verdef->vd_cnt = 1; Verdef->vd_aux = sizeof(Elf_Verdef); Verdef->vd_next = sizeof(Elf_Verdef) + sizeof(Elf_Verdaux); Verdef->vd_flags = (Index == 1 ? VER_FLG_BASE : 0); Verdef->vd_ndx = Index; Verdef->vd_hash = hashSysV(Name); auto *Verdaux = reinterpret_cast(Buf + sizeof(Elf_Verdef)); Verdaux->vda_name = NameOff; Verdaux->vda_next = 0; } template void VersionDefinitionSection::writeTo(uint8_t *Buf) { writeOne(Buf, 1, getFileDefName(), FileDefNameOff); for (VersionDefinition &V : Config->VersionDefinitions) { Buf += sizeof(Elf_Verdef) + sizeof(Elf_Verdaux); writeOne(Buf, V.Id, V.Name, V.NameOff); } // Need to terminate the last version definition. 
Elf_Verdef *Verdef = reinterpret_cast(Buf); Verdef->vd_next = 0; } template size_t VersionDefinitionSection::getSize() const { return (sizeof(Elf_Verdef) + sizeof(Elf_Verdaux)) * getVerDefNum(); } template VersionTableSection::VersionTableSection() : SyntheticSection(SHF_ALLOC, SHT_GNU_versym, sizeof(uint16_t), ".gnu.version") { this->Entsize = sizeof(Elf_Versym); } template void VersionTableSection::finalizeContents() { // At the moment of june 2016 GNU docs does not mention that sh_link field // should be set, but Sun docs do. Also readelf relies on this field. getParent()->Link = InX::DynSymTab->getParent()->SectionIndex; } template size_t VersionTableSection::getSize() const { return sizeof(Elf_Versym) * (InX::DynSymTab->getSymbols().size() + 1); } template void VersionTableSection::writeTo(uint8_t *Buf) { auto *OutVersym = reinterpret_cast(Buf) + 1; for (const SymbolTableEntry &S : InX::DynSymTab->getSymbols()) { OutVersym->vs_index = S.Symbol->symbol()->VersionId; ++OutVersym; } } template bool VersionTableSection::empty() const { return !In::VerDef && In::VerNeed->empty(); } template VersionNeedSection::VersionNeedSection() : SyntheticSection(SHF_ALLOC, SHT_GNU_verneed, sizeof(uint32_t), ".gnu.version_r") { // Identifiers in verneed section start at 2 because 0 and 1 are reserved // for VER_NDX_LOCAL and VER_NDX_GLOBAL. // First identifiers are reserved by verdef section if it exist. NextIndex = getVerDefNum() + 1; } template void VersionNeedSection::addSymbol(SharedSymbol *SS) { auto *Ver = reinterpret_cast(SS->Verdef); if (!Ver) { SS->symbol()->VersionId = VER_NDX_GLOBAL; return; } auto *File = cast>(SS->File); // If we don't already know that we need an Elf_Verneed for this DSO, prepare // to create one by adding it to our needed list and creating a dynstr entry // for the soname. 
if (File->VerdefMap.empty()) Needed.push_back({File, InX::DynStrTab->addString(File->SoName)}); typename SharedFile::NeededVer &NV = File->VerdefMap[Ver]; // If we don't already know that we need an Elf_Vernaux for this Elf_Verdef, // prepare to create one by allocating a version identifier and creating a // dynstr entry for the version name. if (NV.Index == 0) { NV.StrTab = InX::DynStrTab->addString(File->getStringTable().data() + Ver->getAux()->vda_name); NV.Index = NextIndex++; } SS->symbol()->VersionId = NV.Index; } template void VersionNeedSection::writeTo(uint8_t *Buf) { // The Elf_Verneeds need to appear first, followed by the Elf_Vernauxs. auto *Verneed = reinterpret_cast(Buf); auto *Vernaux = reinterpret_cast(Verneed + Needed.size()); for (std::pair *, size_t> &P : Needed) { // Create an Elf_Verneed for this DSO. Verneed->vn_version = 1; Verneed->vn_cnt = P.first->VerdefMap.size(); Verneed->vn_file = P.second; Verneed->vn_aux = reinterpret_cast(Vernaux) - reinterpret_cast(Verneed); Verneed->vn_next = sizeof(Elf_Verneed); ++Verneed; // Create the Elf_Vernauxs for this Elf_Verneed. The loop iterates over // VerdefMap, which will only contain references to needed version // definitions. Each Elf_Vernaux is based on the information contained in // the Elf_Verdef in the source DSO. This loop iterates over a std::map of // pointers, but is deterministic because the pointers refer to Elf_Verdef // data structures within a single input file. 
for (auto &NV : P.first->VerdefMap) { Vernaux->vna_hash = NV.first->vd_hash; Vernaux->vna_flags = 0; Vernaux->vna_other = NV.second.Index; Vernaux->vna_name = NV.second.StrTab; Vernaux->vna_next = sizeof(Elf_Vernaux); ++Vernaux; } Vernaux[-1].vna_next = 0; } Verneed[-1].vn_next = 0; } template void VersionNeedSection::finalizeContents() { getParent()->Link = InX::DynStrTab->getParent()->SectionIndex; getParent()->Info = Needed.size(); } template size_t VersionNeedSection::getSize() const { unsigned Size = Needed.size() * sizeof(Elf_Verneed); for (const std::pair *, size_t> &P : Needed) Size += P.first->VerdefMap.size() * sizeof(Elf_Vernaux); return Size; } template bool VersionNeedSection::empty() const { return getNeedNum() == 0; } MergeSyntheticSection::MergeSyntheticSection(StringRef Name, uint32_t Type, uint64_t Flags, uint32_t Alignment) : SyntheticSection(Flags, Type, Alignment, Name), Builder(StringTableBuilder::RAW, Alignment) {} void MergeSyntheticSection::addSection(MergeInputSection *MS) { MS->Parent = this; Sections.push_back(MS); } void MergeSyntheticSection::writeTo(uint8_t *Buf) { Builder.write(Buf); } bool MergeSyntheticSection::shouldTailMerge() const { return (this->Flags & SHF_STRINGS) && Config->Optimize >= 2; } void MergeSyntheticSection::finalizeTailMerge() { // Add all string pieces to the string table builder to create section // contents. for (MergeInputSection *Sec : Sections) for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) if (Sec->Pieces[I].Live) Builder.add(Sec->getData(I)); // Fix the string table content. After this, the contents will never change. Builder.finalize(); // finalize() fixed tail-optimized strings, so we can now get // offsets of strings. Get an offset for each string and save it // to a corresponding StringPiece for easy access. 
for (MergeInputSection *Sec : Sections) for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) if (Sec->Pieces[I].Live) Sec->Pieces[I].OutputOff = Builder.getOffset(Sec->getData(I)); } void MergeSyntheticSection::finalizeNoTailMerge() { // Add all string pieces to the string table builder to create section // contents. Because we are not tail-optimizing, offsets of strings are // fixed when they are added to the builder (string table builder contains // a hash table from strings to offsets). for (MergeInputSection *Sec : Sections) for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) if (Sec->Pieces[I].Live) Sec->Pieces[I].OutputOff = Builder.add(Sec->getData(I)); Builder.finalizeInOrder(); } void MergeSyntheticSection::finalizeContents() { if (shouldTailMerge()) finalizeTailMerge(); else finalizeNoTailMerge(); } size_t MergeSyntheticSection::getSize() const { return Builder.getSize(); } // This function decompresses compressed sections and scans over the input // sections to create mergeable synthetic sections. It removes // MergeInputSections from the input section array and adds new synthetic // sections at the location of the first input section that it replaces. It then // finalizes each synthetic section in order to compute an output offset for // each piece of each input section. void elf::decompressAndMergeSections() { // splitIntoPieces needs to be called on each MergeInputSection before calling // finalizeContents(). Do that first. parallelForEach(InputSections.begin(), InputSections.end(), [](InputSectionBase *S) { if (!S->Live) return; if (Decompressor::isCompressedELFSection(S->Flags, S->Name)) S->uncompress(); if (auto *MS = dyn_cast(S)) MS->splitIntoPieces(); }); std::vector MergeSections; for (InputSectionBase *&S : InputSections) { MergeInputSection *MS = dyn_cast(S); if (!MS) continue; // We do not want to handle sections that are not alive, so just remove // them instead of trying to merge. 
if (!MS->Live) continue; StringRef OutsecName = getOutputSectionName(MS->Name); uint64_t Flags = MS->Flags & ~(uint64_t)SHF_GROUP; uint32_t Alignment = std::max(MS->Alignment, MS->Entsize); auto I = llvm::find_if(MergeSections, [=](MergeSyntheticSection *Sec) { return Sec->Name == OutsecName && Sec->Flags == Flags && Sec->Alignment == Alignment; }); if (I == MergeSections.end()) { MergeSyntheticSection *Syn = make(OutsecName, MS->Type, Flags, Alignment); MergeSections.push_back(Syn); I = std::prev(MergeSections.end()); S = Syn; } else { S = nullptr; } (*I)->addSection(MS); } for (auto *MS : MergeSections) MS->finalizeContents(); std::vector &V = InputSections; V.erase(std::remove(V.begin(), V.end(), nullptr), V.end()); } MipsRldMapSection::MipsRldMapSection() : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, Config->Wordsize, ".rld_map") {} ARMExidxSentinelSection::ARMExidxSentinelSection() : SyntheticSection(SHF_ALLOC | SHF_LINK_ORDER, SHT_ARM_EXIDX, Config->Wordsize, ".ARM.exidx") {} // Write a terminating sentinel entry to the end of the .ARM.exidx table. // This section will have been sorted last in the .ARM.exidx table. // This table entry will have the form: // | PREL31 upper bound of code that has exception tables | EXIDX_CANTUNWIND | // The sentinel must have the PREL31 value of an address higher than any // address described by any other table entry. void ARMExidxSentinelSection::writeTo(uint8_t *Buf) { // The Sections are sorted in order of ascending PREL31 address with the // sentinel last. We need to find the InputSection that precedes the // sentinel. By construction the Sentinel is in the last // InputSectionDescription as the InputSection that precedes it. 
OutputSectionCommand *C = Script->getCmd(getParent()); auto ISD = std::find_if(C->Commands.rbegin(), C->Commands.rend(), [](const BaseCommand *Base) { return isa(Base); }); auto L = cast(*ISD); InputSection *Highest = L->Sections[L->Sections.size() - 2]; InputSection *LS = Highest->getLinkOrderDep(); uint64_t S = LS->getParent()->Addr + LS->getOffset(LS->getSize()); uint64_t P = getVA(); Target->relocateOne(Buf, R_ARM_PREL31, S - P); write32le(Buf + 4, 0x1); } ThunkSection::ThunkSection(OutputSection *OS, uint64_t Off) : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, Config->Wordsize, ".text.thunk") { this->Parent = OS; this->OutSecOff = Off; } void ThunkSection::addThunk(Thunk *T) { uint64_t Off = alignTo(Size, T->alignment); T->Offset = Off; Thunks.push_back(T); T->addSymbols(*this); Size = Off + T->size(); } void ThunkSection::writeTo(uint8_t *Buf) { for (const Thunk *T : Thunks) T->writeTo(Buf + T->Offset, *this); } InputSection *ThunkSection::getTargetInputSection() const { const Thunk *T = Thunks.front(); return T->getTargetInputSection(); } InputSection *InX::ARMAttributes; BssSection *InX::Bss; BssSection *InX::BssRelRo; BuildIdSection *InX::BuildId; InputSection *InX::Common; SyntheticSection *InX::Dynamic; StringTableSection *InX::DynStrTab; SymbolTableBaseSection *InX::DynSymTab; InputSection *InX::Interp; GdbIndexSection *InX::GdbIndex; GotSection *InX::Got; GotPltSection *InX::GotPlt; GnuHashTableSection *InX::GnuHashTab; IgotPltSection *InX::IgotPlt; MipsGotSection *InX::MipsGot; MipsRldMapSection *InX::MipsRldMap; PltSection *InX::Plt; PltSection *InX::Iplt; StringTableSection *InX::ShStrTab; StringTableSection *InX::StrTab; SymbolTableBaseSection *InX::SymTab; template void PltSection::addEntry(SymbolBody &Sym); template void PltSection::addEntry(SymbolBody &Sym); template void PltSection::addEntry(SymbolBody &Sym); template void PltSection::addEntry(SymbolBody &Sym); template InputSection *elf::createCommonSection(); template 
InputSection *elf::createCommonSection(); template InputSection *elf::createCommonSection(); template InputSection *elf::createCommonSection(); template MergeInputSection *elf::createCommentSection(); template MergeInputSection *elf::createCommentSection(); template MergeInputSection *elf::createCommentSection(); template MergeInputSection *elf::createCommentSection(); template class elf::MipsAbiFlagsSection; template class elf::MipsAbiFlagsSection; template class elf::MipsAbiFlagsSection; template class elf::MipsAbiFlagsSection; template class elf::MipsOptionsSection; template class elf::MipsOptionsSection; template class elf::MipsOptionsSection; template class elf::MipsOptionsSection; template class elf::MipsReginfoSection; template class elf::MipsReginfoSection; template class elf::MipsReginfoSection; template class elf::MipsReginfoSection; template class elf::DynamicSection; template class elf::DynamicSection; template class elf::DynamicSection; template class elf::DynamicSection; template class elf::RelocationSection; template class elf::RelocationSection; template class elf::RelocationSection; template class elf::RelocationSection; template class elf::SymbolTableSection; template class elf::SymbolTableSection; template class elf::SymbolTableSection; template class elf::SymbolTableSection; template class elf::HashTableSection; template class elf::HashTableSection; template class elf::HashTableSection; template class elf::HashTableSection; template class elf::EhFrameHeader; template class elf::EhFrameHeader; template class elf::EhFrameHeader; template class elf::EhFrameHeader; template class elf::VersionTableSection; template class elf::VersionTableSection; template class elf::VersionTableSection; template class elf::VersionTableSection; template class elf::VersionNeedSection; template class elf::VersionNeedSection; template class elf::VersionNeedSection; template class elf::VersionNeedSection; template class elf::VersionDefinitionSection; template class 
elf::VersionDefinitionSection; template class elf::VersionDefinitionSection; template class elf::VersionDefinitionSection; template class elf::EhFrameSection; template class elf::EhFrameSection; template class elf::EhFrameSection; template class elf::EhFrameSection; diff --git a/ELF/Target.cpp b/ELF/Target.cpp index c1a85e165258..c886419971bc 100644 --- a/ELF/Target.cpp +++ b/ELF/Target.cpp @@ -1,160 +1,162 @@ //===- Target.cpp ---------------------------------------------------------===// // // The LLVM Linker // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // Machine-specific things, such as applying relocations, creation of // GOT or PLT entries, etc., are handled in this file. // // Refer the ELF spec for the single letter variables, S, A or P, used // in this file. // // Some functions defined in this file has "relaxTls" as part of their names. // They do peephole optimization for TLS variables by rewriting instructions. // They are not part of the ABI but optional optimization, so you can skip // them if you are not interested in how TLS variables are optimized. // See the following paper for the details. 
// // Ulrich Drepper, ELF Handling For Thread-Local Storage // http://www.akkadia.org/drepper/tls.pdf // //===----------------------------------------------------------------------===// #include "Target.h" #include "Error.h" #include "InputFiles.h" #include "OutputSections.h" #include "SymbolTable.h" #include "Symbols.h" #include "llvm/Object/ELF.h" using namespace llvm; using namespace llvm::object; using namespace llvm::ELF; using namespace lld; using namespace lld::elf; TargetInfo *elf::Target; std::string lld::toString(uint32_t Type) { StringRef S = getELFRelocationTypeName(elf::Config->EMachine, Type); if (S == "Unknown") return ("Unknown (" + Twine(Type) + ")").str(); return S; } TargetInfo *elf::getTarget() { switch (Config->EMachine) { case EM_386: case EM_IAMCU: return getX86TargetInfo(); case EM_AARCH64: return getAArch64TargetInfo(); case EM_AMDGPU: return getAMDGPUTargetInfo(); case EM_ARM: return getARMTargetInfo(); case EM_AVR: return getAVRTargetInfo(); case EM_MIPS: switch (Config->EKind) { case ELF32LEKind: return getMipsTargetInfo(); case ELF32BEKind: return getMipsTargetInfo(); case ELF64LEKind: return getMipsTargetInfo(); case ELF64BEKind: return getMipsTargetInfo(); default: fatal("unsupported MIPS target"); } case EM_PPC: return getPPCTargetInfo(); case EM_PPC64: return getPPC64TargetInfo(); + case EM_SPARCV9: + return getSPARCV9TargetInfo(); case EM_X86_64: if (Config->EKind == ELF32LEKind) return getX32TargetInfo(); return getX86_64TargetInfo(); } fatal("unknown target machine"); } template static std::string getErrorLoc(const uint8_t *Loc) { for (InputSectionBase *D : InputSections) { auto *IS = dyn_cast_or_null(D); if (!IS || !IS->getParent()) continue; uint8_t *ISLoc = IS->getParent()->Loc + IS->OutSecOff; if (ISLoc <= Loc && Loc < ISLoc + IS->getSize()) return IS->template getLocation(Loc - ISLoc) + ": "; } return ""; } std::string elf::getErrorLocation(const uint8_t *Loc) { switch (Config->EKind) { case ELF32LEKind: return 
getErrorLoc(Loc); case ELF32BEKind: return getErrorLoc(Loc); case ELF64LEKind: return getErrorLoc(Loc); case ELF64BEKind: return getErrorLoc(Loc); default: llvm_unreachable("unknown ELF type"); } } TargetInfo::~TargetInfo() {} int64_t TargetInfo::getImplicitAddend(const uint8_t *Buf, uint32_t Type) const { return 0; } bool TargetInfo::usesOnlyLowPageBits(uint32_t Type) const { return false; } bool TargetInfo::needsThunk(RelExpr Expr, uint32_t RelocType, const InputFile *File, const SymbolBody &S) const { return false; } void TargetInfo::writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const { writeGotPlt(Buf, S); } RelExpr TargetInfo::adjustRelaxExpr(uint32_t Type, const uint8_t *Data, RelExpr Expr) const { return Expr; } void TargetInfo::relaxGot(uint8_t *Loc, uint64_t Val) const { llvm_unreachable("Should not have claimed to be relaxable"); } void TargetInfo::relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const { llvm_unreachable("Should not have claimed to be relaxable"); } void TargetInfo::relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, uint64_t Val) const { llvm_unreachable("Should not have claimed to be relaxable"); } void TargetInfo::relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const { llvm_unreachable("Should not have claimed to be relaxable"); } void TargetInfo::relaxTlsLdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const { llvm_unreachable("Should not have claimed to be relaxable"); } diff --git a/ELF/Target.h b/ELF/Target.h index bf703fd0086a..5914d9bbb7ef 100644 --- a/ELF/Target.h +++ b/ELF/Target.h @@ -1,158 +1,159 @@ //===- Target.h -------------------------------------------------*- C++ -*-===// // // The LLVM Linker // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// #ifndef LLD_ELF_TARGET_H #define LLD_ELF_TARGET_H #include "Error.h" #include "InputSection.h" #include "llvm/Object/ELF.h" namespace lld { std::string toString(uint32_t RelType); namespace elf { class InputFile; class SymbolBody; class TargetInfo { public: virtual bool isPicRel(uint32_t Type) const { return true; } virtual uint32_t getDynRel(uint32_t Type) const { return Type; } virtual void writeGotPltHeader(uint8_t *Buf) const {} virtual void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const {}; virtual void writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const; virtual int64_t getImplicitAddend(const uint8_t *Buf, uint32_t Type) const; // If lazy binding is supported, the first entry of the PLT has code // to call the dynamic linker to resolve PLT entries the first time // they are called. This function writes that code. virtual void writePltHeader(uint8_t *Buf) const {} virtual void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const {} virtual void addPltHeaderSymbols(InputSectionBase *IS) const {} virtual void addPltSymbols(InputSectionBase *IS, uint64_t Off) const {} // Returns true if a relocation only uses the low bits of a value such that // all those bits are in in the same page. For example, if the relocation // only uses the low 12 bits in a system with 4k pages. If this is true, the // bits will always have the same value at runtime and we don't have to emit // a dynamic relocation. virtual bool usesOnlyLowPageBits(uint32_t Type) const; // Decide whether a Thunk is needed for the relocation from File // targeting S. 
virtual bool needsThunk(RelExpr Expr, uint32_t RelocType, const InputFile *File, const SymbolBody &S) const; virtual RelExpr getRelExpr(uint32_t Type, const SymbolBody &S, const uint8_t *Loc) const = 0; virtual void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const = 0; virtual ~TargetInfo(); unsigned TlsGdRelaxSkip = 1; unsigned PageSize = 4096; unsigned DefaultMaxPageSize = 4096; // On FreeBSD x86_64 the first page cannot be mmaped. // On Linux that is controled by vm.mmap_min_addr. At least on some x86_64 // installs that is 65536, so the first 15 pages cannot be used. // Given that, the smallest value that can be used in here is 0x10000. uint64_t DefaultImageBase = 0x10000; // Offset of _GLOBAL_OFFSET_TABLE_ from base of .got section. Use -1 for // end of .got uint64_t GotBaseSymOff = 0; uint32_t CopyRel; uint32_t GotRel; uint32_t PltRel; uint32_t RelativeRel; uint32_t IRelativeRel; uint32_t TlsDescRel; uint32_t TlsGotRel; uint32_t TlsModuleIndexRel; uint32_t TlsOffsetRel; unsigned GotEntrySize = 0; unsigned GotPltEntrySize = 0; unsigned PltEntrySize; unsigned PltHeaderSize; // At least on x86_64 positions 1 and 2 are used by the first plt entry // to support lazy loading. unsigned GotPltHeaderEntriesNum = 3; // Set to 0 for variant 2 unsigned TcbSize = 0; bool NeedsThunks = false; // A 4-byte field corresponding to one or more trap instructions, used to pad // executable OutputSections. 
uint32_t TrapInstr = 0; virtual RelExpr adjustRelaxExpr(uint32_t Type, const uint8_t *Data, RelExpr Expr) const; virtual void relaxGot(uint8_t *Loc, uint64_t Val) const; virtual void relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, uint64_t Val) const; virtual void relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const; virtual void relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const; virtual void relaxTlsLdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const; }; TargetInfo *getAArch64TargetInfo(); TargetInfo *getAMDGPUTargetInfo(); TargetInfo *getARMTargetInfo(); TargetInfo *getAVRTargetInfo(); TargetInfo *getPPC64TargetInfo(); TargetInfo *getPPCTargetInfo(); +TargetInfo *getSPARCV9TargetInfo(); TargetInfo *getX32TargetInfo(); TargetInfo *getX86TargetInfo(); TargetInfo *getX86_64TargetInfo(); template TargetInfo *getMipsTargetInfo(); std::string getErrorLocation(const uint8_t *Loc); uint64_t getPPC64TocBase(); uint64_t getAArch64Page(uint64_t Expr); extern TargetInfo *Target; TargetInfo *getTarget(); template static void checkInt(uint8_t *Loc, int64_t V, uint32_t Type) { if (!llvm::isInt(V)) error(getErrorLocation(Loc) + "relocation " + lld::toString(Type) + " out of range"); } template static void checkUInt(uint8_t *Loc, uint64_t V, uint32_t Type) { if (!llvm::isUInt(V)) error(getErrorLocation(Loc) + "relocation " + lld::toString(Type) + " out of range"); } template static void checkIntUInt(uint8_t *Loc, uint64_t V, uint32_t Type) { if (!llvm::isInt(V) && !llvm::isUInt(V)) error(getErrorLocation(Loc) + "relocation " + lld::toString(Type) + " out of range"); } template static void checkAlignment(uint8_t *Loc, uint64_t V, uint32_t Type) { if ((V & (N - 1)) != 0) error(getErrorLocation(Loc) + "improper alignment for relocation " + lld::toString(Type)); } } // namespace elf } #endif diff --git a/ELF/Writer.cpp b/ELF/Writer.cpp index 4c12b18836bf..080d8e787301 100644 --- a/ELF/Writer.cpp +++ b/ELF/Writer.cpp @@ -1,1902 +1,1902 @@ //===- Writer.cpp 
---------------------------------------------------------===// // // The LLVM Linker // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "Writer.h" #include "Config.h" #include "Filesystem.h" #include "LinkerScript.h" #include "MapFile.h" #include "Memory.h" #include "OutputSections.h" #include "Relocations.h" #include "Strings.h" #include "SymbolTable.h" #include "SyntheticSections.h" #include "Target.h" #include "Threads.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/raw_ostream.h" #include using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::support; using namespace llvm::support::endian; using namespace lld; using namespace lld::elf; namespace { // The writer writes a SymbolTable result to a file. 
template class Writer { public: typedef typename ELFT::Shdr Elf_Shdr; typedef typename ELFT::Ehdr Elf_Ehdr; typedef typename ELFT::Phdr Elf_Phdr; void run(); private: void clearOutputSections(); void createSyntheticSections(); void copyLocalSymbols(); void addSectionSymbols(); void addReservedSymbols(); void createSections(); void forEachRelSec(std::function Fn); void sortSections(); void finalizeSections(); void addPredefinedSections(); std::vector createPhdrs(); void removeEmptyPTLoad(); void addPtArmExid(std::vector &Phdrs); void assignFileOffsets(); void assignFileOffsetsBinary(); void setPhdrs(); void fixSectionAlignments(); void fixPredefinedSymbols(); void openFile(); void writeHeader(); void writeSections(); void writeSectionsBinary(); void writeBuildId(); std::unique_ptr Buffer; OutputSectionFactory Factory{OutputSections}; void addRelIpltSymbols(); void addStartEndSymbols(); void addStartStopSymbols(OutputSection *Sec); uint64_t getEntryAddr(); OutputSection *findSection(StringRef Name); OutputSection *findSectionInScript(StringRef Name); OutputSectionCommand *findSectionCommand(StringRef Name); std::vector Phdrs; uint64_t FileSize; uint64_t SectionHeaderOff; bool HasGotBaseSym = false; }; } // anonymous namespace StringRef elf::getOutputSectionName(StringRef Name) { // ".zdebug_" is a prefix for ZLIB-compressed sections. // Because we decompressed input sections, we want to remove 'z'. if (Name.startswith(".zdebug_")) return Saver.save("." + Name.substr(2)); if (Config->Relocatable) return Name; for (StringRef V : {".text.", ".rodata.", ".data.rel.ro.", ".data.", ".bss.rel.ro.", ".bss.", ".init_array.", ".fini_array.", ".ctors.", ".dtors.", ".tbss.", - ".gcc_except_table.", ".tdata.", ".ARM.exidx."}) { + ".gcc_except_table.", ".tdata.", ".ARM.exidx.", ".ARM.extab."}) { StringRef Prefix = V.drop_back(); if (Name.startswith(V) || Name == Prefix) return Prefix; } // CommonSection is identified as "COMMON" in linker scripts. 
// By default, it should go to .bss section. if (Name == "COMMON") return ".bss"; return Name; } template static bool needsInterpSection() { return !Symtab::X->getSharedFiles().empty() && !Config->DynamicLinker.empty() && !Script->ignoreInterpSection(); } template void elf::writeResult() { Writer().run(); } template void Writer::removeEmptyPTLoad() { auto I = std::remove_if(Phdrs.begin(), Phdrs.end(), [&](const PhdrEntry &P) { if (P.p_type != PT_LOAD) return false; if (!P.First) return true; uint64_t Size = P.Last->Addr + P.Last->Size - P.First->Addr; return Size == 0; }); Phdrs.erase(I, Phdrs.end()); } template static void combineEhFrameSections() { for (InputSectionBase *&S : InputSections) { EhInputSection *ES = dyn_cast(S); if (!ES || !ES->Live) continue; In::EhFrame->addSection(ES); S = nullptr; } std::vector &V = InputSections; V.erase(std::remove(V.begin(), V.end(), nullptr), V.end()); } template void Writer::clearOutputSections() { if (Script->Opt.HasSections) Script->createOrphanCommands(); else Script->fabricateDefaultCommands(); // Clear the OutputSections to make sure it is not used anymore. Any // code from this point on should be using the linker script // commands. for (OutputSection *Sec : OutputSections) Sec->Sections.clear(); OutputSections.clear(); } // The main function of the writer. template void Writer::run() { // Create linker-synthesized sections such as .got or .plt. // Such sections are of type input section. createSyntheticSections(); if (!Config->Relocatable) combineEhFrameSections(); // We need to create some reserved symbols such as _end. Create them. if (!Config->Relocatable) addReservedSymbols(); // Create output sections. if (Script->Opt.HasSections) { // If linker script contains SECTIONS commands, let it create sections. Script->processCommands(Factory); // Linker scripts may have left some input sections unassigned. // Assign such sections using the default rule. 
Script->addOrphanSections(Factory); } else { // If linker script does not contain SECTIONS commands, create // output sections by default rules. We still need to give the // linker script a chance to run, because it might contain // non-SECTIONS commands such as ASSERT. createSections(); Script->processCommands(Factory); } if (Config->Discard != DiscardPolicy::All) copyLocalSymbols(); if (Config->CopyRelocs) addSectionSymbols(); // Now that we have a complete set of output sections. This function // completes section contents. For example, we need to add strings // to the string table, and add entries to .got and .plt. // finalizeSections does that. finalizeSections(); if (ErrorCount) return; if (!Script->Opt.HasSections && !Config->Relocatable) fixSectionAlignments(); // If -compressed-debug-sections is specified, we need to compress // .debug_* sections. Do it right now because it changes the size of // output sections. parallelForEach( OutputSectionCommands.begin(), OutputSectionCommands.end(), [](OutputSectionCommand *Cmd) { Cmd->maybeCompress(); }); Script->assignAddresses(Phdrs); // Remove empty PT_LOAD to avoid causing the dynamic linker to try to mmap a // 0 sized region. This has to be done late since only after assignAddresses // we know the size of the sections. removeEmptyPTLoad(); if (!Config->OFormatBinary) assignFileOffsets(); else assignFileOffsetsBinary(); setPhdrs(); if (Config->Relocatable) { for (OutputSectionCommand *Cmd : OutputSectionCommands) Cmd->Sec->Addr = 0; } else { fixPredefinedSymbols(); } // It does not make sense try to open the file if we have error already. if (ErrorCount) return; // Write the result down to a file. openFile(); if (ErrorCount) return; if (!Config->OFormatBinary) { writeHeader(); writeSections(); } else { writeSectionsBinary(); } // Backfill .note.gnu.build-id section content. This is done at last // because the content is usually a hash value of the entire output file. 
writeBuildId(); if (ErrorCount) return; // Handle -Map option. writeMapFile(OutputSectionCommands); if (ErrorCount) return; if (auto EC = Buffer->commit()) error("failed to write to the output file: " + EC.message()); // Flush the output streams and exit immediately. A full shutdown // is a good test that we are keeping track of all allocated memory, // but actually freeing it is a waste of time in a regular linker run. if (Config->ExitEarly) exitLld(0); } // Initialize Out members. template void Writer::createSyntheticSections() { // Initialize all pointers with NULL. This is needed because // you can call lld::elf::main more than once as a library. memset(&Out::First, 0, sizeof(Out)); auto Add = [](InputSectionBase *Sec) { InputSections.push_back(Sec); }; InX::DynStrTab = make(".dynstr", true); InX::Dynamic = make>(); In::RelaDyn = make>( Config->IsRela ? ".rela.dyn" : ".rel.dyn", Config->ZCombreloc); InX::ShStrTab = make(".shstrtab", false); Out::ElfHeader = make("", 0, SHF_ALLOC); Out::ElfHeader->Size = sizeof(Elf_Ehdr); Out::ProgramHeaders = make("", 0, SHF_ALLOC); Out::ProgramHeaders->updateAlignment(Config->Wordsize); if (needsInterpSection()) { InX::Interp = createInterpSection(); Add(InX::Interp); } else { InX::Interp = nullptr; } if (Config->Strip != StripPolicy::All) { InX::StrTab = make(".strtab", false); InX::SymTab = make>(*InX::StrTab); } if (Config->BuildId != BuildIdKind::None) { InX::BuildId = make(); Add(InX::BuildId); } InX::Common = createCommonSection(); if (InX::Common) Add(InX::Common); InX::Bss = make(".bss"); Add(InX::Bss); InX::BssRelRo = make(".bss.rel.ro"); Add(InX::BssRelRo); // Add MIPS-specific sections. 
bool HasDynSymTab = !Symtab::X->getSharedFiles().empty() || Config->Pic || Config->ExportDynamic; if (Config->EMachine == EM_MIPS) { if (!Config->Shared && HasDynSymTab) { InX::MipsRldMap = make(); Add(InX::MipsRldMap); } if (auto *Sec = MipsAbiFlagsSection::create()) Add(Sec); if (auto *Sec = MipsOptionsSection::create()) Add(Sec); if (auto *Sec = MipsReginfoSection::create()) Add(Sec); } if (HasDynSymTab) { InX::DynSymTab = make>(*InX::DynStrTab); Add(InX::DynSymTab); In::VerSym = make>(); Add(In::VerSym); if (!Config->VersionDefinitions.empty()) { In::VerDef = make>(); Add(In::VerDef); } In::VerNeed = make>(); Add(In::VerNeed); if (Config->GnuHash) { InX::GnuHashTab = make(); Add(InX::GnuHashTab); } if (Config->SysvHash) { In::HashTab = make>(); Add(In::HashTab); } Add(InX::Dynamic); Add(InX::DynStrTab); Add(In::RelaDyn); } // Add .got. MIPS' .got is so different from the other archs, // it has its own class. if (Config->EMachine == EM_MIPS) { InX::MipsGot = make(); Add(InX::MipsGot); } else { InX::Got = make(); Add(InX::Got); } InX::GotPlt = make(); Add(InX::GotPlt); InX::IgotPlt = make(); Add(InX::IgotPlt); if (Config->GdbIndex) { InX::GdbIndex = make(); Add(InX::GdbIndex); } // We always need to add rel[a].plt to output if it has entries. // Even for static linking it can contain R_[*]_IRELATIVE relocations. In::RelaPlt = make>( Config->IsRela ? ".rela.plt" : ".rel.plt", false /*Sort*/); Add(In::RelaPlt); // The RelaIplt immediately follows .rel.plt (.rel.dyn for ARM) to ensure // that the IRelative relocations are processed last by the dynamic loader In::RelaIplt = make>( (Config->EMachine == EM_ARM) ? 
".rel.dyn" : In::RelaPlt->Name, false /*Sort*/); Add(In::RelaIplt); InX::Plt = make(Target->PltHeaderSize); Add(InX::Plt); InX::Iplt = make(0); Add(InX::Iplt); if (!Config->Relocatable) { if (Config->EhFrameHdr) { In::EhFrameHdr = make>(); Add(In::EhFrameHdr); } In::EhFrame = make>(); Add(In::EhFrame); } if (InX::SymTab) Add(InX::SymTab); Add(InX::ShStrTab); if (InX::StrTab) Add(InX::StrTab); } static bool shouldKeepInSymtab(SectionBase *Sec, StringRef SymName, const SymbolBody &B) { if (B.isFile() || B.isSection()) return false; // If sym references a section in a discarded group, don't keep it. if (Sec == &InputSection::Discarded) return false; if (Config->Discard == DiscardPolicy::None) return true; // In ELF assembly .L symbols are normally discarded by the assembler. // If the assembler fails to do so, the linker discards them if // * --discard-locals is used. // * The symbol is in a SHF_MERGE section, which is normally the reason for // the assembler keeping the .L symbol. if (!SymName.startswith(".L") && !SymName.empty()) return true; if (Config->Discard == DiscardPolicy::Locals) return false; return !Sec || !(Sec->Flags & SHF_MERGE); } static bool includeInSymtab(const SymbolBody &B) { if (!B.isLocal() && !B.symbol()->IsUsedInRegularObj) return false; if (auto *D = dyn_cast(&B)) { // Always include absolute symbols. SectionBase *Sec = D->Section; if (!Sec) return true; if (auto *IS = dyn_cast(Sec)) { Sec = IS->Repl; IS = cast(Sec); // Exclude symbols pointing to garbage-collected sections. if (!IS->Live) return false; } if (auto *S = dyn_cast(Sec)) if (!S->getSectionPiece(D->Value)->Live) return false; } return true; } // Local symbols are not in the linker's symbol table. This function scans // each object file's symbol table to copy local symbols to the output. 
template void Writer::copyLocalSymbols() { if (!InX::SymTab) return; for (elf::ObjectFile *F : Symtab::X->getObjectFiles()) { for (SymbolBody *B : F->getLocalSymbols()) { if (!B->IsLocal) fatal(toString(F) + ": broken object: getLocalSymbols returns a non-local symbol"); auto *DR = dyn_cast(B); // No reason to keep local undefined symbol in symtab. if (!DR) continue; if (!includeInSymtab(*B)) continue; SectionBase *Sec = DR->Section; if (!shouldKeepInSymtab(Sec, B->getName(), *B)) continue; InX::SymTab->addSymbol(B); } } } template void Writer::addSectionSymbols() { // Create one STT_SECTION symbol for each output section we might // have a relocation with. for (OutputSection *Sec : OutputSections) { if (Sec->Sections.empty()) continue; InputSection *IS = Sec->Sections[0]; if (isa(IS) || IS->Type == SHT_REL || IS->Type == SHT_RELA) continue; auto *Sym = make("", /*IsLocal=*/true, /*StOther=*/0, STT_SECTION, /*Value=*/0, /*Size=*/0, IS, nullptr); InX::SymTab->addSymbol(Sym); } } // Today's loaders have a feature to make segments read-only after // processing dynamic relocations to enhance security. PT_GNU_RELRO // is defined for that. // // This function returns true if a section needs to be put into a // PT_GNU_RELRO segment. bool elf::isRelroSection(const OutputSection *Sec) { if (!Config->ZRelro) return false; uint64_t Flags = Sec->Flags; // Non-allocatable or non-writable sections don't need RELRO because // they are not writable or not even mapped to memory in the first place. // RELRO is for sections that are essentially read-only but need to // be writable only at process startup to allow dynamic linker to // apply relocations. if (!(Flags & SHF_ALLOC) || !(Flags & SHF_WRITE)) return false; // Once initialized, TLS data segments are used as data templates // for a thread-local storage. For each new thread, runtime // allocates memory for a TLS and copy templates there. No thread // are supposed to use templates directly. Thus, it can be in RELRO. 
if (Flags & SHF_TLS) return true; // .init_array, .preinit_array and .fini_array contain pointers to // functions that are executed on process startup or exit. These // pointers are set by the static linker, and they are not expected // to change at runtime. But if you are an attacker, you could do // interesting things by manipulating pointers in .fini_array, for // example. So they are put into RELRO. uint32_t Type = Sec->Type; if (Type == SHT_INIT_ARRAY || Type == SHT_FINI_ARRAY || Type == SHT_PREINIT_ARRAY) return true; // .got contains pointers to external symbols. They are resolved by // the dynamic linker when a module is loaded into memory, and after // that they are not expected to change. So, it can be in RELRO. if (InX::Got && Sec == InX::Got->getParent()) return true; // .got.plt contains pointers to external function symbols. They are // by default resolved lazily, so we usually cannot put it into RELRO. // However, if "-z now" is given, the lazy symbol resolution is // disabled, which enables us to put it into RELRO. if (Sec == InX::GotPlt->getParent()) return Config->ZNow; // .dynamic section contains data for the dynamic linker, and // there's no need to write to it at runtime, so it's better to put // it into RELRO. if (Sec == InX::Dynamic->getParent()) return true; // .bss.rel.ro is used for copy relocations for read-only symbols. // Since the dynamic linker needs to process copy relocations, the // section cannot be read-only, but once initialized, they shouldn't // change. if (Sec == InX::BssRelRo->getParent()) return true; // Sections with some special names are put into RELRO. This is a // bit unfortunate because section names shouldn't be significant in // ELF in spirit. But in reality many linker features depend on // magic section names. StringRef S = Sec->Name; return S == ".data.rel.ro" || S == ".ctors" || S == ".dtors" || S == ".jcr" || S == ".eh_frame" || S == ".openbsd.randomdata"; } // We compute a rank for each section. 
The rank indicates where the // section should be placed in the file. Instead of using simple // numbers (0,1,2...), we use a series of flags. One for each decision // point when placing the section. // Using flags has two key properties: // * It is easy to check if a give branch was taken. // * It is easy two see how similar two ranks are (see getRankProximity). enum RankFlags { RF_NOT_ADDR_SET = 1 << 16, RF_NOT_INTERP = 1 << 15, RF_NOT_ALLOC = 1 << 14, RF_WRITE = 1 << 13, RF_EXEC_WRITE = 1 << 12, RF_EXEC = 1 << 11, RF_NON_TLS_BSS = 1 << 10, RF_NON_TLS_BSS_RO = 1 << 9, RF_NOT_TLS = 1 << 8, RF_BSS = 1 << 7, RF_PPC_NOT_TOCBSS = 1 << 6, RF_PPC_OPD = 1 << 5, RF_PPC_TOCL = 1 << 4, RF_PPC_TOC = 1 << 3, RF_PPC_BRANCH_LT = 1 << 2, RF_MIPS_GPREL = 1 << 1, RF_MIPS_NOT_GOT = 1 << 0 }; static unsigned getSectionRank(const OutputSection *Sec) { unsigned Rank = 0; // We want to put section specified by -T option first, so we // can start assigning VA starting from them later. if (Config->SectionStartMap.count(Sec->Name)) return Rank; Rank |= RF_NOT_ADDR_SET; // Put .interp first because some loaders want to see that section // on the first page of the executable file when loaded into memory. if (Sec->Name == ".interp") return Rank; Rank |= RF_NOT_INTERP; // Allocatable sections go first to reduce the total PT_LOAD size and // so debug info doesn't change addresses in actual code. if (!(Sec->Flags & SHF_ALLOC)) return Rank | RF_NOT_ALLOC; // Sort sections based on their access permission in the following // order: R, RX, RWX, RW. This order is based on the following // considerations: // * Read-only sections come first such that they go in the // PT_LOAD covering the program headers at the start of the file. // * Read-only, executable sections come next, unless the // -no-rosegment option is used. // * Writable, executable sections follow such that .plt on // architectures where it needs to be writable will be placed // between .text and .data. 
// * Writable sections come last, such that .bss lands at the very // end of the last PT_LOAD. bool IsExec = Sec->Flags & SHF_EXECINSTR; bool IsWrite = Sec->Flags & SHF_WRITE; if (IsExec) { if (IsWrite) Rank |= RF_EXEC_WRITE; else if (!Config->SingleRoRx) Rank |= RF_EXEC; } else { if (IsWrite) Rank |= RF_WRITE; } // If we got here we know that both A and B are in the same PT_LOAD. bool IsTls = Sec->Flags & SHF_TLS; bool IsNoBits = Sec->Type == SHT_NOBITS; // The first requirement we have is to put (non-TLS) nobits sections last. The // reason is that the only thing the dynamic linker will see about them is a // p_memsz that is larger than p_filesz. Seeing that it zeros the end of the // PT_LOAD, so that has to correspond to the nobits sections. bool IsNonTlsNoBits = IsNoBits && !IsTls; if (IsNonTlsNoBits) Rank |= RF_NON_TLS_BSS; // We place nobits RelRo sections before plain r/w ones, and non-nobits RelRo // sections after r/w ones, so that the RelRo sections are contiguous. bool IsRelRo = isRelroSection(Sec); if (IsNonTlsNoBits && !IsRelRo) Rank |= RF_NON_TLS_BSS_RO; if (!IsNonTlsNoBits && IsRelRo) Rank |= RF_NON_TLS_BSS_RO; // The TLS initialization block needs to be a single contiguous block in a R/W // PT_LOAD, so stick TLS sections directly before the other RelRo R/W // sections. The TLS NOBITS sections are placed here as they don't take up // virtual address space in the PT_LOAD. if (!IsTls) Rank |= RF_NOT_TLS; // Within the TLS initialization block, the non-nobits sections need to appear // first. if (IsNoBits) Rank |= RF_BSS; // // Some architectures have additional ordering restrictions for sections // // within the same PT_LOAD. if (Config->EMachine == EM_PPC64) { // PPC64 has a number of special SHT_PROGBITS+SHF_ALLOC+SHF_WRITE sections // that we would like to make sure appear is a specific order to maximize // their coverage by a single signed 16-bit offset from the TOC base // pointer. 
Conversely, the special .tocbss section should be first among // all SHT_NOBITS sections. This will put it next to the loaded special // PPC64 sections (and, thus, within reach of the TOC base pointer). StringRef Name = Sec->Name; if (Name != ".tocbss") Rank |= RF_PPC_NOT_TOCBSS; if (Name == ".opd") Rank |= RF_PPC_OPD; if (Name == ".toc1") Rank |= RF_PPC_TOCL; if (Name == ".toc") Rank |= RF_PPC_TOC; if (Name == ".branch_lt") Rank |= RF_PPC_BRANCH_LT; } if (Config->EMachine == EM_MIPS) { // All sections with SHF_MIPS_GPREL flag should be grouped together // because data in these sections is addressable with a gp relative address. if (Sec->Flags & SHF_MIPS_GPREL) Rank |= RF_MIPS_GPREL; if (Sec->Name != ".got") Rank |= RF_MIPS_NOT_GOT; } return Rank; } static bool compareSections(const BaseCommand *ACmd, const BaseCommand *BCmd) { const OutputSection *A = cast(ACmd)->Sec; const OutputSection *B = cast(BCmd)->Sec; if (A->SortRank != B->SortRank) return A->SortRank < B->SortRank; if (!(A->SortRank & RF_NOT_ADDR_SET)) return Config->SectionStartMap.lookup(A->Name) < Config->SectionStartMap.lookup(B->Name); return false; } void PhdrEntry::add(OutputSection *Sec) { Last = Sec; if (!First) First = Sec; p_align = std::max(p_align, Sec->Alignment); if (p_type == PT_LOAD) Sec->FirstInPtLoad = First; } template static Symbol *addRegular(StringRef Name, SectionBase *Sec, uint64_t Value, uint8_t StOther = STV_HIDDEN, uint8_t Binding = STB_WEAK) { // The linker generated symbols are added as STB_WEAK to allow user defined // ones to override them. 
return Symtab::X->addRegular(Name, StOther, STT_NOTYPE, Value, /*Size=*/0, Binding, Sec, /*File=*/nullptr); } template static DefinedRegular * addOptionalRegular(StringRef Name, SectionBase *Sec, uint64_t Val, uint8_t StOther = STV_HIDDEN, uint8_t Binding = STB_GLOBAL) { SymbolBody *S = Symtab::X->find(Name); if (!S) return nullptr; if (S->isInCurrentDSO()) return nullptr; return cast( addRegular(Name, Sec, Val, StOther, Binding)->body()); } // The beginning and the ending of .rel[a].plt section are marked // with __rel[a]_iplt_{start,end} symbols if it is a statically linked // executable. The runtime needs these symbols in order to resolve // all IRELATIVE relocs on startup. For dynamic executables, we don't // need these symbols, since IRELATIVE relocs are resolved through GOT // and PLT. For details, see http://www.airs.com/blog/archives/403. template void Writer::addRelIpltSymbols() { if (InX::DynSymTab) return; StringRef S = Config->IsRela ? "__rela_iplt_start" : "__rel_iplt_start"; addOptionalRegular(S, In::RelaIplt, 0, STV_HIDDEN, STB_WEAK); S = Config->IsRela ? "__rela_iplt_end" : "__rel_iplt_end"; addOptionalRegular(S, In::RelaIplt, -1, STV_HIDDEN, STB_WEAK); } // The linker is expected to define some symbols depending on // the linking result. This function defines such symbols. template void Writer::addReservedSymbols() { if (Config->EMachine == EM_MIPS) { // Define _gp for MIPS. st_value of _gp symbol will be updated by Writer // so that it points to an absolute address which by default is relative // to GOT. Default offset is 0x7ff0. // See "Global Data Symbols" in Chapter 6 in the following document: // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf ElfSym::MipsGp = Symtab::X->addAbsolute("_gp", STV_HIDDEN, STB_LOCAL); // On MIPS O32 ABI, _gp_disp is a magic symbol designates offset between // start of function and 'gp' pointer into GOT. 
if (Symtab::X->find("_gp_disp")) ElfSym::MipsGpDisp = Symtab::X->addAbsolute("_gp_disp", STV_HIDDEN, STB_LOCAL); // The __gnu_local_gp is a magic symbol equal to the current value of 'gp' // pointer. This symbol is used in the code generated by .cpload pseudo-op // in case of using -mno-shared option. // https://sourceware.org/ml/binutils/2004-12/msg00094.html if (Symtab::X->find("__gnu_local_gp")) ElfSym::MipsLocalGp = Symtab::X->addAbsolute("__gnu_local_gp", STV_HIDDEN, STB_LOCAL); } // The _GLOBAL_OFFSET_TABLE_ symbol is defined by target convention to // be at some offset from the base of the .got section, usually 0 or the end // of the .got InputSection *GotSection = InX::MipsGot ? cast(InX::MipsGot) : cast(InX::Got); ElfSym::GlobalOffsetTable = addOptionalRegular( "_GLOBAL_OFFSET_TABLE_", GotSection, Target->GotBaseSymOff); // __tls_get_addr is defined by the dynamic linker for dynamic ELFs. For // static linking the linker is required to optimize away any references to // __tls_get_addr, so it's not defined anywhere. Create a hidden definition // to avoid the undefined symbol error. if (!InX::DynSymTab) Symtab::X->addIgnored("__tls_get_addr"); // __ehdr_start is the location of ELF file headers. Note that we define // this symbol unconditionally even when using a linker script, which // differs from the behavior implemented by GNU linker which only define // this symbol if ELF headers are in the memory mapped segment. // __executable_start is not documented, but the expectation of at // least the android libc is that it points to the elf header too. // __dso_handle symbol is passed to cxa_finalize as a marker to identify // each DSO. The address of the symbol doesn't matter as long as they are // different in different DSOs, so we chose the start address of the DSO. 
for (const char *Name : {"__ehdr_start", "__executable_start", "__dso_handle"}) addOptionalRegular(Name, Out::ElfHeader, 0, STV_HIDDEN); // If linker script do layout we do not need to create any standart symbols. if (Script->Opt.HasSections) return; auto Add = [](StringRef S) { return addOptionalRegular(S, Out::ElfHeader, 0, STV_DEFAULT); }; ElfSym::Bss = Add("__bss_start"); ElfSym::End1 = Add("end"); ElfSym::End2 = Add("_end"); ElfSym::Etext1 = Add("etext"); ElfSym::Etext2 = Add("_etext"); ElfSym::Edata1 = Add("edata"); ElfSym::Edata2 = Add("_edata"); } // Sort input sections by section name suffixes for // __attribute__((init_priority(N))). static void sortInitFini(OutputSection *S) { if (S) reinterpret_cast(S)->sortInitFini(); } // Sort input sections by the special rule for .ctors and .dtors. static void sortCtorsDtors(OutputSection *S) { if (S) reinterpret_cast(S)->sortCtorsDtors(); } // Sort input sections using the list provided by --symbol-ordering-file. template static void sortBySymbolsOrder(ArrayRef OutputSections) { if (Config->SymbolOrderingFile.empty()) return; // Build a map from symbols to their priorities. Symbols that didn't // appear in the symbol ordering file have the lowest priority 0. // All explicitly mentioned symbols have negative (higher) priorities. DenseMap SymbolOrder; int Priority = -Config->SymbolOrderingFile.size(); for (StringRef S : Config->SymbolOrderingFile) SymbolOrder.insert({S, Priority++}); // Build a map from sections to their priorities. DenseMap SectionOrder; for (elf::ObjectFile *File : Symtab::X->getObjectFiles()) { for (SymbolBody *Body : File->getSymbols()) { auto *D = dyn_cast(Body); if (!D || !D->Section) continue; int &Priority = SectionOrder[D->Section]; Priority = std::min(Priority, SymbolOrder.lookup(D->getName())); } } // Sort sections by priority. 
for (OutputSection *Base : OutputSections) if (auto *Sec = dyn_cast(Base)) Sec->sort([&](InputSectionBase *S) { return SectionOrder.lookup(S); }); } template void Writer::forEachRelSec(std::function Fn) { for (InputSectionBase *IS : InputSections) { if (!IS->Live) continue; // Scan all relocations. Each relocation goes through a series // of tests to determine if it needs special treatment, such as // creating GOT, PLT, copy relocations, etc. // Note that relocations for non-alloc sections are directly // processed by InputSection::relocateNonAlloc. if (!(IS->Flags & SHF_ALLOC)) continue; if (isa(IS) || isa(IS)) Fn(*IS); } if (!Config->Relocatable) { for (EhInputSection *ES : In::EhFrame->Sections) Fn(*ES); } } template void Writer::createSections() { for (InputSectionBase *IS : InputSections) if (IS) Factory.addInputSec(IS, getOutputSectionName(IS->Name)); sortBySymbolsOrder(OutputSections); sortInitFini(findSection(".init_array")); sortInitFini(findSection(".fini_array")); sortCtorsDtors(findSection(".ctors")); sortCtorsDtors(findSection(".dtors")); } // We want to find how similar two ranks are. // The more branches in getSectionRank that match, the more similar they are. // Since each branch corresponds to a bit flag, we can just use // countLeadingZeros. static int getRankProximity(OutputSection *A, OutputSection *B) { return countLeadingZeros(A->SortRank ^ B->SortRank); } static int getRankProximity(OutputSection *A, BaseCommand *B) { if (auto *Cmd = dyn_cast(B)) if (Cmd->Sec) return getRankProximity(A, Cmd->Sec); return -1; } // When placing orphan sections, we want to place them after symbol assignments // so that an orphan after // begin_foo = .; // foo : { *(foo) } // end_foo = .; // doesn't break the intended meaning of the begin/end symbols. // We don't want to go over sections since findOrphanPos is the // one in charge of deciding the order of the sections. // We don't want to go over changes to '.', since doing so in // rx_sec : { *(rx_sec) } // . 
= ALIGN(0x1000); // /* The RW PT_LOAD starts here*/ // rw_sec : { *(rw_sec) } // would mean that the RW PT_LOAD would become unaligned. static bool shouldSkip(BaseCommand *Cmd) { if (isa(Cmd)) return false; if (auto *Assign = dyn_cast(Cmd)) return Assign->Name != "."; return true; } // We want to place orphan sections so that they share as much // characteristics with their neighbors as possible. For example, if // both are rw, or both are tls. template static std::vector::iterator findOrphanPos(std::vector::iterator B, std::vector::iterator E) { OutputSection *Sec = cast(*E)->Sec; // Find the first element that has as close a rank as possible. auto I = std::max_element(B, E, [=](BaseCommand *A, BaseCommand *B) { return getRankProximity(Sec, A) < getRankProximity(Sec, B); }); if (I == E) return E; // Consider all existing sections with the same proximity. int Proximity = getRankProximity(Sec, *I); for (; I != E; ++I) { auto *Cmd = dyn_cast(*I); if (!Cmd || !Cmd->Sec) continue; if (getRankProximity(Sec, Cmd->Sec) != Proximity || Sec->SortRank < Cmd->Sec->SortRank) break; } auto J = std::find_if( llvm::make_reverse_iterator(I), llvm::make_reverse_iterator(B), [](BaseCommand *Cmd) { return isa(Cmd); }); I = J.base(); while (I != E && shouldSkip(*I)) ++I; return I; } template void Writer::sortSections() { + if (Script->Opt.HasSections) + Script->adjustSectionsBeforeSorting(); + // Don't sort if using -r. It is not necessary and we want to preserve the // relative order for SHF_LINK_ORDER sections. if (Config->Relocatable) return; - if (Script->Opt.HasSections) - Script->adjustSectionsBeforeSorting(); - for (BaseCommand *Base : Script->Opt.Commands) if (auto *Cmd = dyn_cast(Base)) if (OutputSection *Sec = Cmd->Sec) Sec->SortRank = getSectionRank(Sec); if (!Script->Opt.HasSections) { // We know that all the OutputSectionCommands are contiguous in // this case. 
auto E = Script->Opt.Commands.end(); auto I = Script->Opt.Commands.begin(); auto IsSection = [](BaseCommand *Base) { return isa(Base); }; I = std::find_if(I, E, IsSection); E = std::find_if(llvm::make_reverse_iterator(E), llvm::make_reverse_iterator(I), IsSection) .base(); std::stable_sort(I, E, compareSections); return; } // Orphan sections are sections present in the input files which are // not explicitly placed into the output file by the linker script. // // The sections in the linker script are already in the correct // order. We have to figuere out where to insert the orphan // sections. // // The order of the sections in the script is arbitrary and may not agree with // compareSections. This means that we cannot easily define a strict weak // ordering. To see why, consider a comparison of a section in the script and // one not in the script. We have a two simple options: // * Make them equivalent (a is not less than b, and b is not less than a). // The problem is then that equivalence has to be transitive and we can // have sections a, b and c with only b in a script and a less than c // which breaks this property. // * Use compareSectionsNonScript. Given that the script order doesn't have // to match, we can end up with sections a, b, c, d where b and c are in the // script and c is compareSectionsNonScript less than b. In which case d // can be equivalent to c, a to b and d < a. As a concrete example: // .a (rx) # not in script // .b (rx) # in script // .c (ro) # in script // .d (ro) # not in script // // The way we define an order then is: // * Sort only the orphan sections. They are in the end right now. // * Move each orphan section to its preferred position. We try // to put each section in the last position where it it can share // a PT_LOAD. 
// // There is some ambiguity as to where exactly a new entry should be // inserted, because Opt.Commands contains not only output section // commands but also other types of commands such as symbol assignment // expressions. There's no correct answer here due to the lack of the // formal specification of the linker script. We use heuristics to // determine whether a new output command should be added before or // after another commands. For the details, look at shouldSkip // function. auto I = Script->Opt.Commands.begin(); auto E = Script->Opt.Commands.end(); auto NonScriptI = std::find_if(I, E, [](BaseCommand *Base) { if (auto *Cmd = dyn_cast(Base)) return Cmd->Sec && Cmd->Sec->SectionIndex == INT_MAX; return false; }); // Sort the orphan sections. std::stable_sort(NonScriptI, E, compareSections); // As a horrible special case, skip the first . assignment if it is before any // section. We do this because it is common to set a load address by starting // the script with ". = 0xabcd" and the expectation is that every section is // after that. auto FirstSectionOrDotAssignment = std::find_if(I, E, [](BaseCommand *Cmd) { return !shouldSkip(Cmd); }); if (FirstSectionOrDotAssignment != E && isa(**FirstSectionOrDotAssignment)) ++FirstSectionOrDotAssignment; I = FirstSectionOrDotAssignment; while (NonScriptI != E) { auto Pos = findOrphanPos(I, NonScriptI); OutputSection *Orphan = cast(*NonScriptI)->Sec; // As an optimization, find all sections with the same sort rank // and insert them with one rotate. 
unsigned Rank = Orphan->SortRank;
    auto End = std::find_if(NonScriptI + 1, E, [=](BaseCommand *Cmd) {
      return cast(Cmd)->Sec->SortRank != Rank;
    });
    std::rotate(Pos, NonScriptI, End);
    NonScriptI = End;
  }

  Script->adjustSectionsAfterSorting();
}

// NOTE(review): several template argument lists look like they were lost from
// this text (e.g. `cast(Cmd)`, `std::vector &`, `template void Writer::`).
// Confirm against the upstream file before building.

// Calls Fn on every entry of Sections that is non-null, has a parent and is
// not empty. All other entries are skipped.
static void applySynthetic(const std::vector &Sections,
                           std::function Fn) {
  for (SyntheticSection *SS : Sections)
    if (SS && SS->getParent() && !SS->empty())
      Fn(SS);
}

// We need to add input synthetic sections early in createSyntheticSections()
// to make them visible from the linker script side. But not all sections are
// always required to be in the output. For example, we sometimes don't need
// the dynamic section content. This function filters such unused sections out
// of the output.
static void removeUnusedSyntheticSections(std::vector &V) {
  // All input synthetic sections that can be empty are placed after
  // all regular ones. We iterate over them in reverse and exit at the first
  // non-synthetic one.
  for (InputSectionBase *S : llvm::reverse(InputSections)) {
    SyntheticSection *SS = dyn_cast(S);
    if (!SS)
      return;
    OutputSection *OS = SS->getParent();
    // Non-empty sections are kept; sections without a parent are ignored.
    if (!SS->empty() || !OS)
      continue;
    // An empty GOT is kept when ElfSym::GlobalOffsetTable is set.
    if ((SS == InX::Got || SS == InX::MipsGot) && ElfSym::GlobalOffsetTable)
      continue;
    OS->Sections.erase(std::find(OS->Sections.begin(), OS->Sections.end(), SS));
    SS->Live = false;
    // If there are no other sections in the output section, remove it from the
    // output.
    if (OS->Sections.empty())
      V.erase(std::find(V.begin(), V.end(), OS));
  }
}

// Create output section objects and add them to OutputSections.
template void Writer::finalizeSections() {
  Out::DebugInfo = findSection(".debug_info");
  Out::PreinitArray = findSection(".preinit_array");
  Out::InitArray = findSection(".init_array");
  Out::FiniArray = findSection(".fini_array");

  // The linker needs to define SECNAME_start, SECNAME_end and SECNAME_stop
  // symbols for sections, so that the runtime can get the start and end
  // addresses of each section by section name. Add such symbols.
  if (!Config->Relocatable) {
    addStartEndSymbols();
    for (OutputSection *Sec : OutputSections)
      addStartStopSymbols(Sec);
  }

  // Add _DYNAMIC symbol. Unlike GNU gold, our _DYNAMIC symbol has no type.
  // It should be okay as no one seems to care about the type.
  // Even the author of gold doesn't remember why gold behaves that way.
  // https://sourceware.org/ml/binutils/2002-03/msg00360.html
  if (InX::DynSymTab)
    addRegular("_DYNAMIC", InX::Dynamic, 0);

  // Define __rel[a]_iplt_{start,end} symbols if needed.
  addRelIpltSymbols();

  // This is responsible for splitting up the .eh_frame section into
  // pieces. The relocation scan uses those pieces, so this has to be
  // done earlier.
  applySynthetic({In::EhFrame},
                 [](SyntheticSection *SS) { SS->finalizeContents(); });

  // Scan relocations. This must be done after every symbol is declared so that
  // we can correctly decide if a dynamic relocation is needed.
  forEachRelSec(scanRelocations);

  if (InX::Plt && !InX::Plt->empty())
    InX::Plt->addSymbols();
  if (InX::Iplt && !InX::Iplt->empty())
    InX::Iplt->addSymbols();

  // Now that we have defined all possible global symbols, including linker-
  // synthesized ones, visit all symbols to give the finishing touches.
  for (Symbol *S : Symtab::X->getSymbols()) {
    SymbolBody *Body = S->body();

    if (!includeInSymtab(*Body))
      continue;
    if (InX::SymTab)
      InX::SymTab->addSymbol(Body);

    if (InX::DynSymTab && S->includeInDynsym()) {
      InX::DynSymTab->addSymbol(Body);
      if (auto *SS = dyn_cast(Body))
        if (cast>(SS->File)->isNeeded())
          In::VerNeed->addSymbol(SS);
    }
  }

  // Do not proceed if there was an undefined symbol.
  if (ErrorCount)
    return;

  addPredefinedSections();
  removeUnusedSyntheticSections(OutputSections);

  clearOutputSections();
  sortSections();

  // Now that we have the final list, create a list of all the
  // OutputSectionCommands for convenience.
  for (BaseCommand *Base : Script->Opt.Commands)
    if (auto *Cmd = dyn_cast(Base))
      OutputSectionCommands.push_back(Cmd);

  // Prefer command line supplied address over other constraints.
  for (OutputSectionCommand *Cmd : OutputSectionCommands) {
    auto I = Config->SectionStartMap.find(Cmd->Name);
    if (I != Config->SectionStartMap.end())
      Cmd->AddrExpr = [=] { return I->second; };
  }

  // This is a bit of a hack. A value of 0 means undef, so we set it
  // to 1 to make __ehdr_start defined. The section number is not
  // particularly relevant.
  Out::ElfHeader->SectionIndex = 1;

  // Number the output sections starting at 1 and register their names in the
  // section header string table.
  unsigned I = 1;
  for (OutputSectionCommand *Cmd : OutputSectionCommands) {
    OutputSection *Sec = Cmd->Sec;
    Sec->SectionIndex = I++;
    Sec->ShName = InX::ShStrTab->addString(Sec->Name);
  }

  // Binary and relocatable output does not have PHDRS.
  // The headers have to be created before finalize as that can influence the
  // image base and the dynamic section on mips includes the image base.
  if (!Config->Relocatable && !Config->OFormatBinary) {
    Phdrs = Script->hasPhdrsCommands() ? Script->createPhdrs() : createPhdrs();
    addPtArmExid(Phdrs);
    Out::ProgramHeaders->Size = sizeof(Elf_Phdr) * Phdrs.size();
  }

  // Compute the size of .rela.dyn and .rela.plt early since we need
  // them to populate .dynamic.
  for (SyntheticSection *SS : {In::RelaDyn, In::RelaPlt})
    if (SS->getParent() && !SS->empty())
      SS->getParent()->assignOffsets();

  // Dynamic section must be the last one in this list and dynamic
  // symbol table section (DynSymTab) must be the first one.
  applySynthetic({InX::DynSymTab, InX::Bss, InX::BssRelRo,
                  InX::GnuHashTab, In::HashTab, InX::SymTab,
                  InX::ShStrTab, InX::StrTab, In::VerDef,
                  InX::DynStrTab, InX::GdbIndex, InX::Got,
                  InX::MipsGot, InX::IgotPlt, InX::GotPlt,
                  In::RelaDyn, In::RelaIplt, In::RelaPlt,
                  InX::Plt, InX::Iplt, In::EhFrameHdr,
                  In::VerSym, In::VerNeed, InX::Dynamic},
                 [](SyntheticSection *SS) { SS->finalizeContents(); });

  // Some architectures use small displacements for jump instructions.
  // It is the linker's responsibility to create thunks containing long
  // jump instructions if jump targets are too far. Create thunks.
  if (Target->NeedsThunks) {
    // FIXME: only ARM Interworking and Mips LA25 Thunks are implemented;
    // these do not require address information. To support range extension
    // Thunks we need to assign addresses so that we can tell if jump
    // instructions are out of range. This will need to turn into a loop that
    // converges when no more Thunks are added.
    ThunkCreator TC;
    if (TC.createThunks(OutputSectionCommands)) {
      applySynthetic({InX::MipsGot},
                     [](SyntheticSection *SS) { SS->updateAllocSize(); });
      // A second pass that still creates thunks is treated as a fatal error.
      if (TC.createThunks(OutputSectionCommands))
        fatal("All non-range thunks should be created in first call");
    }
  }

  // Fill other section headers. The dynamic table is finalized
  // at the end because some tags like RELSZ depend on the result
  // of finalizing other sections.
  for (OutputSectionCommand *Cmd : OutputSectionCommands)
    Cmd->finalize();

  // createThunks may have added local symbols to the static symbol table.
  applySynthetic({InX::SymTab, InX::ShStrTab, InX::StrTab},
                 [](SyntheticSection *SS) { SS->postThunkContents(); });
}

// Appends a sentinel section to a non-empty .ARM.exidx output section.
// Does nothing for relocatable output or when .ARM.exidx is absent or empty.
template void Writer::addPredefinedSections() {
  // ARM ABI requires .ARM.exidx to be terminated by some piece of data.
  // We have the terminator synthetic section class. Add that at the end.
  auto *OS = dyn_cast_or_null(findSection(".ARM.exidx"));
  if (!OS || OS->Sections.empty() || Config->Relocatable)
    return;

  auto *Sentinel = make();
  OS->addSection(Sentinel);
  // If there are linker script commands existing at this point then add the
  // sentinel to the last of these too.
  if (OutputSectionCommand *C = Script->getCmd(OS)) {
    // NOTE(review): ISD is dereferenced without comparing against rend();
    // presumably a matching command always exists here — confirm.
    auto ISD = std::find_if(C->Commands.rbegin(), C->Commands.rend(),
                            [](const BaseCommand *Base) { return isa(Base); });
    cast(*ISD)->Sections.push_back(Sentinel);
  }
}

// The linker is expected to define SECNAME_start and SECNAME_end
// symbols for a few sections. This function defines them.
// NOTE(review): template argument lists look like they were lost from this
// text (e.g. `template void Writer::`, `std::vector Ret`); confirm against the
// upstream file before building.
template void Writer::addStartEndSymbols() {
  auto Define = [&](StringRef Start, StringRef End, OutputSection *OS) {
    // These symbols resolve to the image base if the section does not exist.
    // A special value -1 indicates end of the section.
    if (OS) {
      addOptionalRegular(Start, OS, 0);
      addOptionalRegular(End, OS, -1);
    } else {
      // No such section: both symbols get value 0, relative to the ELF header
      // for PIC output and to no section otherwise.
      if (Config->Pic)
        OS = Out::ElfHeader;
      addOptionalRegular(Start, OS, 0);
      addOptionalRegular(End, OS, 0);
    }
  };

  Define("__preinit_array_start", "__preinit_array_end", Out::PreinitArray);
  Define("__init_array_start", "__init_array_end", Out::InitArray);
  Define("__fini_array_start", "__fini_array_end", Out::FiniArray);

  // The __exidx_* symbols are only defined when .ARM.exidx exists.
  if (OutputSection *Sec = findSection(".ARM.exidx"))
    Define("__exidx_start", "__exidx_end", Sec);
}

// If a section name is valid as a C identifier (which is rare because of
// the leading '.'), linkers are expected to define __start_<secname> and
// __stop_<secname> symbols. They are at the beginning and end of the section,
// respectively. This is not requested by the ELF standard, but GNU ld and
// gold provide the feature, and it is used by many programs.
template void Writer::addStartStopSymbols(OutputSection *Sec) {
  StringRef S = Sec->Name;
  if (!isValidCIdentifier(S))
    return;
  addOptionalRegular(Saver.save("__start_" + S), Sec, 0, STV_DEFAULT);
  addOptionalRegular(Saver.save("__stop_" + S), Sec, -1, STV_DEFAULT);
}

// Returns the first entry of OutputSectionCommands whose Name equals Name,
// or nullptr if there is none.
template OutputSectionCommand *Writer::findSectionCommand(StringRef Name) {
  for (OutputSectionCommand *Cmd : OutputSectionCommands)
    if (Cmd->Name == Name)
      return Cmd;
  return nullptr;
}

// Returns the Sec of the command found by findSectionCommand(Name), or
// nullptr if no such command exists.
template OutputSection *Writer::findSectionInScript(StringRef Name) {
  if (OutputSectionCommand *Cmd = findSectionCommand(Name))
    return Cmd->Sec;
  return nullptr;
}

// Returns the first entry of OutputSections whose Name equals Name, or
// nullptr if there is none.
template OutputSection *Writer::findSection(StringRef Name) {
  for (OutputSection *Sec : OutputSections)
    if (Sec->Name == Name)
      return Sec;
  return nullptr;
}

// Returns true if Sec has SHF_ALLOC and is not a TLS NOBITS section.
static bool needsPtLoad(OutputSection *Sec) {
  if (!(Sec->Flags & SHF_ALLOC))
    return false;

  // Don't allocate VA space for TLS NOBITS sections. The PT_TLS PHDR is
  // responsible for allocating space for them, not the PT_LOAD that
  // contains the TLS initialization image.
  if (Sec->Flags & SHF_TLS && Sec->Type == SHT_NOBITS)
    return false;
  return true;
}

// Linker scripts are responsible for aligning addresses. Unfortunately, most
// linker scripts are designed for creating two PT_LOADs only, one RX and one
// RW. This means that there is no alignment in the RO to RX transition and we
// cannot create a PT_LOAD there.
//
// Returns the segment flags to use for Flags: R|W|X when Config->Omagic is
// set, Flags with PF_X added when Config->SingleRoRx is set and Flags is not
// writable, and Flags unchanged otherwise.
static uint64_t computeFlags(uint64_t Flags) {
  if (Config->Omagic)
    return PF_R | PF_W | PF_X;
  if (Config->SingleRoRx && !(Flags & PF_W))
    return Flags | PF_X;
  return Flags;
}

// Decide which program headers to create and which sections to include in each
// one.
template std::vector Writer::createPhdrs() {
  std::vector Ret;
  // Appends a new entry and returns a pointer to it. The pointer refers to an
  // element of Ret, so it is only used until the next entry is appended.
  auto AddHdr = [&](unsigned Type, unsigned Flags) -> PhdrEntry * {
    Ret.emplace_back(Type, Flags);
    return &Ret.back();
  };

  // The first phdr entry is PT_PHDR which describes the program header itself.
  AddHdr(PT_PHDR, PF_R)->add(Out::ProgramHeaders);

  // PT_INTERP must be the second entry if it exists.
  if (OutputSection *Sec = findSectionInScript(".interp"))
    AddHdr(PT_INTERP, Sec->getPhdrFlags())->add(Sec);

  // Add the first PT_LOAD segment for regular output sections.
  uint64_t Flags = computeFlags(PF_R);
  PhdrEntry *Load = AddHdr(PT_LOAD, Flags);

  // Add the headers. We will remove them if they don't fit.
  Load->add(Out::ElfHeader);
  Load->add(Out::ProgramHeaders);

  for (OutputSectionCommand *Cmd : OutputSectionCommands) {
    OutputSection *Sec = Cmd->Sec;
    // The loop stops at the first section without SHF_ALLOC.
    if (!(Sec->Flags & SHF_ALLOC))
      break;
    if (!needsPtLoad(Sec))
      continue;

    // Segments are contiguous memory regions that have the same attributes
    // (e.g. executable or writable). There is one phdr for each segment.
    // Therefore, we need to create a new phdr when the next section has
    // different flags or is loaded at a discontiguous address using the AT
    // linker script command.
    uint64_t NewFlags = computeFlags(Sec->getPhdrFlags());
    if (Script->hasLMA(Sec) || Flags != NewFlags) {
      Load = AddHdr(PT_LOAD, NewFlags);
      Flags = NewFlags;
    }

    Load->add(Sec);
  }

  // Add a TLS segment if any.
  PhdrEntry TlsHdr(PT_TLS, PF_R);
  for (OutputSectionCommand *Cmd : OutputSectionCommands) {
    OutputSection *Sec = Cmd->Sec;
    if (Sec->Flags & SHF_TLS)
      TlsHdr.add(Sec);
  }
  if (TlsHdr.First)
    Ret.push_back(std::move(TlsHdr));

  // Add an entry for .dynamic.
  if (InX::DynSymTab)
    AddHdr(PT_DYNAMIC, InX::Dynamic->getParent()->getPhdrFlags())
        ->add(InX::Dynamic->getParent());

  // PT_GNU_RELRO includes all sections that should be marked as
  // read-only by the dynamic linker after processing relocations.
  PhdrEntry RelRo(PT_GNU_RELRO, PF_R);
  for (OutputSectionCommand *Cmd : OutputSectionCommands) {
    OutputSection *Sec = Cmd->Sec;
    if (needsPtLoad(Sec) && isRelroSection(Sec))
      RelRo.add(Sec);
  }
  if (RelRo.First)
    Ret.push_back(std::move(RelRo));

  // PT_GNU_EH_FRAME is a special section pointing to .eh_frame_hdr.
  if (!In::EhFrame->empty() && In::EhFrameHdr && In::EhFrame->getParent() &&
      In::EhFrameHdr->getParent())
    AddHdr(PT_GNU_EH_FRAME, In::EhFrameHdr->getParent()->getPhdrFlags())
        ->add(In::EhFrameHdr->getParent());

  // PT_OPENBSD_RANDOMIZE is an OpenBSD-specific feature. That makes
  // the dynamic linker fill the segment with random data.
  if (OutputSection *Sec = findSectionInScript(".openbsd.randomdata"))
    AddHdr(PT_OPENBSD_RANDOMIZE, Sec->getPhdrFlags())->add(Sec);

  // PT_GNU_STACK is a special section to tell the loader to make the
  // pages for the stack non-executable. If you really want an executable
  // stack, you can pass -z execstack, but that's not recommended for
  // security reasons.
  unsigned Perm;
  if (Config->ZExecstack)
    Perm = PF_R | PF_W | PF_X;
  else
    Perm = PF_R | PF_W;
  AddHdr(PT_GNU_STACK, Perm)->p_memsz = Config->ZStackSize;

  // PT_OPENBSD_WXNEEDED is an OpenBSD-specific header to mark that the
  // executable is expected to perform W^X violations, such as calling
  // mprotect(2) or mmap(2) with PROT_WRITE | PROT_EXEC, which is prohibited by
  // default on OpenBSD.
  if (Config->ZWxneeded)
    AddHdr(PT_OPENBSD_WXNEEDED, PF_X);

  // Create one PT_NOTE per group of contiguous .note sections. A section with
  // an LMA also starts a new PT_NOTE.
  PhdrEntry *Note = nullptr;
  for (OutputSectionCommand *Cmd : OutputSectionCommands) {
    OutputSection *Sec = Cmd->Sec;
    if (Sec->Type == SHT_NOTE) {
      if (!Note || Script->hasLMA(Sec))
        Note = AddHdr(PT_NOTE, PF_R);
      Note->add(Sec);
    } else {
      Note = nullptr;
    }
  }
  return Ret;
}

// For ARM output, appends a PT_ARM_EXIDX entry covering the first output
// section of type SHT_ARM_EXIDX. Does nothing if there is no such section.
template void Writer::addPtArmExid(std::vector &Phdrs) {
  if (Config->EMachine != EM_ARM)
    return;
  auto I = std::find_if(OutputSectionCommands.begin(),
                        OutputSectionCommands.end(),
                        [](OutputSectionCommand *Cmd) {
                          return Cmd->Sec->Type == SHT_ARM_EXIDX;
                        });
  if (I == OutputSectionCommands.end())
    return;

  // PT_ARM_EXIDX is the ARM EHABI equivalent of PT_GNU_EH_FRAME
  PhdrEntry ARMExidx(PT_ARM_EXIDX, PF_R);
  ARMExidx.add((*I)->Sec);
  Phdrs.push_back(ARMExidx);
}

// The first section of each PT_LOAD, the first section in PT_GNU_RELRO and the
// first section after PT_GNU_RELRO have to be page aligned so that the dynamic
// linker can set the permissions.
template void Writer::fixSectionAlignments() {
  // Installs a page-aligning address expression on Sec's command, unless the
  // command already has an address expression.
  auto PageAlign = [](OutputSection *Sec) {
    OutputSectionCommand *Cmd = Script->getCmd(Sec);
    if (Cmd && !Cmd->AddrExpr)
      Cmd->AddrExpr = [=] {
        return alignTo(Script->getDot(), Config->MaxPageSize);
      };
  };

  for (const PhdrEntry &P : Phdrs)
    if (P.p_type == PT_LOAD && P.First)
      PageAlign(P.First);

  for (const PhdrEntry &P : Phdrs) {
    if (P.p_type != PT_GNU_RELRO)
      continue;
    if (P.First)
      PageAlign(P.First);
    // Find the first section after PT_GNU_RELRO. If it is in a PT_LOAD we
    // have to align it to a page.
    auto End = OutputSectionCommands.end();
    auto I =
        std::find(OutputSectionCommands.begin(), End, Script->getCmd(P.Last));
    if (I == End || (I + 1) == End)
      continue;
    OutputSection *Sec = (*(I + 1))->Sec;
    if (needsPtLoad(Sec))
      PageAlign(Sec);
  }
}

// Adjusts the file alignment for a given output section and returns
// its new file offset. The file offset must be the same as its
// virtual address (modulo the page size) so that the loader can load
// executables without any address adjustment.
static uint64_t getFileAlignment(uint64_t Off, OutputSection *Sec) {
  OutputSection *First = Sec->FirstInPtLoad;
  // If the section is not in a PT_LOAD, we just have to align it.
  if (!First)
    return alignTo(Off, Sec->Alignment);

  // The first section in a PT_LOAD has to have congruent offset and address
  // modulo the page size.
  if (Sec == First)
    return alignTo(Off, Config->MaxPageSize, Sec->Addr);

  // If two sections share the same PT_LOAD the file offset is calculated
  // using this formula: Off2 = Off1 + (VA2 - VA1).
  return First->Offset + Sec->Addr - First->Addr;
}

// Stores Sec's file offset and returns the offset at which the next section
// may start. SHT_NOBITS sections get Off as-is and do not advance it.
static uint64_t setOffset(OutputSection *Sec, uint64_t Off) {
  if (Sec->Type == SHT_NOBITS) {
    Sec->Offset = Off;
    return Off;
  }

  Off = getFileAlignment(Off, Sec);
  Sec->Offset = Off;
  return Off + Sec->Size;
}

// Assigns file offsets to SHF_ALLOC sections only, starting at offset 0, and
// sets FileSize to the end offset rounded up to Config->Wordsize.
template void Writer::assignFileOffsetsBinary() {
  uint64_t Off = 0;
  for (OutputSectionCommand *Cmd : OutputSectionCommands) {
    OutputSection *Sec = Cmd->Sec;
    if (Sec->Flags & SHF_ALLOC)
      Off = setOffset(Sec, Off);
  }
  FileSize = alignTo(Off, Config->Wordsize);
}

// Assign file offsets to output sections.
template void Writer::assignFileOffsets() {
  uint64_t Off = 0;
  Off = setOffset(Out::ElfHeader, Off);
  Off = setOffset(Out::ProgramHeaders, Off);

  for (OutputSectionCommand *Cmd : OutputSectionCommands)
    Off = setOffset(Cmd->Sec, Off);

  // The section header table follows the last section.
  // NOTE(review): the "+ 1" presumably accounts for a leading null section
  // header entry — confirm.
  SectionHeaderOff = alignTo(Off, Config->Wordsize);
  FileSize = SectionHeaderOff +
             (OutputSectionCommands.size() + 1) * sizeof(Elf_Shdr);
}

// Finalize the program headers.
// We call this function after we assign file offsets and VAs to all sections.
// NOTE(review): template argument lists look like they were lost from this
// text (e.g. `template void Writer::`, `reinterpret_cast(Buf)`); confirm
// against the upstream file before building.
template void Writer::setPhdrs() {
  for (PhdrEntry &P : Phdrs) {
    OutputSection *First = P.First;
    OutputSection *Last = P.Last;
    if (First) {
      // The file size spans from the first section's offset to the last
      // section; a trailing SHT_NOBITS section contributes no file bytes.
      P.p_filesz = Last->Offset - First->Offset;
      if (Last->Type != SHT_NOBITS)
        P.p_filesz += Last->Size;
      P.p_memsz = Last->Addr + Last->Size - First->Addr;
      P.p_offset = First->Offset;
      P.p_vaddr = First->Addr;
      if (!P.HasLMA)
        P.p_paddr = First->getLMA();
    }
    if (P.p_type == PT_LOAD)
      P.p_align = Config->MaxPageSize;
    else if (P.p_type == PT_GNU_RELRO) {
      P.p_align = 1;
      // The glibc dynamic loader rounds the size down, so we need to round up
      // to protect the last page. This is a no-op on FreeBSD which always
      // rounds up.
      P.p_memsz = alignTo(P.p_memsz, Target->PageSize);
    }

    // The TLS pointer goes after PT_TLS. At least glibc will align it,
    // so round up the size to make sure the offsets are correct.
    if (P.p_type == PT_TLS) {
      Out::TlsPhdr = &P;
      if (P.p_memsz)
        P.p_memsz = alignTo(P.p_memsz, P.p_align);
    }
  }
}

// The entry point address is chosen in the following ways.
//
// 1. the '-e' entry command-line option;
// 2. the ENTRY(symbol) command in a linker control script;
// 3. the value of the symbol start, if present;
// 4. the address of the first byte of the .text section, if present;
// 5. the address 0.
template uint64_t Writer::getEntryAddr() {
  // Case 1, 2 or 3. As a special case, if the symbol is actually
  // a number, we'll use that number as an address.
  if (SymbolBody *B = Symtab::X->find(Config->Entry))
    return B->getVA();
  uint64_t Addr;
  if (to_integer(Config->Entry, Addr))
    return Addr;

  // Case 4
  if (OutputSection *Sec = findSectionInScript(".text")) {
    if (Config->WarnMissingEntry)
      warn("cannot find entry symbol " + Config->Entry + "; defaulting to 0x" +
           utohexstr(Sec->Addr));
    return Sec->Addr;
  }

  // Case 5
  if (Config->WarnMissingEntry)
    warn("cannot find entry symbol " + Config->Entry +
         "; not setting start address");
  return 0;
}

// Returns the ELF file type for the current configuration: ET_DYN when
// Config->Pic is set, else ET_REL when Config->Relocatable is set, else
// ET_EXEC.
static uint16_t getELFType() {
  if (Config->Pic)
    return ET_DYN;
  if (Config->Relocatable)
    return ET_REL;
  return ET_EXEC;
}

// This function is called after we have assigned address and size
// to each section. This function fixes some predefined
// symbol values that depend on section address and size.
template void Writer::fixPredefinedSymbols() {
  // _etext is the first location after the last read-only loadable segment.
  // _edata is the first location after the last read-write loadable segment.
  // _end is the first location after the uninitialized data region.
  PhdrEntry *Last = nullptr;
  PhdrEntry *LastRO = nullptr;
  PhdrEntry *LastRW = nullptr;
  for (PhdrEntry &P : Phdrs) {
    if (P.p_type != PT_LOAD)
      continue;
    Last = &P;
    if (P.p_flags & PF_W)
      LastRW = &P;
    else
      LastRO = &P;
  }

  // Updates S in place; a null S is ignored.
  auto Set = [](DefinedRegular *S, OutputSection *Sec, uint64_t Value) {
    if (S) {
      S->Section = Sec;
      S->Value = Value;
    }
  };

  if (Last) {
    Set(ElfSym::End1, Last->First, Last->p_memsz);
    Set(ElfSym::End2, Last->First, Last->p_memsz);
  }
  if (LastRO) {
    Set(ElfSym::Etext1, LastRO->First, LastRO->p_filesz);
    Set(ElfSym::Etext2, LastRO->First, LastRO->p_filesz);
  }
  if (LastRW) {
    Set(ElfSym::Edata1, LastRW->First, LastRW->p_filesz);
    Set(ElfSym::Edata2, LastRW->First, LastRW->p_filesz);
  }

  if (ElfSym::Bss)
    ElfSym::Bss->Section = findSectionInScript(".bss");

  // Setup MIPS _gp_disp/__gnu_local_gp symbols which should
  // be equal to the _gp symbol's value.
  if (Config->EMachine == EM_MIPS && !ElfSym::MipsGp->Value) {
    // Find GP-relative section with the lowest address
    // and use this address to calculate default _gp value.
    // The first SHF_MIPS_GPREL section in command order is used.
    for (const OutputSectionCommand *Cmd : OutputSectionCommands) {
      OutputSection *OS = Cmd->Sec;
      if (OS->Flags & SHF_MIPS_GPREL) {
        ElfSym::MipsGp->Value = OS->Addr + 0x7ff0;
        break;
      }
    }
  }
}

// Writes the ELF header, the program header table and the section header
// table into the output buffer.
template void Writer::writeHeader() {
  uint8_t *Buf = Buffer->getBufferStart();
  memcpy(Buf, "\177ELF", 4);

  // Write the ELF header.
  auto *EHdr = reinterpret_cast(Buf);
  EHdr->e_ident[EI_CLASS] = Config->Is64 ? ELFCLASS64 : ELFCLASS32;
  EHdr->e_ident[EI_DATA] = Config->IsLE ? ELFDATA2LSB : ELFDATA2MSB;
  EHdr->e_ident[EI_VERSION] = EV_CURRENT;
  EHdr->e_ident[EI_OSABI] = Config->OSABI;
  EHdr->e_type = getELFType();
  EHdr->e_machine = Config->EMachine;
  EHdr->e_version = EV_CURRENT;
  EHdr->e_entry = getEntryAddr();
  EHdr->e_shoff = SectionHeaderOff;
  EHdr->e_ehsize = sizeof(Elf_Ehdr);
  EHdr->e_phnum = Phdrs.size();
  EHdr->e_shentsize = sizeof(Elf_Shdr);
  EHdr->e_shnum = OutputSectionCommands.size() + 1;
  EHdr->e_shstrndx = InX::ShStrTab->getParent()->SectionIndex;

  if (Config->EMachine == EM_ARM)
    // We don't currently use any features incompatible with EF_ARM_EABI_VER5,
    // but we don't have any firm guarantees of conformance. Linux AArch64
    // kernels (as of 2016) require an EABI version to be set.
    EHdr->e_flags = EF_ARM_EABI_VER5;
  else if (Config->EMachine == EM_MIPS)
    EHdr->e_flags = getMipsEFlags();

  // The program header fields are only filled in for non-relocatable output.
  if (!Config->Relocatable) {
    EHdr->e_phoff = sizeof(Elf_Ehdr);
    EHdr->e_phentsize = sizeof(Elf_Phdr);
  }

  // Write the program header table.
  auto *HBuf = reinterpret_cast(Buf + EHdr->e_phoff);
  for (PhdrEntry &P : Phdrs) {
    HBuf->p_type = P.p_type;
    HBuf->p_flags = P.p_flags;
    HBuf->p_offset = P.p_offset;
    HBuf->p_vaddr = P.p_vaddr;
    HBuf->p_paddr = P.p_paddr;
    HBuf->p_filesz = P.p_filesz;
    HBuf->p_memsz = P.p_memsz;
    HBuf->p_align = P.p_align;
    ++HBuf;
  }

  // Write the section header table. Note that the first table entry is null.
  // The pre-increment skips that first entry before each write.
  auto *SHdrs = reinterpret_cast(Buf + EHdr->e_shoff);
  for (OutputSectionCommand *Cmd : OutputSectionCommands)
    Cmd->Sec->writeHeaderTo(++SHdrs);
}

// Open a result file.
// Reports an error and leaves Buffer untouched if the file would exceed
// UINT32_MAX bytes for non-64-bit output, or if the output buffer cannot be
// created.
template void Writer::openFile() {
  if (!Config->Is64 && FileSize > UINT32_MAX) {
    error("output file too large: " + Twine(FileSize) + " bytes");
    return;
  }

  unlinkAsync(Config->OutputFile);
  ErrorOr> BufferOrErr =
      FileOutputBuffer::create(Config->OutputFile, FileSize,
                               FileOutputBuffer::F_executable);

  if (auto EC = BufferOrErr.getError())
    error("failed to open " + Config->OutputFile + ": " + EC.message());
  else
    Buffer = std::move(*BufferOrErr);
}

// Writes the contents of SHF_ALLOC sections only, each at its file offset.
template void Writer::writeSectionsBinary() {
  uint8_t *Buf = Buffer->getBufferStart();
  for (OutputSectionCommand *Cmd : OutputSectionCommands) {
    OutputSection *Sec = Cmd->Sec;
    if (Sec->Flags & SHF_ALLOC)
      Cmd->writeTo(Buf + Sec->Offset);
  }
}

// Write section contents to a mmap'ed file.
template void Writer::writeSections() {
  uint8_t *Buf = Buffer->getBufferStart();

  // PPC64 needs to process relocations in the .opd section
  // before processing relocations in code-containing sections.
  if (auto *OpdCmd = findSectionCommand(".opd")) {
    Out::Opd = OpdCmd->Sec;
    Out::OpdBuf = Buf + Out::Opd->Offset;
    OpdCmd->template writeTo(Buf + Out::Opd->Offset);
  }

  // Parent of a non-empty .eh_frame_hdr, or nullptr; it is written last.
  OutputSection *EhFrameHdr =
      (In::EhFrameHdr && !In::EhFrameHdr->empty())
          ? In::EhFrameHdr->getParent()
          : nullptr;

  // In -r or -emit-relocs mode, write the relocation sections first, as in
  // Elf_Rel targets we might find out that we need to modify the relocated
  // section while doing it.
  for (OutputSectionCommand *Cmd : OutputSectionCommands) {
    OutputSection *Sec = Cmd->Sec;
    if (Sec->Type == SHT_REL || Sec->Type == SHT_RELA)
      Cmd->writeTo(Buf + Sec->Offset);
  }

  // Write everything that has not been written above and is not deferred.
  for (OutputSectionCommand *Cmd : OutputSectionCommands) {
    OutputSection *Sec = Cmd->Sec;
    if (Sec != Out::Opd && Sec != EhFrameHdr && Sec->Type != SHT_REL &&
        Sec->Type != SHT_RELA)
      Cmd->writeTo(Buf + Sec->Offset);
  }

  // The .eh_frame_hdr depends on .eh_frame section contents, therefore
  // it should be written after .eh_frame is written.
  if (EhFrameHdr) {
    OutputSectionCommand *Cmd = Script->getCmd(EhFrameHdr);
    Cmd->writeTo(Buf + EhFrameHdr->Offset);
  }
}

// Passes the whole output buffer to the build-id section so it can write the
// build id. Does nothing if there is no build-id section or it has no parent.
template void Writer::writeBuildId() {
  if (!InX::BuildId || !InX::BuildId->getParent())
    return;

  // Compute a hash of all sections of the output file.
  uint8_t *Start = Buffer->getBufferStart();
  uint8_t *End = Start + FileSize;
  InX::BuildId->writeBuildId({Start, End});
}

template void elf::writeResult();
template void elf::writeResult();
template void elf::writeResult();
template void elf::writeResult();
diff --git a/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp b/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp
index fe67fc88c467..5233e42e5fc5 100644
--- a/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp
+++ b/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp
@@ -1,844 +1,843 @@
//===- lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp -----------------===//
//
//                             The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file For mach-o object files, this implementation uses YAML I/O to
/// provide the conversion between YAML and the normalized mach-o (NM).
///
///  +------------+         +------+
///  | normalized |   <->   | yaml |
///  +------------+         +------+
#include "MachONormalizedFile.h"
#include "lld/Core/Error.h"
#include "lld/Core/LLVM.h"
#include "lld/ReaderWriter/YamlContext.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include
using llvm::StringRef;
using namespace llvm::yaml;
using namespace llvm::MachO;
using namespace lld::mach_o::normalized;
using lld::YamlContext;
LLVM_YAML_IS_SEQUENCE_VECTOR(Segment)
LLVM_YAML_IS_SEQUENCE_VECTOR(DependentDylib)
LLVM_YAML_IS_SEQUENCE_VECTOR(RebaseLocation)
LLVM_YAML_IS_SEQUENCE_VECTOR(BindLocation)
LLVM_YAML_IS_SEQUENCE_VECTOR(Export)
-LLVM_YAML_IS_SEQUENCE_VECTOR(StringRef)
LLVM_YAML_IS_SEQUENCE_VECTOR(DataInCode)

// for compatibility with gcc-4.7 in C++11 mode, add extra namespace
namespace llvm {
namespace yaml {

// NOTE(review): the element types of the two std::vector specializations
// below appear to be missing from this text; the element() return types
// (Section&, Symbol&) suggest what they were — confirm against upstream.

// A vector of Sections is a sequence.
template<>
struct SequenceTraits< std::vector > {
  static size_t size(IO &io, std::vector &seq) {
    return seq.size();
  }
  // Returns the element at index, growing the sequence first if index is
  // past the current end.
  static Section& element(IO &io, std::vector &seq, size_t index) {
    if ( index >= seq.size() )
      seq.resize(index+1);
    return seq[index];
  }
};

// A vector of Symbols is a sequence.
template<>
struct SequenceTraits< std::vector > {
  static size_t size(IO &io, std::vector &seq) {
    return seq.size();
  }
  // Returns the element at index, growing the sequence first if index is
  // past the current end.
  static Symbol& element(IO &io, std::vector &seq, size_t index) {
    if ( index >= seq.size() )
      seq.resize(index+1);
    return seq[index];
  }
};

// A vector of Relocations is a sequence.
template<>
struct SequenceTraits< Relocations > {
  static size_t size(IO &io, Relocations &seq) {
    return seq.size();
  }
  // Returns the element at index, growing the sequence first if index is
  // past the current end.
  static Relocation& element(IO &io, Relocations &seq, size_t index) {
    if ( index >= seq.size() )
      seq.resize(index+1);
    return seq[index];
  }
};

// The content for a section is represented as a flow sequence of hex bytes.
template<>
struct SequenceTraits< ContentBytes > {
  static size_t size(IO &io, ContentBytes &seq) {
    return seq.size();
  }
  // Returns the element at index, growing the sequence first if index is
  // past the current end.
  static Hex8& element(IO &io, ContentBytes &seq, size_t index) {
    if ( index >= seq.size() )
      seq.resize(index+1);
    return seq[index];
  }
  static const bool flow = true;
};

// The indirect symbols for a section are represented as a flow sequence
// of numbers (symbol table indexes).
template<>
struct SequenceTraits< IndirectSymbols > {
  static size_t size(IO &io, IndirectSymbols &seq) {
    return seq.size();
  }
  // Returns the element at index, growing the sequence first if index is
  // past the current end.
  static uint32_t& element(IO &io, IndirectSymbols &seq, size_t index) {
    if ( index >= seq.size() )
      seq.resize(index+1);
    return seq[index];
  }
  static const bool flow = true;
};

// NOTE(review): the template arguments of the trait specializations below
// appear to be missing from this text; the type of each `value` parameter
// suggests what they were — confirm against upstream.

// Maps lld::MachOLinkingContext::Arch values to their YAML names.
template <>
struct ScalarEnumerationTraits {
  static void enumeration(IO &io, lld::MachOLinkingContext::Arch &value) {
    io.enumCase(value, "unknown",lld::MachOLinkingContext::arch_unknown);
    io.enumCase(value, "ppc", lld::MachOLinkingContext::arch_ppc);
    io.enumCase(value, "x86", lld::MachOLinkingContext::arch_x86);
    io.enumCase(value, "x86_64", lld::MachOLinkingContext::arch_x86_64);
    io.enumCase(value, "armv6", lld::MachOLinkingContext::arch_armv6);
    io.enumCase(value, "armv7", lld::MachOLinkingContext::arch_armv7);
    io.enumCase(value, "armv7s", lld::MachOLinkingContext::arch_armv7s);
    io.enumCase(value, "arm64", lld::MachOLinkingContext::arch_arm64);
  }
};

// Maps lld::MachOLinkingContext::OS values to their YAML names.
template <>
struct ScalarEnumerationTraits {
  static void enumeration(IO &io, lld::MachOLinkingContext::OS &value) {
    io.enumCase(value, "unknown",
                lld::MachOLinkingContext::OS::unknown);
    io.enumCase(value, "Mac OS X",
                lld::MachOLinkingContext::OS::macOSX);
    io.enumCase(value, "iOS",
                lld::MachOLinkingContext::OS::iOS);
    io.enumCase(value, "iOS Simulator",
                lld::MachOLinkingContext::OS::iOS_simulator);
  }
};

// Maps HeaderFileType values to the matching MH_* names.
template <>
struct ScalarEnumerationTraits {
  static void enumeration(IO &io, HeaderFileType &value) {
    io.enumCase(value, "MH_OBJECT", llvm::MachO::MH_OBJECT);
    io.enumCase(value, "MH_DYLIB", llvm::MachO::MH_DYLIB);
    io.enumCase(value, "MH_EXECUTE", llvm::MachO::MH_EXECUTE);
    io.enumCase(value, "MH_BUNDLE", llvm::MachO::MH_BUNDLE);
  }
};

// Maps FileFlags bits to the matching MH_* names.
template <>
struct ScalarBitSetTraits {
  static void bitset(IO &io, FileFlags &value) {
    io.bitSetCase(value, "MH_TWOLEVEL",
                  llvm::MachO::MH_TWOLEVEL);
    io.bitSetCase(value, "MH_SUBSECTIONS_VIA_SYMBOLS",
                  llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS);
  }
};

// Maps SectionType values to the matching S_* names.
template <>
struct ScalarEnumerationTraits {
  static void enumeration(IO &io, SectionType &value) {
    io.enumCase(value, "S_REGULAR",
                llvm::MachO::S_REGULAR);
    io.enumCase(value, "S_ZEROFILL",
                llvm::MachO::S_ZEROFILL);
    io.enumCase(value, "S_CSTRING_LITERALS",
                llvm::MachO::S_CSTRING_LITERALS);
    io.enumCase(value, "S_4BYTE_LITERALS",
                llvm::MachO::S_4BYTE_LITERALS);
    io.enumCase(value, "S_8BYTE_LITERALS",
                llvm::MachO::S_8BYTE_LITERALS);
    io.enumCase(value, "S_LITERAL_POINTERS",
                llvm::MachO::S_LITERAL_POINTERS);
    io.enumCase(value, "S_NON_LAZY_SYMBOL_POINTERS",
                llvm::MachO::S_NON_LAZY_SYMBOL_POINTERS);
    io.enumCase(value, "S_LAZY_SYMBOL_POINTERS",
                llvm::MachO::S_LAZY_SYMBOL_POINTERS);
    io.enumCase(value, "S_SYMBOL_STUBS",
                llvm::MachO::S_SYMBOL_STUBS);
    io.enumCase(value, "S_MOD_INIT_FUNC_POINTERS",
                llvm::MachO::S_MOD_INIT_FUNC_POINTERS);
    io.enumCase(value, "S_MOD_TERM_FUNC_POINTERS",
                llvm::MachO::S_MOD_TERM_FUNC_POINTERS);
    io.enumCase(value, "S_COALESCED",
                llvm::MachO::S_COALESCED);
    io.enumCase(value, "S_GB_ZEROFILL",
                llvm::MachO::S_GB_ZEROFILL);
    io.enumCase(value, "S_INTERPOSING",
                llvm::MachO::S_INTERPOSING);
    io.enumCase(value, "S_16BYTE_LITERALS",
                llvm::MachO::S_16BYTE_LITERALS);
    io.enumCase(value, "S_DTRACE_DOF",
                llvm::MachO::S_DTRACE_DOF);
    io.enumCase(value, "S_LAZY_DYLIB_SYMBOL_POINTERS",
                llvm::MachO::S_LAZY_DYLIB_SYMBOL_POINTERS);
    io.enumCase(value, "S_THREAD_LOCAL_REGULAR",
                llvm::MachO::S_THREAD_LOCAL_REGULAR);
    io.enumCase(value, "S_THREAD_LOCAL_ZEROFILL",
                llvm::MachO::S_THREAD_LOCAL_ZEROFILL);
    io.enumCase(value, "S_THREAD_LOCAL_VARIABLES",
                llvm::MachO::S_THREAD_LOCAL_VARIABLES);
    io.enumCase(value, "S_THREAD_LOCAL_VARIABLE_POINTERS",
                llvm::MachO::S_THREAD_LOCAL_VARIABLE_POINTERS);
    io.enumCase(value, "S_THREAD_LOCAL_INIT_FUNCTION_POINTERS",
                llvm::MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS);
  }
};

// Maps SectionAttr bits to the matching S_ATTR_* names.
template <>
struct ScalarBitSetTraits {
  static void bitset(IO &io, SectionAttr &value) {
    io.bitSetCase(value, "S_ATTR_PURE_INSTRUCTIONS",
                  llvm::MachO::S_ATTR_PURE_INSTRUCTIONS);
    io.bitSetCase(value, "S_ATTR_SOME_INSTRUCTIONS",
                  llvm::MachO::S_ATTR_SOME_INSTRUCTIONS);
    io.bitSetCase(value, "S_ATTR_NO_DEAD_STRIP",
                  llvm::MachO::S_ATTR_NO_DEAD_STRIP);
    io.bitSetCase(value, "S_ATTR_EXT_RELOC",
                  llvm::MachO::S_ATTR_EXT_RELOC);
    io.bitSetCase(value, "S_ATTR_LOC_RELOC",
                  llvm::MachO::S_ATTR_LOC_RELOC);
    io.bitSetCase(value, "S_ATTR_DEBUG",
                  llvm::MachO::S_ATTR_DEBUG);
  }
};

/// This is a custom formatter for SectionAlignment. Values are
/// the power to raise by, ie, the n in 2^n.
/// NOTE(review): input() below rejects values that are not a power of two and
/// stores the parsed value itself, which does not obviously match the
/// "n in 2^n" description — confirm which representation is intended.
template <>
struct ScalarTraits {
  // Prints the alignment as a decimal number.
  static void output(const SectionAlignment &value, void *ctxt,
                     raw_ostream &out) {
    out << llvm::format("%d", (uint32_t)value);
  }

  // Parses an integer alignment. Returns an error message on failure and an
  // empty string on success.
  static StringRef input(StringRef scalar, void *ctxt,
                         SectionAlignment &value) {
    uint32_t alignment;
    if (scalar.getAsInteger(0, alignment)) {
      return "malformed alignment value";
    }
    if (!llvm::isPowerOf2_32(alignment))
      return "alignment must be a power of 2";
    value = alignment;
    return StringRef(); // returning empty string means success
  }

  static bool mustQuote(StringRef) { return false; }
};

// Maps NListType values to the matching N_* names.
template <>
struct ScalarEnumerationTraits {
  static void enumeration(IO &io, NListType &value) {
    io.enumCase(value, "N_UNDF", llvm::MachO::N_UNDF);
    io.enumCase(value, "N_ABS", llvm::MachO::N_ABS);
    io.enumCase(value, "N_SECT", llvm::MachO::N_SECT);
    io.enumCase(value, "N_PBUD", llvm::MachO::N_PBUD);
    io.enumCase(value, "N_INDR", llvm::MachO::N_INDR);
  }
};

// Maps SymbolScope bits to the matching N_* names.
template <>
struct ScalarBitSetTraits {
  static void bitset(IO &io, SymbolScope &value) {
    io.bitSetCase(value, "N_EXT", llvm::MachO::N_EXT);
    io.bitSetCase(value, "N_PEXT", llvm::MachO::N_PEXT);
  }
};

// Maps SymbolDesc bits to the matching N_* names.
template <>
struct ScalarBitSetTraits {
  static void bitset(IO &io, SymbolDesc &value) {
    io.bitSetCase(value, "N_NO_DEAD_STRIP",
                  llvm::MachO::N_NO_DEAD_STRIP);
    io.bitSetCase(value, "N_WEAK_REF",
                  llvm::MachO::N_WEAK_REF);
    io.bitSetCase(value, "N_WEAK_DEF",
                  llvm::MachO::N_WEAK_DEF);
    io.bitSetCase(value, "N_ARM_THUMB_DEF",
                  llvm::MachO::N_ARM_THUMB_DEF);
    io.bitSetCase(value, "N_SYMBOL_RESOLVER",
                  llvm::MachO::N_SYMBOL_RESOLVER);
  }
};

template <>
struct MappingTraits
{ struct NormalizedContentBytes; static void mapping(IO &io, Section §) { io.mapRequired("segment", sect.segmentName); io.mapRequired("section", sect.sectionName); io.mapRequired("type", sect.type); io.mapOptional("attributes", sect.attributes); io.mapOptional("alignment", sect.alignment, (SectionAlignment)1); io.mapRequired("address", sect.address); if (isZeroFillSection(sect.type)) { // S_ZEROFILL sections use "size:" instead of "content:" uint64_t size = sect.content.size(); io.mapOptional("size", size); if (!io.outputting()) { uint8_t *bytes = nullptr; sect.content = makeArrayRef(bytes, size); } } else { MappingNormalization> content( io, sect.content); io.mapOptional("content", content->_normalizedContent); } io.mapOptional("relocations", sect.relocations); io.mapOptional("indirect-syms", sect.indirectSymbols); } struct NormalizedContent { NormalizedContent(IO &io) : _io(io) {} NormalizedContent(IO &io, ArrayRef content) : _io(io) { // When writing yaml, copy content byte array to Hex8 vector. for (auto &c : content) { _normalizedContent.push_back(c); } } ArrayRef denormalize(IO &io) { // When reading yaml, allocate byte array owned by NormalizedFile and // copy Hex8 vector to byte array. 
YamlContext *info = reinterpret_cast(io.getContext()); assert(info != nullptr); NormalizedFile *file = info->_normalizeMachOFile; assert(file != nullptr); size_t size = _normalizedContent.size(); if (!size) return None; uint8_t *bytes = file->ownedAllocations.Allocate(size); std::copy(_normalizedContent.begin(), _normalizedContent.end(), bytes); return makeArrayRef(bytes, size); } IO &_io; ContentBytes _normalizedContent; }; }; template <> struct MappingTraits { static void mapping(IO &io, Relocation &reloc) { io.mapRequired("offset", reloc.offset); io.mapOptional("scattered", reloc.scattered, false); io.mapRequired("type", reloc.type); io.mapRequired("length", reloc.length); io.mapRequired("pc-rel", reloc.pcRel); if ( !reloc.scattered ) io.mapRequired("extern", reloc.isExtern); if ( reloc.scattered ) io.mapRequired("value", reloc.value); if ( !reloc.scattered ) io.mapRequired("symbol", reloc.symbol); } }; template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, RelocationInfoType &value) { YamlContext *info = reinterpret_cast(io.getContext()); assert(info != nullptr); NormalizedFile *file = info->_normalizeMachOFile; assert(file != nullptr); switch (file->arch) { case lld::MachOLinkingContext::arch_x86_64: io.enumCase(value, "X86_64_RELOC_UNSIGNED", llvm::MachO::X86_64_RELOC_UNSIGNED); io.enumCase(value, "X86_64_RELOC_SIGNED", llvm::MachO::X86_64_RELOC_SIGNED); io.enumCase(value, "X86_64_RELOC_BRANCH", llvm::MachO::X86_64_RELOC_BRANCH); io.enumCase(value, "X86_64_RELOC_GOT_LOAD", llvm::MachO::X86_64_RELOC_GOT_LOAD); io.enumCase(value, "X86_64_RELOC_GOT", llvm::MachO::X86_64_RELOC_GOT); io.enumCase(value, "X86_64_RELOC_SUBTRACTOR", llvm::MachO::X86_64_RELOC_SUBTRACTOR); io.enumCase(value, "X86_64_RELOC_SIGNED_1", llvm::MachO::X86_64_RELOC_SIGNED_1); io.enumCase(value, "X86_64_RELOC_SIGNED_2", llvm::MachO::X86_64_RELOC_SIGNED_2); io.enumCase(value, "X86_64_RELOC_SIGNED_4", llvm::MachO::X86_64_RELOC_SIGNED_4); io.enumCase(value, 
"X86_64_RELOC_TLV", llvm::MachO::X86_64_RELOC_TLV); break; case lld::MachOLinkingContext::arch_x86: io.enumCase(value, "GENERIC_RELOC_VANILLA", llvm::MachO::GENERIC_RELOC_VANILLA); io.enumCase(value, "GENERIC_RELOC_PAIR", llvm::MachO::GENERIC_RELOC_PAIR); io.enumCase(value, "GENERIC_RELOC_SECTDIFF", llvm::MachO::GENERIC_RELOC_SECTDIFF); io.enumCase(value, "GENERIC_RELOC_LOCAL_SECTDIFF", llvm::MachO::GENERIC_RELOC_LOCAL_SECTDIFF); io.enumCase(value, "GENERIC_RELOC_TLV", llvm::MachO::GENERIC_RELOC_TLV); break; case lld::MachOLinkingContext::arch_armv6: case lld::MachOLinkingContext::arch_armv7: case lld::MachOLinkingContext::arch_armv7s: io.enumCase(value, "ARM_RELOC_VANILLA", llvm::MachO::ARM_RELOC_VANILLA); io.enumCase(value, "ARM_RELOC_PAIR", llvm::MachO::ARM_RELOC_PAIR); io.enumCase(value, "ARM_RELOC_SECTDIFF", llvm::MachO::ARM_RELOC_SECTDIFF); io.enumCase(value, "ARM_RELOC_LOCAL_SECTDIFF", llvm::MachO::ARM_RELOC_LOCAL_SECTDIFF); io.enumCase(value, "ARM_RELOC_BR24", llvm::MachO::ARM_RELOC_BR24); io.enumCase(value, "ARM_THUMB_RELOC_BR22", llvm::MachO::ARM_THUMB_RELOC_BR22); io.enumCase(value, "ARM_RELOC_HALF", llvm::MachO::ARM_RELOC_HALF); io.enumCase(value, "ARM_RELOC_HALF_SECTDIFF", llvm::MachO::ARM_RELOC_HALF_SECTDIFF); break; case lld::MachOLinkingContext::arch_arm64: io.enumCase(value, "ARM64_RELOC_UNSIGNED", llvm::MachO::ARM64_RELOC_UNSIGNED); io.enumCase(value, "ARM64_RELOC_SUBTRACTOR", llvm::MachO::ARM64_RELOC_SUBTRACTOR); io.enumCase(value, "ARM64_RELOC_BRANCH26", llvm::MachO::ARM64_RELOC_BRANCH26); io.enumCase(value, "ARM64_RELOC_PAGE21", llvm::MachO::ARM64_RELOC_PAGE21); io.enumCase(value, "ARM64_RELOC_PAGEOFF12", llvm::MachO::ARM64_RELOC_PAGEOFF12); io.enumCase(value, "ARM64_RELOC_GOT_LOAD_PAGE21", llvm::MachO::ARM64_RELOC_GOT_LOAD_PAGE21); io.enumCase(value, "ARM64_RELOC_GOT_LOAD_PAGEOFF12", llvm::MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12); io.enumCase(value, "ARM64_RELOC_POINTER_TO_GOT", llvm::MachO::ARM64_RELOC_POINTER_TO_GOT); io.enumCase(value, 
"ARM64_RELOC_TLVP_LOAD_PAGE21", llvm::MachO::ARM64_RELOC_TLVP_LOAD_PAGE21); io.enumCase(value, "ARM64_RELOC_TLVP_LOAD_PAGEOFF12", llvm::MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12); io.enumCase(value, "ARM64_RELOC_ADDEND", llvm::MachO::ARM64_RELOC_ADDEND); break; default: llvm_unreachable("unknown architecture"); } } }; template <> struct MappingTraits { static void mapping(IO &io, Symbol& sym) { io.mapRequired("name", sym.name); io.mapRequired("type", sym.type); io.mapOptional("scope", sym.scope, SymbolScope(0)); io.mapOptional("sect", sym.sect, (uint8_t)0); if (sym.type == llvm::MachO::N_UNDF) { // In undef symbols, desc field contains alignment/ordinal info // which is better represented as a hex vaule. uint16_t t1 = sym.desc; Hex16 t2 = t1; io.mapOptional("desc", t2, Hex16(0)); sym.desc = t2; } else { // In defined symbols, desc fit is a set of option bits. io.mapOptional("desc", sym.desc, SymbolDesc(0)); } io.mapRequired("value", sym.value); } }; // Custom mapping for VMProtect (e.g. "r-x"). template <> struct ScalarTraits { static void output(const VMProtect &value, void*, raw_ostream &out) { out << ( (value & llvm::MachO::VM_PROT_READ) ? 'r' : '-'); out << ( (value & llvm::MachO::VM_PROT_WRITE) ? 'w' : '-'); out << ( (value & llvm::MachO::VM_PROT_EXECUTE) ? 'x' : '-'); } static StringRef input(StringRef scalar, void*, VMProtect &value) { value = 0; if (scalar.size() != 3) return "segment access protection must be three chars (e.g. 
\"r-x\")"; switch (scalar[0]) { case 'r': value = llvm::MachO::VM_PROT_READ; break; case '-': break; default: return "segment access protection first char must be 'r' or '-'"; } switch (scalar[1]) { case 'w': value = value | llvm::MachO::VM_PROT_WRITE; break; case '-': break; default: return "segment access protection second char must be 'w' or '-'"; } switch (scalar[2]) { case 'x': value = value | llvm::MachO::VM_PROT_EXECUTE; break; case '-': break; default: return "segment access protection third char must be 'x' or '-'"; } // Return the empty string on success, return StringRef(); } static bool mustQuote(StringRef) { return false; } }; template <> struct MappingTraits { static void mapping(IO &io, Segment& seg) { io.mapRequired("name", seg.name); io.mapRequired("address", seg.address); io.mapRequired("size", seg.size); io.mapRequired("init-access", seg.init_access); io.mapRequired("max-access", seg.max_access); } }; template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, LoadCommandType &value) { io.enumCase(value, "LC_LOAD_DYLIB", llvm::MachO::LC_LOAD_DYLIB); io.enumCase(value, "LC_LOAD_WEAK_DYLIB", llvm::MachO::LC_LOAD_WEAK_DYLIB); io.enumCase(value, "LC_REEXPORT_DYLIB", llvm::MachO::LC_REEXPORT_DYLIB); io.enumCase(value, "LC_LOAD_UPWARD_DYLIB", llvm::MachO::LC_LOAD_UPWARD_DYLIB); io.enumCase(value, "LC_LAZY_LOAD_DYLIB", llvm::MachO::LC_LAZY_LOAD_DYLIB); io.enumCase(value, "LC_VERSION_MIN_MACOSX", llvm::MachO::LC_VERSION_MIN_MACOSX); io.enumCase(value, "LC_VERSION_MIN_IPHONEOS", llvm::MachO::LC_VERSION_MIN_IPHONEOS); io.enumCase(value, "LC_VERSION_MIN_TVOS", llvm::MachO::LC_VERSION_MIN_TVOS); io.enumCase(value, "LC_VERSION_MIN_WATCHOS", llvm::MachO::LC_VERSION_MIN_WATCHOS); } }; template <> struct MappingTraits { static void mapping(IO &io, DependentDylib& dylib) { io.mapRequired("path", dylib.path); io.mapOptional("kind", dylib.kind, llvm::MachO::LC_LOAD_DYLIB); io.mapOptional("compat-version", dylib.compatVersion, 
PackedVersion(0x10000)); io.mapOptional("current-version", dylib.currentVersion, PackedVersion(0x10000)); } }; template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, RebaseType &value) { io.enumCase(value, "REBASE_TYPE_POINTER", llvm::MachO::REBASE_TYPE_POINTER); io.enumCase(value, "REBASE_TYPE_TEXT_PCREL32", llvm::MachO::REBASE_TYPE_TEXT_PCREL32); io.enumCase(value, "REBASE_TYPE_TEXT_ABSOLUTE32", llvm::MachO::REBASE_TYPE_TEXT_ABSOLUTE32); } }; template <> struct MappingTraits { static void mapping(IO &io, RebaseLocation& rebase) { io.mapRequired("segment-index", rebase.segIndex); io.mapRequired("segment-offset", rebase.segOffset); io.mapOptional("kind", rebase.kind, llvm::MachO::REBASE_TYPE_POINTER); } }; template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, BindType &value) { io.enumCase(value, "BIND_TYPE_POINTER", llvm::MachO::BIND_TYPE_POINTER); io.enumCase(value, "BIND_TYPE_TEXT_ABSOLUTE32", llvm::MachO::BIND_TYPE_TEXT_ABSOLUTE32); io.enumCase(value, "BIND_TYPE_TEXT_PCREL32", llvm::MachO::BIND_TYPE_TEXT_PCREL32); } }; template <> struct MappingTraits { static void mapping(IO &io, BindLocation &bind) { io.mapRequired("segment-index", bind.segIndex); io.mapRequired("segment-offset", bind.segOffset); io.mapOptional("kind", bind.kind, llvm::MachO::BIND_TYPE_POINTER); io.mapOptional("can-be-null", bind.canBeNull, false); io.mapRequired("ordinal", bind.ordinal); io.mapRequired("symbol-name", bind.symbolName); io.mapOptional("addend", bind.addend, Hex64(0)); } }; template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, ExportSymbolKind &value) { io.enumCase(value, "EXPORT_SYMBOL_FLAGS_KIND_REGULAR", llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_REGULAR); io.enumCase(value, "EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL", llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL); io.enumCase(value, "EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE", llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE); } }; template <> struct ScalarBitSetTraits 
{ static void bitset(IO &io, ExportFlags &value) { io.bitSetCase(value, "EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION", llvm::MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION); io.bitSetCase(value, "EXPORT_SYMBOL_FLAGS_REEXPORT", llvm::MachO::EXPORT_SYMBOL_FLAGS_REEXPORT); io.bitSetCase(value, "EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER", llvm::MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER); } }; template <> struct MappingTraits { static void mapping(IO &io, Export &exp) { io.mapRequired("name", exp.name); io.mapOptional("offset", exp.offset); io.mapOptional("kind", exp.kind, llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_REGULAR); if (!io.outputting() || exp.flags) io.mapOptional("flags", exp.flags); io.mapOptional("other", exp.otherOffset, Hex32(0)); io.mapOptional("other-name", exp.otherName, StringRef()); } }; template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, DataRegionType &value) { io.enumCase(value, "DICE_KIND_DATA", llvm::MachO::DICE_KIND_DATA); io.enumCase(value, "DICE_KIND_JUMP_TABLE8", llvm::MachO::DICE_KIND_JUMP_TABLE8); io.enumCase(value, "DICE_KIND_JUMP_TABLE16", llvm::MachO::DICE_KIND_JUMP_TABLE16); io.enumCase(value, "DICE_KIND_JUMP_TABLE32", llvm::MachO::DICE_KIND_JUMP_TABLE32); io.enumCase(value, "DICE_KIND_ABS_JUMP_TABLE32", llvm::MachO::DICE_KIND_ABS_JUMP_TABLE32); } }; template <> struct MappingTraits { static void mapping(IO &io, DataInCode &entry) { io.mapRequired("offset", entry.offset); io.mapRequired("length", entry.length); io.mapRequired("kind", entry.kind); } }; template <> struct ScalarTraits { static void output(const PackedVersion &value, void*, raw_ostream &out) { out << llvm::format("%d.%d", (value >> 16), (value >> 8) & 0xFF); if (value & 0xFF) { out << llvm::format(".%d", (value & 0xFF)); } } static StringRef input(StringRef scalar, void*, PackedVersion &result) { uint32_t value; if (lld::MachOLinkingContext::parsePackedVersion(scalar, value)) return "malformed version number"; result = value; // Return the empty string on success, 
return StringRef(); } static bool mustQuote(StringRef) { return false; } }; template <> struct MappingTraits { static void mapping(IO &io, NormalizedFile &file) { io.mapRequired("arch", file.arch); io.mapRequired("file-type", file.fileType); io.mapOptional("flags", file.flags); io.mapOptional("dependents", file.dependentDylibs); io.mapOptional("install-name", file.installName, StringRef()); io.mapOptional("compat-version", file.compatVersion, PackedVersion(0x10000)); io.mapOptional("current-version", file.currentVersion, PackedVersion(0x10000)); io.mapOptional("has-UUID", file.hasUUID, true); io.mapOptional("rpaths", file.rpaths); io.mapOptional("entry-point", file.entryAddress, Hex64(0)); io.mapOptional("stack-size", file.stackSize, Hex64(0)); io.mapOptional("source-version", file.sourceVersion, Hex64(0)); io.mapOptional("OS", file.os); io.mapOptional("min-os-version", file.minOSverson, PackedVersion(0)); io.mapOptional("min-os-version-kind", file.minOSVersionKind, (LoadCommandType)0); io.mapOptional("sdk-version", file.sdkVersion, PackedVersion(0)); io.mapOptional("segments", file.segments); io.mapOptional("sections", file.sections); io.mapOptional("local-symbols", file.localSymbols); io.mapOptional("global-symbols", file.globalSymbols); io.mapOptional("undefined-symbols",file.undefinedSymbols); io.mapOptional("page-size", file.pageSize, Hex32(4096)); io.mapOptional("rebasings", file.rebasingInfo); io.mapOptional("bindings", file.bindingInfo); io.mapOptional("weak-bindings", file.weakBindingInfo); io.mapOptional("lazy-bindings", file.lazyBindingInfo); io.mapOptional("exports", file.exportInfo); io.mapOptional("dataInCode", file.dataInCode); } static StringRef validate(IO &io, NormalizedFile &file) { return StringRef(); } }; } // namespace llvm } // namespace yaml namespace lld { namespace mach_o { /// Handles !mach-o tagged yaml documents. 
bool MachOYamlIOTaggedDocumentHandler::handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const { if (!io.mapTag("!mach-o")) return false; // Step 1: parse yaml into normalized mach-o struct. NormalizedFile nf; YamlContext *info = reinterpret_cast(io.getContext()); assert(info != nullptr); assert(info->_normalizeMachOFile == nullptr); info->_normalizeMachOFile = &nf; MappingTraits::mapping(io, nf); // Step 2: parse normalized mach-o struct into atoms. auto fileOrError = normalizedToAtoms(nf, info->_path, true); // Check that we parsed successfully. if (!fileOrError) { std::string buffer; llvm::raw_string_ostream stream(buffer); handleAllErrors(fileOrError.takeError(), [&](const llvm::ErrorInfoBase &EI) { EI.log(stream); stream << "\n"; }); io.setError(stream.str()); return false; } if (nf.arch != _arch) { io.setError(Twine("file is wrong architecture. Expected (" + MachOLinkingContext::nameFromArch(_arch) + ") found (" + MachOLinkingContext::nameFromArch(nf.arch) + ")")); return false; } info->_normalizeMachOFile = nullptr; file = fileOrError->release(); return true; } namespace normalized { /// Parses a yaml encoded mach-o file to produce an in-memory normalized view. llvm::Expected> readYaml(std::unique_ptr &mb) { // Make empty NormalizedFile. std::unique_ptr f(new NormalizedFile()); // Create YAML Input parser. YamlContext yamlContext; yamlContext._normalizeMachOFile = f.get(); llvm::yaml::Input yin(mb->getBuffer(), &yamlContext); // Fill NormalizedFile by parsing yaml. yin >> *f; // Return error if there were parsing problems. if (auto ec = yin.error()) return llvm::make_error(Twine("YAML parsing error: ") + ec.message()); // Hand ownership of instantiated NormalizedFile to caller. return std::move(f); } /// Writes a yaml encoded mach-o files from an in-memory normalized view. 
std::error_code writeYaml(const NormalizedFile &file, raw_ostream &out) { // YAML I/O is not const aware, so need to cast away ;-( NormalizedFile *f = const_cast(&file); // Create yaml Output writer, using yaml options for context. YamlContext yamlContext; yamlContext._normalizeMachOFile = f; llvm::yaml::Output yout(out, &yamlContext); // Stream out yaml. yout << *f; return std::error_code(); } } // namespace normalized } // namespace mach_o } // namespace lld diff --git a/test/COFF/Inputs/combined-resources-2.rc b/test/COFF/Inputs/combined-resources-2.rc new file mode 100644 index 000000000000..081b3a77bebc --- /dev/null +++ b/test/COFF/Inputs/combined-resources-2.rc @@ -0,0 +1,36 @@ +#include "windows.h" + +LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US +randomdat RCDATA +{ + "this is a random bit of data that means nothing\0", + 0x23a9, + 0x140e, + 194292, +} + +LANGUAGE LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED +randomdat RCDATA +{ + "zhe4 shi4 yi1ge4 sui2ji1 de shu4ju4, zhe4 yi4wei4zhe shen2me\0", + 0x23a9, + 0x140e, + 194292, +} + +LANGUAGE LANG_GERMAN, SUBLANG_GERMAN_LUXEMBOURG +randomdat RCDATA +{ + "Dies ist ein zufälliges Bit von Daten, die nichts bedeutet\0", + 0x23a9, + 0x140e, + 194292, +} + +LANGUAGE LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED +myaccelerators ACCELERATORS +{ + "^C", 999, VIRTKEY, ALT + "D", 1100, VIRTKEY, CONTROL, SHIFT + "^R", 444, ASCII, NOINVERT +} diff --git a/test/COFF/Inputs/combined-resources-2.res b/test/COFF/Inputs/combined-resources-2.res new file mode 100644 index 000000000000..31da6166d7f6 Binary files /dev/null and b/test/COFF/Inputs/combined-resources-2.res differ diff --git a/test/COFF/Inputs/combined-resources-cursor.bmp b/test/COFF/Inputs/combined-resources-cursor.bmp new file mode 100644 index 000000000000..ce513261bc2c Binary files /dev/null and b/test/COFF/Inputs/combined-resources-cursor.bmp differ diff --git a/test/COFF/Inputs/combined-resources-okay.bmp b/test/COFF/Inputs/combined-resources-okay.bmp new file mode 100644 
index 000000000000..e4005bf5ef97 Binary files /dev/null and b/test/COFF/Inputs/combined-resources-okay.bmp differ diff --git a/test/COFF/Inputs/combined-resources.rc b/test/COFF/Inputs/combined-resources.rc new file mode 100644 index 000000000000..08bfb94c44ae --- /dev/null +++ b/test/COFF/Inputs/combined-resources.rc @@ -0,0 +1,50 @@ +#include "windows.h" + +LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US + +myaccelerators ACCELERATORS +{ + "^C", 999, VIRTKEY, ALT + "D", 1100, VIRTKEY, CONTROL, SHIFT + "^R", 444, ASCII, NOINVERT +} + +cursor BITMAP "combined-resources-cursor.bmp" +okay BITMAP "combined-resources-okay.bmp" + +14432 MENU +LANGUAGE LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED +{ + MENUITEM "yu", 100 + MENUITEM "shala", 101 + MENUITEM "kaoya", 102 +} + +testdialog DIALOG 10, 10, 200, 300 +STYLE WS_POPUP | WS_BORDER +CAPTION "Test" +{ + CTEXT "Continue:", 1, 10, 10, 230, 14 + PUSHBUTTON "&OK", 2, 66, 134, 161, 13 +} + +12 ACCELERATORS +{ + "X", 164, VIRTKEY, ALT + "H", 5678, VIRTKEY, CONTROL, SHIFT + "^R", 444, ASCII, NOINVERT +} + +"eat" MENU +LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_AUS +{ + MENUITEM "fish", 100 + MENUITEM "salad", 101 + MENUITEM "duck", 102 +} + + +myresource stringarray { + "this is a user defined resource\0", + "it contains many strings\0", +} diff --git a/test/COFF/Inputs/combined-resources.res b/test/COFF/Inputs/combined-resources.res new file mode 100644 index 000000000000..d422bb4904da Binary files /dev/null and b/test/COFF/Inputs/combined-resources.res differ diff --git a/test/COFF/Inputs/pdb-global-gc.s b/test/COFF/Inputs/pdb-global-gc.s new file mode 100644 index 000000000000..4c931dca015c --- /dev/null +++ b/test/COFF/Inputs/pdb-global-gc.s @@ -0,0 +1,4 @@ +.section .data,"dw",one_only,__wc_mb_cur +.global __wc_mb_cur +__wc_mb_cur: +.long 42 diff --git a/test/COFF/Inputs/pdb-import-gc.lib b/test/COFF/Inputs/pdb-import-gc.lib new file mode 100644 index 000000000000..f4682eddb6e8 Binary files /dev/null and 
b/test/COFF/Inputs/pdb-import-gc.lib differ diff --git a/test/COFF/combined-resources.test b/test/COFF/combined-resources.test new file mode 100644 index 000000000000..dc6c87af0f77 --- /dev/null +++ b/test/COFF/combined-resources.test @@ -0,0 +1,17 @@ +// Check that lld properly handles merging multiple .res files. +// The inputs were generated with the following commands, using the original +// Windows rc.exe +// > rc /fo combined-resources.res /nologo combined-resources.rc +// > rc /fo combined-resources-2.res /nologo combined-resources-2.rc + +# RUN: yaml2obj < %p/Inputs/ret42.yaml > %t.obj +# RUN: lld-link /out:%t.exe /entry:main %t.obj %p/Inputs/resource.res \ +# RUN: %p/Inputs/combined-resources.res %p/Inputs/combined-resources-2.res + +# RUN: llvm-readobj -coff-resources -file-headers %t.exe | FileCheck %s + + +CHECK: ResourceTableRVA: 0x1000 +CHECK-NEXT: ResourceTableSize: 0xC1C +CHECK-DAG: Resources [ +CHECK-NEXT: Total Number of Resources: 13 diff --git a/test/COFF/hello32.test b/test/COFF/hello32.test index 9464a3c114eb..e987bb953890 100644 --- a/test/COFF/hello32.test +++ b/test/COFF/hello32.test @@ -1,131 +1,132 @@ # RUN: yaml2obj < %p/Inputs/hello32.yaml > %t.obj # RUN: lld-link %t.obj %p/Inputs/std32.lib /subsystem:console \ # RUN: /entry:main@0 /out:%t.exe /appcontainer # RUN: llvm-readobj -file-headers %t.exe | FileCheck -check-prefix=HEADER %s # RUN: llvm-readobj -coff-imports %t.exe | FileCheck -check-prefix=IMPORTS %s # RUN: llvm-readobj -coff-basereloc %t.exe | FileCheck -check-prefix=BASEREL %s HEADER: Format: COFF-i386 HEADER-NEXT: Arch: i386 HEADER-NEXT: AddressSize: 32bit HEADER-NEXT: ImageFileHeader { HEADER-NEXT: Machine: IMAGE_FILE_MACHINE_I386 (0x14C) HEADER-NEXT: SectionCount: 4 HEADER-NEXT: TimeDateStamp: 1970-01-01 00:00:00 (0x0) HEADER-NEXT: PointerToSymbolTable: 0x0 HEADER-NEXT: SymbolCount: 0 HEADER-NEXT: OptionalHeaderSize: 224 HEADER-NEXT: Characteristics [ (0x102) HEADER-NEXT: IMAGE_FILE_32BIT_MACHINE (0x100) HEADER-NEXT: 
IMAGE_FILE_EXECUTABLE_IMAGE (0x2) HEADER-NEXT: ] HEADER-NEXT: } HEADER-NEXT: ImageOptionalHeader { +HEADER-NEXT: Magic: 0x10B HEADER-NEXT: MajorLinkerVersion: 14 HEADER-NEXT: MinorLinkerVersion: 0 HEADER-NEXT: SizeOfCode: 512 HEADER-NEXT: SizeOfInitializedData: 1536 HEADER-NEXT: SizeOfUninitializedData: 0 HEADER-NEXT: AddressOfEntryPoint: 0x2000 HEADER-NEXT: BaseOfCode: 0x2000 HEADER-NEXT: BaseOfData: 0x0 HEADER-NEXT: ImageBase: 0x400000 HEADER-NEXT: SectionAlignment: 4096 HEADER-NEXT: FileAlignment: 512 HEADER-NEXT: MajorOperatingSystemVersion: 6 HEADER-NEXT: MinorOperatingSystemVersion: 0 HEADER-NEXT: MajorImageVersion: 0 HEADER-NEXT: MinorImageVersion: 0 HEADER-NEXT: MajorSubsystemVersion: 6 HEADER-NEXT: MinorSubsystemVersion: 0 HEADER-NEXT: SizeOfImage: 16896 HEADER-NEXT: SizeOfHeaders: 512 HEADER-NEXT: Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI (0x3) HEADER-NEXT: Characteristics [ (0x9940) HEADER-NEXT: IMAGE_DLL_CHARACTERISTICS_APPCONTAINER (0x1000) HEADER-NEXT: IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE (0x40) HEADER-NEXT: IMAGE_DLL_CHARACTERISTICS_NO_BIND (0x800) HEADER-NEXT: IMAGE_DLL_CHARACTERISTICS_NX_COMPAT (0x100) HEADER-NEXT: IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE (0x8000) HEADER-NEXT: ] HEADER-NEXT: SizeOfStackReserve: 1048576 HEADER-NEXT: SizeOfStackCommit: 4096 HEADER-NEXT: SizeOfHeapReserve: 1048576 HEADER-NEXT: SizeOfHeapCommit: 4096 HEADER-NEXT: NumberOfRvaAndSize: 16 HEADER-NEXT: DataDirectory { HEADER-NEXT: ExportTableRVA: 0x0 HEADER-NEXT: ExportTableSize: 0x0 HEADER-NEXT: ImportTableRVA: 0x3000 HEADER-NEXT: ImportTableSize: 0x28 HEADER-NEXT: ResourceTableRVA: 0x0 HEADER-NEXT: ResourceTableSize: 0x0 HEADER-NEXT: ExceptionTableRVA: 0x0 HEADER-NEXT: ExceptionTableSize: 0x0 HEADER-NEXT: CertificateTableRVA: 0x0 HEADER-NEXT: CertificateTableSize: 0x0 HEADER-NEXT: BaseRelocationTableRVA: 0x4000 HEADER-NEXT: BaseRelocationTableSize: 0x10 HEADER-NEXT: DebugRVA: 0x0 HEADER-NEXT: DebugSize: 0x0 HEADER-NEXT: ArchitectureRVA: 0x0 HEADER-NEXT: 
ArchitectureSize: 0x0 HEADER-NEXT: GlobalPtrRVA: 0x0 HEADER-NEXT: GlobalPtrSize: 0x0 HEADER-NEXT: TLSTableRVA: 0x0 HEADER-NEXT: TLSTableSize: 0x0 HEADER-NEXT: LoadConfigTableRVA: 0x0 HEADER-NEXT: LoadConfigTableSize: 0x0 HEADER-NEXT: BoundImportRVA: 0x0 HEADER-NEXT: BoundImportSize: 0x0 HEADER-NEXT: IATRVA: 0x3034 HEADER-NEXT: IATSize: 0xC HEADER-NEXT: DelayImportDescriptorRVA: 0x0 HEADER-NEXT: DelayImportDescriptorSize: 0x0 HEADER-NEXT: CLRRuntimeHeaderRVA: 0x0 HEADER-NEXT: CLRRuntimeHeaderSize: 0x0 HEADER-NEXT: ReservedRVA: 0x0 HEADER-NEXT: ReservedSize: 0x0 HEADER-NEXT: } HEADER-NEXT: } HEADER-NEXT: DOSHeader { HEADER-NEXT: Magic: MZ HEADER-NEXT: UsedBytesInTheLastPage: 0 HEADER-NEXT: FileSizeInPages: 0 HEADER-NEXT: NumberOfRelocationItems: 0 HEADER-NEXT: HeaderSizeInParagraphs: 0 HEADER-NEXT: MinimumExtraParagraphs: 0 HEADER-NEXT: MaximumExtraParagraphs: 0 HEADER-NEXT: InitialRelativeSS: 0 HEADER-NEXT: InitialSP: 0 HEADER-NEXT: Checksum: 0 HEADER-NEXT: InitialIP: 0 HEADER-NEXT: InitialRelativeCS: 0 HEADER-NEXT: AddressOfRelocationTable: 64 HEADER-NEXT: OverlayNumber: 0 HEADER-NEXT: OEMid: 0 HEADER-NEXT: OEMinfo: 0 HEADER-NEXT: AddressOfNewExeHeader: 64 HEADER-NEXT: } IMPORTS: Format: COFF-i386 IMPORTS: Arch: i386 IMPORTS: AddressSize: 32bit IMPORTS: Import { IMPORTS: Name: std32.dll IMPORTS: ImportLookupTableRVA: 0x3028 IMPORTS: ImportAddressTableRVA: 0x3034 IMPORTS: Symbol: ExitProcess (0) IMPORTS: Symbol: MessageBoxA (1) IMPORTS: } BASEREL: BaseReloc [ BASEREL: Entry { BASEREL: Type: HIGHLOW BASEREL: Address: 0x2005 BASEREL: } BASEREL: Entry { BASEREL: Type: HIGHLOW BASEREL: Address: 0x200C BASEREL: } BASEREL: ] diff --git a/test/COFF/pdb-comdat.test b/test/COFF/pdb-comdat.test index 769b630d0e8d..a7b5c401ab92 100644 --- a/test/COFF/pdb-comdat.test +++ b/test/COFF/pdb-comdat.test @@ -1,100 +1,99 @@ Consider this example program with an inline function "foo": ==> foo.h <== extern int global; __inline void foo() { ++global; } void bar(); ==> pdb_comdat_main.c 
<== #include "foo.h" int main(void) { foo(); bar(); return 42; } ==> pdb_comdat_bar.c <== #include "foo.h" void bar(void) { foo(); } Both object files will contain debug info for foo, but only the debug info from pdb_comdat_main.obj should be included in the PDB. RUN: rm -rf %t && mkdir -p %t && cd %t RUN: yaml2obj %S/Inputs/pdb_comdat_main.yaml -o pdb_comdat_main.obj RUN: yaml2obj %S/Inputs/pdb_comdat_bar.yaml -o pdb_comdat_bar.obj RUN: lld-link pdb_comdat_main.obj pdb_comdat_bar.obj -out:t.exe -debug -pdb:t.pdb -nodefaultlib -entry:main RUN: llvm-pdbutil dump -l -symbols t.pdb | FileCheck %s CHECK: Lines CHECK: ============================================================ CHECK-LABEL: Mod 0000 | `{{.*}}pdb_comdat_main.obj`: CHECK: c:\src\llvm-project\build\pdb_comdat_main.c (MD5: F969E51BBE373436D81492EB61387F36) CHECK: c:\src\llvm-project\build\foo.h (MD5: D74D834EFAC3AE2B45E606A8320B1D5C) CHECK-LABEL: Mod 0001 | `{{.*}}pdb_comdat_bar.obj`: CHECK: c:\src\llvm-project\build\pdb_comdat_bar.c (MD5: 365279DB4FCBEDD721BBFC3B14A953C2) CHECK-NOT: c:\src\llvm-project\build\foo.h CHECK-LABEL: Mod 0002 | `* Linker *`: CHECK: Symbols CHECK: ============================================================ CHECK-LABEL: Mod 0000 | `{{.*}}pdb_comdat_main.obj`: -CHECK: - S_OBJNAME [size = 56] sig=0, `C:\src\llvm-project\build\pdb_comdat_main.obj` -CHECK: - S_COMPILE3 [size = 60] -CHECK: machine = intel x86-x64, Ver = Microsoft (R) Optimizing Compiler, language = c -CHECK: frontend = 19.0.24215.1, backend = 19.0.24215.1 -CHECK: flags = security checks | hot patchable -CHECK: - S_GPROC32_ID [size = 44] `main` - FIXME: We need to fill in "end". 
-CHECK: parent = 0, addr = 0002:0000, code size = 24, end = 0 -CHECK: debug start = 4, debug end = 19, flags = none -CHECK: - S_FRAMEPROC [size = 32] -CHECK: size = 40, padding size = 0, offset to padding = 0 -CHECK: bytes of callee saved registers = 0, exception handler addr = 0000:0000 -CHECK: flags = has async eh | opt speed -CHECK: - S_END [size = 4] -CHECK: - S_GDATA32 [size = 24] `global` -CHECK: type = 0x0074 (int), addr = 0000:0000 -CHECK: - S_BUILDINFO [size = 8] BuildId = `4106` -CHECK: - S_GPROC32_ID [size = 44] `foo` -CHECK: parent = 0, addr = 0002:0032, code size = 15, end = 0 -CHECK: debug start = 0, debug end = 14, flags = none -CHECK: - S_FRAMEPROC [size = 32] -CHECK: size = 0, padding size = 0, offset to padding = 0 -CHECK: bytes of callee saved registers = 0, exception handler addr = 0000:0000 -CHECK: flags = marked inline | has async eh | opt speed -CHECK: - S_END [size = 4] +CHECK: 4 | S_OBJNAME [size = 56] sig=0, `C:\src\llvm-project\build\pdb_comdat_main.obj` +CHECK: 60 | S_COMPILE3 [size = 60] +CHECK: machine = intel x86-x64, Ver = Microsoft (R) Optimizing Compiler, language = c +CHECK: frontend = 19.0.24215.1, backend = 19.0.24215.1 +CHECK: flags = security checks | hot patchable +CHECK: 120 | S_GPROC32_ID [size = 44] `main` +CHECK: parent = 0, end = 0, addr = 0002:0000, code size = 24 +CHECK: debug start = 4, debug end = 19, flags = none +CHECK: 164 | S_FRAMEPROC [size = 32] +CHECK: size = 40, padding size = 0, offset to padding = 0 +CHECK: bytes of callee saved registers = 0, exception handler addr = 0000:0000 +CHECK: flags = has async eh | opt speed +CHECK: 196 | S_END [size = 4] +CHECK: 200 | S_GDATA32 [size = 24] `global` +CHECK: type = 0x0074 (int), addr = 0000:0000 +CHECK: 224 | S_BUILDINFO [size = 8] BuildId = `4106` +CHECK: 232 | S_GPROC32_ID [size = 44] `foo` +CHECK: parent = 0, end = 0, addr = 0002:0032, code size = 15 +CHECK: debug start = 0, debug end = 14, flags = none +CHECK: 276 | S_FRAMEPROC [size = 32] +CHECK: size = 0, 
padding size = 0, offset to padding = 0 +CHECK: bytes of callee saved registers = 0, exception handler addr = 0000:0000 +CHECK: flags = marked inline | has async eh | opt speed +CHECK: 308 | S_END [size = 4] CHECK-LABEL: Mod 0001 | `{{.*}}pdb_comdat_bar.obj`: -CHECK: - S_OBJNAME [size = 56] sig=0, `C:\src\llvm-project\build\pdb_comdat_bar.obj` -CHECK: - S_COMPILE3 [size = 60] +CHECK: 4 | S_OBJNAME [size = 56] sig=0, `C:\src\llvm-project\build\pdb_comdat_bar.obj` +CHECK: 60 | S_COMPILE3 [size = 60] CHECK: machine = intel x86-x64, Ver = Microsoft (R) Optimizing Compiler, language = c CHECK: frontend = 19.0.24215.1, backend = 19.0.24215.1 CHECK: flags = security checks | hot patchable -CHECK: - S_GPROC32_ID [size = 44] `bar` -CHECK: parent = 0, addr = 0002:0048, code size = 14, end = 0 +CHECK: 120 | S_GPROC32_ID [size = 44] `bar` +CHECK: parent = 0, end = 0, addr = 0002:0048, code size = 14 CHECK: debug start = 4, debug end = 9, flags = none -CHECK: - S_FRAMEPROC [size = 32] +CHECK: 164 | S_FRAMEPROC [size = 32] CHECK: size = 40, padding size = 0, offset to padding = 0 CHECK: bytes of callee saved registers = 0, exception handler addr = 0000:0000 CHECK: flags = has async eh | opt speed -CHECK: - S_END [size = 4] -CHECK: - S_GDATA32 [size = 24] `global` +CHECK: 196 | S_END [size = 4] +CHECK: 200 | S_GDATA32 [size = 24] `global` CHECK: type = 0x0074 (int), addr = 0000:0000 -CHECK: - S_BUILDINFO [size = 8] BuildId = `4109` -CHECK-NOT: - S_GPROC32_ID {{.*}} `foo` +CHECK: 224 | S_BUILDINFO [size = 8] BuildId = `4109` +CHECK-NOT: S_GPROC32_ID {{.*}} `foo` CHECK-LABEL: Mod 0002 | `* Linker *`: Reorder the object files and verify that the other table is selected. 
RUN: lld-link pdb_comdat_bar.obj pdb_comdat_main.obj -out:t.exe -debug -pdb:t.pdb -nodefaultlib -entry:main RUN: llvm-pdbutil dump -l t.pdb | FileCheck %s --check-prefix=REORDER REORDER-LABEL: Mod 0000 | `{{.*}}pdb_comdat_bar.obj`: REORDER: c:\src\llvm-project\build\pdb_comdat_bar.c (MD5: 365279DB4FCBEDD721BBFC3B14A953C2) REORDER: c:\src\llvm-project\build\foo.h (MD5: D74D834EFAC3AE2B45E606A8320B1D5C) REORDER-LABEL: Mod 0001 | `{{.*}}pdb_comdat_main.obj`: REORDER: c:\src\llvm-project\build\pdb_comdat_main.c REORDER-NOT: c:\src\llvm-project\build\foo.h REORDER-LABEL: Mod 0002 | `* Linker *`: diff --git a/test/COFF/pdb-global-gc.yaml b/test/COFF/pdb-global-gc.yaml new file mode 100644 index 000000000000..b66b3f2ca7b8 --- /dev/null +++ b/test/COFF/pdb-global-gc.yaml @@ -0,0 +1,116 @@ +# RUN: yaml2obj %s -o %t.obj +# RUN: llvm-mc %S/Inputs/pdb-global-gc.s -triple x86_64-windows-msvc -filetype=obj -o %t2.obj +# RUN: lld-link %t.obj %t2.obj -debug -entry:main \ +# RUN: -nodefaultlib -debug -out:%t.exe -pdb:%t.pdb -verbose +# RUN: llvm-pdbutil dump -symbols %t.pdb | FileCheck %s + +# This tests the case where an __imp_ chunk is discarded by linker GC. The debug +# info may refer to the __imp_ symbol still. 
+ +# Compile this code with MSVC to regenerate the test case: +# extern char __declspec(dllimport) __wc_mb_cur; +# int discarded() { return __wc_mb_cur; } +# int main() { return g2; } + +# CHECK: Symbols +# CHECK: ============================================================ +# CHECK: Mod 0000 | `{{.*}}pdb-global-gc.yaml.tmp.obj`: +# CHECK: 4 | S_GDATA32 [size = 28] `__wc_mb_cur` +# CHECK-NEXT: type = 0x0070 (char), addr = 0000:0000 +# CHECK: Mod 0001 | `{{.*}}pdb-global-gc.yaml.tmp2.obj`: +# CHECK: Mod 0002 | `* Linker *`: + +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ ] +sections: + - Name: '.debug$S' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ] + Alignment: 1 + Subsections: + - !Symbols + Records: + - Kind: S_GDATA32 + DataSym: + Type: 112 + DisplayName: __wc_mb_cur + - !StringTable + Strings: + Relocations: + - VirtualAddress: 20 + SymbolName: __wc_mb_cur + Type: IMAGE_REL_AMD64_SECREL + - VirtualAddress: 24 + SymbolName: __wc_mb_cur + Type: IMAGE_REL_AMD64_SECTION + - Name: '.text$mn' + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_LNK_COMDAT, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + Alignment: 16 + SectionData: 0FBE0500000000C3 + Relocations: + - VirtualAddress: 3 + SymbolName: __wc_mb_cur + Type: IMAGE_REL_AMD64_REL32 + - Name: '.text$mn' + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_LNK_COMDAT, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + Alignment: 16 + SectionData: B82A000000C3 +symbols: + - Name: '.debug$S' + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 240 + NumberOfRelocations: 2 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + - Name: '.text$mn' + Value: 0 + SectionNumber: 2 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 11 + 
NumberOfRelocations: 1 + NumberOfLinenumbers: 0 + CheckSum: 2906070869 + Number: 0 + Selection: IMAGE_COMDAT_SELECT_NODUPLICATES + - Name: '.text$mn' + Value: 0 + SectionNumber: 3 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 6 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 2139436471 + Number: 0 + Selection: IMAGE_COMDAT_SELECT_NODUPLICATES + - Name: discarded + Value: 0 + SectionNumber: 2 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_FUNCTION + StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: main + Value: 0 + SectionNumber: 3 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_FUNCTION + StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: __wc_mb_cur + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL +... diff --git a/test/COFF/pdb-import-gc.yaml b/test/COFF/pdb-import-gc.yaml new file mode 100644 index 000000000000..80484cb75f4f --- /dev/null +++ b/test/COFF/pdb-import-gc.yaml @@ -0,0 +1,114 @@ +# RUN: yaml2obj %s -o %t.obj +# RUN: lld-link %t.obj %S/Inputs/pdb-import-gc.lib -debug -entry:main \ +# RUN: -nodefaultlib -debug -out:%t.exe -pdb:%t.pdb +# RUN: llvm-pdbutil dump -symbols %t.pdb | FileCheck %s + +# This tests the case where an __imp_ chunk is discarded by linker GC. The debug +# info may refer to the __imp_ symbol still. 
+ +# Compile this code with MSVC to regenerate the test case: +# extern char __declspec(dllimport) __wc_mb_cur; +# int discarded() { return __wc_mb_cur; } +# int main() { return g2; } + +# CHECK: Symbols +# CHECK: ============================================================ +# CHECK: Mod 0000 | `{{.*}}pdb-import-gc.yaml.tmp.obj`: +# CHECK: 4 | S_GDATA32 [size = 32] `__imp___wc_mb_cur` +# CHECK-NEXT: type = 0x0070 (char), addr = 0000:0000 +# CHECK: Mod 0001 | `* Linker *`: + +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ ] +sections: + - Name: '.debug$S' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ] + Alignment: 1 + Subsections: + - !Symbols + Records: + - Kind: S_GDATA32 + DataSym: + Type: 112 + DisplayName: __imp___wc_mb_cur + - !StringTable + Strings: + Relocations: + - VirtualAddress: 20 + SymbolName: __imp___wc_mb_cur + Type: IMAGE_REL_AMD64_SECREL + - VirtualAddress: 24 + SymbolName: __imp___wc_mb_cur + Type: IMAGE_REL_AMD64_SECTION + - Name: '.text$mn' + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_LNK_COMDAT, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + Alignment: 16 + SectionData: 488B05000000000FBE00C3 + Relocations: + - VirtualAddress: 3 + SymbolName: __imp___wc_mb_cur + Type: IMAGE_REL_AMD64_REL32 + - Name: '.text$mn' + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_LNK_COMDAT, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + Alignment: 16 + SectionData: B82A000000C3 +symbols: + - Name: '.debug$S' + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 240 + NumberOfRelocations: 2 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + - Name: '.text$mn' + Value: 0 + SectionNumber: 2 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 11 + NumberOfRelocations: 1 + 
NumberOfLinenumbers: 0 + CheckSum: 2906070869 + Number: 0 + Selection: IMAGE_COMDAT_SELECT_NODUPLICATES + - Name: '.text$mn' + Value: 0 + SectionNumber: 3 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 6 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 2139436471 + Number: 0 + Selection: IMAGE_COMDAT_SELECT_NODUPLICATES + - Name: discarded + Value: 0 + SectionNumber: 2 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_FUNCTION + StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: main + Value: 0 + SectionNumber: 3 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_FUNCTION + StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: __imp___wc_mb_cur + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL +... diff --git a/test/COFF/pdb-safeseh.yaml b/test/COFF/pdb-safeseh.yaml index 9faa5042924d..24215bd11dbe 100644 --- a/test/COFF/pdb-safeseh.yaml +++ b/test/COFF/pdb-safeseh.yaml @@ -1,85 +1,85 @@ # RUN: yaml2obj %s -o %t.obj # RUN: lld-link -debug -entry:main -out:%t.exe -pdb:%t.pdb %t.obj # RUN: llvm-pdbutil dump -symbols %t.pdb | FileCheck %s # There is an S_GDATA32 symbol record with .secrel32 and .secidx relocations in # it in this debug info. This is similar to the relocations in the loadcfg.obj # file in the MSVC CRT. We need to make sure that our relocation logic matches # MSVC's for these absolute, linker-provided symbols. 
# CHECK: Mod 0000 | -# CHECK-NEXT: - S_GDATA32 [size = 40] `___safe_se_handler_table` +# CHECK-NEXT: 4 | S_GDATA32 [size = 40] `___safe_se_handler_table` # CHECK-NEXT: type = 0x0022 (unsigned long), addr = 0003:0000 # CHECK-NEXT: Mod 0001 | `* Linker *`: --- !COFF -header: +header: Machine: IMAGE_FILE_MACHINE_I386 Characteristics: [ ] -sections: +sections: - Name: '.debug$S' Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ] Alignment: 1 - Subsections: + Subsections: - !Symbols - Records: + Records: - Kind: S_GDATA32 - DataSym: + DataSym: Type: 34 DisplayName: ___safe_se_handler_table - !StringTable - Strings: - Relocations: + Strings: + Relocations: - VirtualAddress: 20 SymbolName: ___safe_se_handler_table Type: IMAGE_REL_I386_SECREL - VirtualAddress: 24 SymbolName: ___safe_se_handler_table Type: IMAGE_REL_I386_SECTION - Name: '.text$mn' Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] Alignment: 16 SectionData: 488D0500000000C3 - Relocations: + Relocations: - VirtualAddress: 3 SymbolName: ___safe_se_handler_table Type: IMAGE_REL_I386_REL32 -symbols: +symbols: - Name: '.debug$S' Value: 0 SectionNumber: 1 SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_NULL StorageClass: IMAGE_SYM_CLASS_STATIC - SectionDefinition: + SectionDefinition: Length: 372 NumberOfRelocations: 6 NumberOfLinenumbers: 0 CheckSum: 0 Number: 0 - Name: '.text$mn' Value: 0 SectionNumber: 2 SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_NULL StorageClass: IMAGE_SYM_CLASS_STATIC - SectionDefinition: + SectionDefinition: Length: 8 NumberOfRelocations: 1 NumberOfLinenumbers: 0 CheckSum: 1092178131 Number: 0 - Name: _main Value: 0 SectionNumber: 2 SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_FUNCTION StorageClass: IMAGE_SYM_CLASS_EXTERNAL - Name: ___safe_se_handler_table Value: 0 SectionNumber: 0 SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_NULL StorageClass: 
IMAGE_SYM_CLASS_EXTERNAL ... diff --git a/test/COFF/pdb-secrel-absolute.yaml b/test/COFF/pdb-secrel-absolute.yaml index d74f07e32b98..c514e54e99f1 100644 --- a/test/COFF/pdb-secrel-absolute.yaml +++ b/test/COFF/pdb-secrel-absolute.yaml @@ -1,84 +1,84 @@ # RUN: yaml2obj %s -o %t.obj # RUN: lld-link -debug -entry:main -out:%t.exe -pdb:%t.pdb %t.obj # RUN: llvm-pdbutil dump -symbols %t.pdb | FileCheck %s # There is an S_GDATA32 symbol record with .secrel32 and .secidx relocations in # it in this debug info. This is similar to the relocations in the loadcfg.obj # file in the MSVC CRT. We need to make sure that our relocation logic matches # MSVC's for these absolute, linker-provided symbols. # CHECK: Mod 0000 | -# CHECK-NEXT: - S_GDATA32 [size = 36] `__guard_fids_table` +# CHECK-NEXT: 4 | S_GDATA32 [size = 36] `__guard_fids_table` # CHECK-NEXT: type = 0x0022 (unsigned long), addr = 0003:0000 # CHECK-NEXT: Mod 0001 | `* Linker *`: --- !COFF -header: +header: Machine: IMAGE_FILE_MACHINE_AMD64 Characteristics: [ ] -sections: +sections: - Name: '.debug$S' Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ] Alignment: 1 - Subsections: + Subsections: - !Symbols - Records: + Records: - Kind: S_GDATA32 - DataSym: + DataSym: Type: 34 DisplayName: __guard_fids_table - !StringTable - Strings: - Relocations: + Strings: + Relocations: - VirtualAddress: 20 SymbolName: __guard_fids_table Type: IMAGE_REL_AMD64_SECREL - VirtualAddress: 24 SymbolName: __guard_fids_table Type: IMAGE_REL_AMD64_SECTION - Name: '.text$mn' Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] Alignment: 16 SectionData: 488D0500000000C3 - Relocations: + Relocations: - VirtualAddress: 3 SymbolName: __guard_fids_table Type: IMAGE_REL_AMD64_REL32 -symbols: +symbols: - Name: '.debug$S' Value: 0 SectionNumber: 1 SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_NULL StorageClass: IMAGE_SYM_CLASS_STATIC - SectionDefinition: + 
SectionDefinition: Length: 372 NumberOfRelocations: 6 NumberOfLinenumbers: 0 CheckSum: 0 Number: 0 - Name: '.text$mn' Value: 0 SectionNumber: 2 SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_NULL StorageClass: IMAGE_SYM_CLASS_STATIC - SectionDefinition: + SectionDefinition: Length: 8 NumberOfRelocations: 1 NumberOfLinenumbers: 0 CheckSum: 1092178131 Number: 0 - Name: main Value: 0 SectionNumber: 2 SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_FUNCTION StorageClass: IMAGE_SYM_CLASS_EXTERNAL - Name: __guard_fids_table Value: 0 SectionNumber: 0 SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_NULL StorageClass: IMAGE_SYM_CLASS_EXTERNAL ... diff --git a/test/COFF/pdb-symbol-types.yaml b/test/COFF/pdb-symbol-types.yaml index 4951aa8be379..eceb434f0d0f 100644 --- a/test/COFF/pdb-symbol-types.yaml +++ b/test/COFF/pdb-symbol-types.yaml @@ -1,344 +1,344 @@ # RUN: yaml2obj %s -o %t.obj # RUN: lld-link %t.obj -nodefaultlib -entry:main -debug -out:%t.exe -pdb:%t.pdb # RUN: llvm-pdbutil dump -symbols %t.pdb | FileCheck %s # To regenerate the object file: # $ cat symbol-types.c # struct Foo { int x; }; # typedef struct Foo UDT_Foo; # UDT_Foo global_foo = {42}; # int main() { return global_foo.x; } # $ cl -c -Z7 symbol-types.c # Note that the type of 'global' goes from 0x1005 in the object file to 0x1004 # in the PDB because the LF_FUNC_ID is moved to the id stream. 
# CHECK: Symbols # CHECK: ============================================================ # CHECK-LABEL: Mod 0000 | `{{.*}}pdb-symbol-types.yaml.tmp.obj`: -# CHECK: - S_OBJNAME [size = 52] sig=0, `C:\src\llvm-project\build\symbol-types.obj` -# CHECK: - S_COMPILE3 [size = 60] -# CHECK: machine = intel x86-x64, Ver = Microsoft (R) Optimizing Compiler, language = c -# CHECK: frontend = 19.0.24215.1, backend = 19.0.24215.1 -# CHECK: flags = security checks | hot patchable -# CHECK: - S_GPROC32_ID [size = 44] `main` -# CHECK: parent = 0, addr = 0002:0000, code size = 7, end = 0 -# CHECK: debug start = 0, debug end = 6, flags = none -# CHECK: - S_FRAMEPROC [size = 32] -# CHECK: size = 0, padding size = 0, offset to padding = 0 -# CHECK: bytes of callee saved registers = 0, exception handler addr = 0000:0000 -# CHECK: flags = has async eh | opt speed -# CHECK: - S_END [size = 4] -# CHECK: - S_GDATA32 [size = 28] `global_foo` -# CHECK: type = 0x1004 (Foo), addr = 0001:0000 -# CHECK: - S_UDT [size = 16] `UDT_Foo` -# CHECK: original type = 0x1004 -# CHECK: - S_UDT [size = 12] `Foo` -# CHECK: original type = 0x1004 -# CHECK: - S_BUILDINFO [size = 8] BuildId = `4106` +# CHECK: 4 | S_OBJNAME [size = 52] sig=0, `C:\src\llvm-project\build\symbol-types.obj` +# CHECK: 56 | S_COMPILE3 [size = 60] +# CHECK: machine = intel x86-x64, Ver = Microsoft (R) Optimizing Compiler, language = c +# CHECK: frontend = 19.0.24215.1, backend = 19.0.24215.1 +# CHECK: flags = security checks | hot patchable +# CHECK: 116 | S_GPROC32_ID [size = 44] `main` +# CHECK: parent = 0, end = 0, addr = 0002:0000, code size = 7 +# CHECK: debug start = 0, debug end = 6, flags = none +# CHECK: 160 | S_FRAMEPROC [size = 32] +# CHECK: size = 0, padding size = 0, offset to padding = 0 +# CHECK: bytes of callee saved registers = 0, exception handler addr = 0000:0000 +# CHECK: flags = has async eh | opt speed +# CHECK: 192 | S_END [size = 4] +# CHECK: 196 | S_GDATA32 [size = 28] `global_foo` +# CHECK: type = 0x1004 (Foo), 
addr = 0001:0000 +# CHECK: 224 | S_UDT [size = 16] `UDT_Foo` +# CHECK: original type = 0x1004 +# CHECK: 240 | S_UDT [size = 12] `Foo` +# CHECK: original type = 0x1004 +# CHECK: 252 | S_BUILDINFO [size = 8] BuildId = `4106` # CHECK-LABEL: Mod 0001 | `* Linker *`: --- !COFF -header: +header: Machine: IMAGE_FILE_MACHINE_AMD64 Characteristics: [ ] -sections: +sections: - Name: .drectve Characteristics: [ IMAGE_SCN_LNK_INFO, IMAGE_SCN_LNK_REMOVE ] Alignment: 1 SectionData: 2020202F44454641554C544C49423A224C4942434D5422202F44454641554C544C49423A224F4C444E414D45532220 - Name: '.debug$S' Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ] Alignment: 1 - Subsections: + Subsections: - !Symbols - Records: + Records: - Kind: S_OBJNAME - ObjNameSym: + ObjNameSym: Signature: 0 ObjectName: 'C:\src\llvm-project\build\symbol-types.obj' - Kind: S_COMPILE3 - Compile3Sym: + Compile3Sym: Flags: [ SecurityChecks, HotPatch ] Machine: X64 FrontendMajor: 19 FrontendMinor: 0 FrontendBuild: 24215 FrontendQFE: 1 BackendMajor: 19 BackendMinor: 0 BackendBuild: 24215 BackendQFE: 1 Version: 'Microsoft (R) Optimizing Compiler' - !Symbols - Records: + Records: - Kind: S_GPROC32_ID - ProcSym: + ProcSym: CodeSize: 7 DbgStart: 0 DbgEnd: 6 FunctionType: 4098 Flags: [ ] DisplayName: main - Kind: S_FRAMEPROC - FrameProcSym: + FrameProcSym: TotalFrameBytes: 0 PaddingFrameBytes: 0 OffsetToPadding: 0 BytesOfCalleeSavedRegisters: 0 OffsetOfExceptionHandler: 0 SectionIdOfExceptionHandler: 0 Flags: [ AsynchronousExceptionHandling, OptimizedForSpeed ] - Kind: S_PROC_ID_END - ScopeEndSym: + ScopeEndSym: - !Lines CodeSize: 7 Flags: [ ] RelocOffset: 0 RelocSegment: 0 - Blocks: + Blocks: - FileName: 'c:\src\llvm-project\build\symbol-types.c' - Lines: + Lines: - Offset: 0 LineStart: 4 IsStatement: true EndDelta: 0 - Offset: 0 LineStart: 5 IsStatement: true EndDelta: 0 - Offset: 6 LineStart: 6 IsStatement: true EndDelta: 0 - Columns: + Columns: - !Symbols - Records: + 
Records: - Kind: S_GDATA32 - DataSym: + DataSym: Type: 4101 DisplayName: global_foo - Kind: S_UDT - UDTSym: + UDTSym: Type: 4101 UDTName: UDT_Foo - Kind: S_UDT - UDTSym: + UDTSym: Type: 4101 UDTName: Foo - !FileChecksums - Checksums: + Checksums: - FileName: 'c:\src\llvm-project\build\symbol-types.c' Kind: MD5 Checksum: F833E1A4909FF6FEC5689A664F3BE725 - !StringTable - Strings: + Strings: - 'c:\src\llvm-project\build\symbol-types.c' - !Symbols - Records: + Records: - Kind: S_BUILDINFO - BuildInfoSym: + BuildInfoSym: BuildId: 4111 - Relocations: + Relocations: - VirtualAddress: 164 SymbolName: main Type: IMAGE_REL_AMD64_SECREL - VirtualAddress: 168 SymbolName: main Type: IMAGE_REL_AMD64_SECTION - VirtualAddress: 220 SymbolName: main Type: IMAGE_REL_AMD64_SECREL - VirtualAddress: 224 SymbolName: main Type: IMAGE_REL_AMD64_SECTION - VirtualAddress: 284 SymbolName: global_foo Type: IMAGE_REL_AMD64_SECREL - VirtualAddress: 288 SymbolName: global_foo Type: IMAGE_REL_AMD64_SECTION - Name: '.debug$T' Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ] Alignment: 1 - Types: + Types: - Kind: LF_ARGLIST - ArgList: + ArgList: ArgIndices: [ 0 ] - Kind: LF_PROCEDURE - Procedure: + Procedure: ReturnType: 116 CallConv: NearC Options: [ None ] ParameterCount: 0 ArgumentList: 4096 - Kind: LF_FUNC_ID - FuncId: + FuncId: ParentScope: 0 FunctionType: 4097 Name: main - Kind: LF_STRUCTURE - Class: + Class: MemberCount: 0 Options: [ None, ForwardReference, HasUniqueName ] FieldList: 0 Name: Foo UniqueName: '.?AUFoo@@' DerivationList: 0 VTableShape: 0 Size: 0 - Kind: LF_FIELDLIST - FieldList: + FieldList: - Kind: LF_MEMBER - DataMember: + DataMember: Attrs: 3 Type: 116 FieldOffset: 0 Name: x - Kind: LF_STRUCTURE - Class: + Class: MemberCount: 1 Options: [ None, HasUniqueName ] FieldList: 4100 Name: Foo UniqueName: '.?AUFoo@@' DerivationList: 0 VTableShape: 0 Size: 4 - Kind: LF_STRING_ID - StringId: + StringId: Id: 0 String: 
'c:\src\llvm-project\build\symbol-types.c' - Kind: LF_UDT_SRC_LINE - UdtSourceLine: + UdtSourceLine: UDT: 4101 SourceFile: 4102 LineNumber: 1 - Kind: LF_STRING_ID - StringId: + StringId: Id: 0 String: 'C:\src\llvm-project\build' - Kind: LF_STRING_ID - StringId: + StringId: Id: 0 String: 'C:\PROGRA~2\MICROS~1.0\VC\Bin\amd64\cl.exe' - Kind: LF_STRING_ID - StringId: + StringId: Id: 0 String: '-c -Z7 -MT -IC:\PROGRA~2\MICROS~1.0\VC\include -IC:\PROGRA~2\MICROS~1.0\VC\atlmfc\include -IC:\PROGRA~2\WI3CF2~1\10\include\10.0.14393.0\ucrt -IC:\PROGRA~2\WI3CF2~1\10\include\10.0.14393.0\shared -IC:\PROGRA~2\WI3CF2~1\10\include\10.0.14393.0\um' - Kind: LF_SUBSTR_LIST - StringList: + StringList: StringIndices: [ 4106 ] - Kind: LF_STRING_ID - StringId: + StringId: Id: 4107 String: ' -IC:\PROGRA~2\WI3CF2~1\10\include\10.0.14393.0\winrt -TC -X' - Kind: LF_STRING_ID - StringId: + StringId: Id: 0 String: symbol-types.c - Kind: LF_STRING_ID - StringId: + StringId: Id: 0 String: 'C:\src\llvm-project\build\vc140.pdb' - Kind: LF_BUILDINFO - BuildInfo: + BuildInfo: ArgIndices: [ 4104, 4105, 4109, 4110, 4108 ] - Name: .data Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] Alignment: 4 SectionData: 2A000000 - Name: '.text$mn' Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] Alignment: 16 SectionData: 8B0500000000C3 - Relocations: + Relocations: - VirtualAddress: 2 SymbolName: global_foo Type: IMAGE_REL_AMD64_REL32 -symbols: +symbols: - Name: '@comp.id' Value: 17063575 SectionNumber: -1 SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_NULL StorageClass: IMAGE_SYM_CLASS_STATIC - Name: '@feat.00' Value: 2147484048 SectionNumber: -1 SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_NULL StorageClass: IMAGE_SYM_CLASS_STATIC - Name: .drectve Value: 0 SectionNumber: 1 SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_NULL StorageClass: IMAGE_SYM_CLASS_STATIC - SectionDefinition: + 
SectionDefinition: Length: 47 NumberOfRelocations: 0 NumberOfLinenumbers: 0 CheckSum: 0 Number: 0 - Name: '.debug$S' Value: 0 SectionNumber: 2 SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_NULL StorageClass: IMAGE_SYM_CLASS_STATIC - SectionDefinition: + SectionDefinition: Length: 432 NumberOfRelocations: 6 NumberOfLinenumbers: 0 CheckSum: 0 Number: 0 - Name: '.debug$T' Value: 0 SectionNumber: 3 SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_NULL StorageClass: IMAGE_SYM_CLASS_STATIC - SectionDefinition: + SectionDefinition: Length: 732 NumberOfRelocations: 0 NumberOfLinenumbers: 0 CheckSum: 0 Number: 0 - Name: .data Value: 0 SectionNumber: 4 SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_NULL StorageClass: IMAGE_SYM_CLASS_STATIC - SectionDefinition: + SectionDefinition: Length: 4 NumberOfRelocations: 0 NumberOfLinenumbers: 0 CheckSum: 3482275674 Number: 0 - Name: global_foo Value: 0 SectionNumber: 4 SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_NULL StorageClass: IMAGE_SYM_CLASS_EXTERNAL - Name: '.text$mn' Value: 0 SectionNumber: 5 SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_NULL StorageClass: IMAGE_SYM_CLASS_STATIC - SectionDefinition: + SectionDefinition: Length: 7 NumberOfRelocations: 1 NumberOfLinenumbers: 0 CheckSum: 3635526833 Number: 0 - Name: main Value: 0 SectionNumber: 5 SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_FUNCTION StorageClass: IMAGE_SYM_CLASS_EXTERNAL ... 
diff --git a/test/COFF/reloc-discarded.s b/test/COFF/reloc-discarded.s new file mode 100644 index 000000000000..94eaba998330 --- /dev/null +++ b/test/COFF/reloc-discarded.s @@ -0,0 +1,30 @@ +# RUN: echo -e '.section .bss,"bw",discard,main_global\n.global main_global\n main_global:\n .long 0' | \ +# RUN: llvm-mc - -filetype=obj -o %t1.obj -triple x86_64-windows-msvc +# RUN: llvm-mc %s -filetype=obj -o %t2.obj -triple x86_64-windows-msvc + +# LLD should report an error and not assert regardless of whether we are doing +# GC. + +# RUN: not lld-link -entry:main -nodefaultlib %t1.obj %t2.obj -out:%t.exe -opt:ref 2>&1 | FileCheck %s +# RUN: not lld-link -entry:main -nodefaultlib %t1.obj %t2.obj -out:%t.exe -opt:noref 2>&1 | FileCheck %s + +# CHECK: error: relocation against symbol in discarded section: assoc_global + + .section .bss,"bw",discard,main_global + .globl main_global + .p2align 2 +main_global: + .long 0 + + .section .CRT$XCU,"dr",associative,main_global + .p2align 3 + .globl assoc_global +assoc_global: + .quad main_global + + .text + .globl main +main: + movq assoc_global(%rip), %rax + movl (%rax), %eax + retq diff --git a/test/COFF/resource.test b/test/COFF/resource.test index a73a20258201..53242cdcb63a 100644 --- a/test/COFF/resource.test +++ b/test/COFF/resource.test @@ -1,12 +1,44 @@ # RUN: yaml2obj < %p/Inputs/ret42.yaml > %t.obj # RUN: lld-link /out:%t.exe /entry:main %t.obj %p/Inputs/resource.res # Check if the binary contains UTF-16LE string "Hello" copied from resource.res. # RUN: FileCheck --check-prefix=EXE %s < %t.exe EXE: {{H.e.l.l.o}} -# RUN: llvm-readobj -file-headers %t.exe | FileCheck --check-prefix=HEADER %s +# Verify the resource tree layout in the final executable. 
+# RUN: llvm-readobj -file-headers -coff-resources -section-data %t.exe | \ +# RUN: FileCheck --check-prefix=RESOURCE_INFO %s -HEADER: ResourceTableRVA: 0x1000 -HEADER: ResourceTableSize: 0x88 +RESOURCE_INFO: ResourceTableRVA: 0x1000 +RESOURCE_INFO-NEXT: ResourceTableSize: 0x88 +RESOURCE_INFO-DAG: Resources [ +RESOURCE_INFO-NEXT: Total Number of Resources: 1 +RESOURCE_INFO-NEXT: Base Table Address: 0x400 +RESOURCE_INFO-DAG: Number of String Entries: 0 +RESOURCE_INFO-NEXT: Number of ID Entries: 1 +RESOURCE_INFO-NEXT: Type: kRT_STRING (ID 6) [ +RESOURCE_INFO-NEXT: Table Offset: 0x18 +RESOURCE_INFO-NEXT: Number of String Entries: 0 +RESOURCE_INFO-NEXT: Number of ID Entries: 1 +RESOURCE_INFO-NEXT: Name: (ID 1) [ +RESOURCE_INFO-NEXT: Table Offset: 0x30 +RESOURCE_INFO-NEXT: Number of String Entries: 0 +RESOURCE_INFO-NEXT: Number of ID Entries: 1 +RESOURCE_INFO-NEXT: Language: (ID 1033) [ +RESOURCE_INFO-NEXT: Entry Offset: 0x48 +RESOURCE_INFO-NEXT: Time/Date Stamp: 1970-01-01 00:00:00 (0x0) +RESOURCE_INFO-NEXT: Major Version: 0 +RESOURCE_INFO-NEXT: Minor Version: 0 +RESOURCE_INFO-NEXT: Characteristics: 0 +RESOURCE_INFO-DAG: .rsrc Data ( +RESOURCE_INFO-NEXT: 0000: 00000000 00000000 00000000 00000100 |................| +RESOURCE_INFO-NEXT: 0010: 06000000 18000080 00000000 00000000 |................| +RESOURCE_INFO-NEXT: 0020: 00000000 00000100 01000000 30000080 |............0...| +RESOURCE_INFO-NEXT: 0030: 00000000 00000000 00000000 00000100 |................| +RESOURCE_INFO-NEXT: 0040: 09040000 48000000 58100000 2A000000 |....H...X...*...| +RESOURCE_INFO-NEXT: 0050: 00000000 00000000 00000500 48006500 |............H.e.| +RESOURCE_INFO-NEXT: 0060: 6C006C00 6F000000 00000000 00000000 |l.l.o...........| +RESOURCE_INFO-NEXT: 0070: 00000000 00000000 00000000 00000000 |................| +RESOURCE_INFO-NEXT: 0080: 00000000 00000000 |........| +RESOURCE_INFO-NEXT: ) diff --git a/test/COFF/secrel-absolute.s b/test/COFF/secrel-absolute.s index 69b5ab92991b..bc61fb94b6b0 100644 --- 
a/test/COFF/secrel-absolute.s +++ b/test/COFF/secrel-absolute.s @@ -1,14 +1,14 @@ # RUN: llvm-mc %s -filetype=obj -triple=x86_64-windows-msvc -o %t.obj # RUN: not lld-link -entry:main -nodefaultlib %t.obj -out:%t.exe 2>&1 | FileCheck %s # secrel relocations against absolute symbols are errors. -# CHECK: SECREL relocation points to a non-regular symbol: __guard_fids_table +# CHECK: SECREL relocation cannot be applied to absolute symbols .text .global main main: ret .section .rdata,"dr" .secrel32 __guard_fids_table diff --git a/test/ELF/Inputs/dso-undef-size.s b/test/ELF/Inputs/dso-undef-size.s new file mode 100644 index 000000000000..424f56f82b72 --- /dev/null +++ b/test/ELF/Inputs/dso-undef-size.s @@ -0,0 +1,4 @@ +.text +.global foo +.size foo, 4 +foo: diff --git a/test/ELF/arm-exidx-canunwind.s b/test/ELF/arm-exidx-canunwind.s index e98ec0ec8978..96a7808e8e84 100644 --- a/test/ELF/arm-exidx-canunwind.s +++ b/test/ELF/arm-exidx-canunwind.s @@ -1,99 +1,99 @@ // RUN: llvm-mc -filetype=obj -triple=armv7a-none-linux-gnueabi %s -o %t // RUN: ld.lld %t -o %t2 2>&1 // RUN: llvm-objdump -d -triple=armv7a-none-linux-gnueabi %t2 | FileCheck %s // RUN: llvm-objdump -s -triple=armv7a-none-linux-gnueabi %t2 | FileCheck -check-prefix=CHECK-EXIDX %s // RUN: llvm-readobj --program-headers --sections %t2 | FileCheck -check-prefix=CHECK-PT %s // REQUIRES: arm // Test that inline unwinding table entries and references to .ARM.extab // entries survive the re-ordering of the .ARM.exidx section .syntax unified // Will produce an ARM.exidx entry with inline unwinding instructions .section .text.func1, "ax",%progbits .global func1 func1: .fnstart bx lr .save {r7, lr} .setfp r7, sp, #0 .fnend // Unwinding instructions for .text2 too large for an inline entry ARM.exidx // entry. A separate .ARM.extab section is created to hold the unwind entries // The .ARM.exidx table entry has a reference to the .ARM.extab section. 
.section .text.func2, "ax",%progbits .global func2 func2: .fnstart bx lr .personality __gxx_personality_v0 .handlerdata .long 0 .section .text.func2 .fnend // Dummy implementation of personality routines to satisfy reference from // exception tables .section .text.__gcc_personality_v0, "ax", %progbits .global __gxx_personality_v0 __gxx_personality_v0: bx lr .section .text.__aeabi_unwind_cpp_pr0, "ax", %progbits .global __aeabi_unwind_cpp_pr0 __aeabi_unwind_cpp_pr0: bx lr .text .global _start _start: bl func1 bl func2 bx lr // CHECK: Disassembly of section .text: // CHECK-NEXT: _start: // CHECK-NEXT: 11000: 01 00 00 eb bl #4 // CHECK-NEXT: 11004: 01 00 00 eb bl #4 // CHECK-NEXT: 11008: 1e ff 2f e1 bx lr // CHECK: func1: // CHECK-NEXT: 1100c: 1e ff 2f e1 bx lr // CHECK: func2: // CHECK-NEXT: 11010: 1e ff 2f e1 bx lr // CHECK: __gxx_personality_v0: // CHECK-NEXT: 11014: 1e ff 2f e1 bx lr // CHECK: __aeabi_unwind_cpp_pr0: // CHECK-NEXT: 11018: 1e ff 2f e1 bx lr // CHECK-EXIDX: Contents of section .ARM.exidx: // 100d4 + f38 = 1100c = func1 (inline unwinding data) // 100dc + f34 = 11010 = func2 (100e0 + c = 100ec = .ARM.extab entry) // CHECK-EXIDX-NEXT: 100d4 380f0000 08849780 340f0000 0c000000 // 100e4 + f30 = 11014 = terminate = func2 + sizeof(func2) // CHECK-EXIDX-NEXT: 100e4 300f0000 01000000 -// CHECK-EXIDX-NEXT: Contents of section .ARM.extab.text.func2: +// CHECK-EXIDX-NEXT: Contents of section .ARM.extab: // 100ec + f28 = 11014 = __gxx_personality_v0 // CHECK-EXIDX-NEXT: 100ec 280f0000 b0b0b000 00000000 // CHECK-PT: Name: .ARM.exidx // CHECK-PT-NEXT: Type: SHT_ARM_EXIDX (0x70000001) // CHECK-PT-NEXT: Flags [ // CHECK-PT-NEXT: SHF_ALLOC // CHECK-PT-NEXT: SHF_LINK_ORDER // CHECK-PT-NEXT: ] // CHECK-PT-NEXT: Address: 0x100D4 // CHECK-PT-NEXT: Offset: 0xD4 // CHECK-PT-NEXT: Size: 24 // CHECK-PT: Type: PT_ARM_EXIDX (0x70000001) // CHECK-PT-NEXT: Offset: 0xD4 // CHECK-PT-NEXT: VirtualAddress: 0x100D4 // CHECK-PT-NEXT: PhysicalAddress: 0x100D4 // CHECK-PT-NEXT: 
FileSize: 24 // CHECK-PT-NEXT: MemSize: 24 // CHECK-PT-NEXT: Flags [ (0x4) // CHECK-PT-NEXT: PF_R (0x4) // CHECK-PT-NEXT: ] // CHECK-PT-NEXT: Alignment: 4 // CHECK-PT-NEXT: } diff --git a/test/ELF/arm-exidx-gc.s b/test/ELF/arm-exidx-gc.s index b1a5be54aa2b..1336c256f7c1 100644 --- a/test/ELF/arm-exidx-gc.s +++ b/test/ELF/arm-exidx-gc.s @@ -1,125 +1,124 @@ // RUN: llvm-mc -filetype=obj -triple=armv7a-none-linux-gnueabi %s -o %t // RUN: ld.lld %t -o %t2 --gc-sections 2>&1 // RUN: llvm-objdump -d -triple=armv7a-none-linux-gnueabi %t2 | FileCheck %s // RUN: llvm-objdump -s -triple=armv7a-none-linux-gnueabi %t2 | FileCheck -check-prefix=CHECK-EXIDX %s // REQUIRES: arm // Test the behavior of .ARM.exidx sections under garbage collection // A .ARM.exidx section is live if it has a relocation to a live executable // section. // A .ARM.exidx section may have a relocation to a .ARM.extab section, if the // .ARM.exidx is live then the .ARM.extab section is live .syntax unified .section .text.func1, "ax",%progbits .global func1 func1: .fnstart bx lr .save {r7, lr} .setfp r7, sp, #0 .fnend .section .text.unusedfunc1, "ax",%progbits .global unusedfunc1 unusedfunc1: .fnstart bx lr .cantunwind .fnend // Unwinding instructions for .text2 too large for an inline entry ARM.exidx // entry. A separate .ARM.extab section is created to hold the unwind entries // The .ARM.exidx table entry has a reference to the .ARM.extab section. .section .text.func2, "ax",%progbits .global func2 func2: .fnstart bx lr .personality __gxx_personality_v0 .handlerdata .section .text.func2 .fnend // An unused function with a reference to a .ARM.extab section. Both should // be removed by gc. 
.section .text.unusedfunc2, "ax",%progbits .global unusedfunc2 unusedfunc2: .fnstart bx lr .personality __gxx_personality_v1 .handlerdata .section .text.unusedfunc2 .fnend // Dummy implementation of personality routines to satisfy reference from // exception tables .section .text.__gcc_personality_v0, "ax", %progbits .global __gxx_personality_v0 __gxx_personality_v0: .fnstart bx lr .cantunwind .fnend .section .text.__gcc_personality_v1, "ax", %progbits .global __gxx_personality_v1 __gxx_personality_v1: .fnstart bx lr .cantunwind .fnend .section .text.__aeabi_unwind_cpp_pr0, "ax", %progbits .global __aeabi_unwind_cpp_pr0 __aeabi_unwind_cpp_pr0: .fnstart bx lr .cantunwind .fnend // Entry point for GC .text .global _start _start: bl func1 bl func2 bx lr // GC should have only removed unusedfunc1 and unusedfunc2 the personality // routines are kept alive by references from live .ARM.exidx and .ARM.extab // sections // CHECK: Disassembly of section .text: // CHECK-NEXT: _start: // CHECK-NEXT: 11000: 01 00 00 eb bl #4 // CHECK-NEXT: 11004: 01 00 00 eb bl #4 // CHECK-NEXT: 11008: 1e ff 2f e1 bx lr // CHECK: func1: // CHECK-NEXT: 1100c: 1e ff 2f e1 bx lr // CHECK: func2: // CHECK-NEXT: 11010: 1e ff 2f e1 bx lr // CHECK: __gxx_personality_v0: // CHECK-NEXT: 11014: 1e ff 2f e1 bx lr // CHECK: __aeabi_unwind_cpp_pr0: // CHECK-NEXT: 11018: 1e ff 2f e1 bx lr // GC should have removed table entries for unusedfunc1, unusedfunc2 // and __gxx_personality_v1 // CHECK-NOT: unusedfunc1 // CHECK-NOT: unusedfunc2 // CHECK-NOT: __gxx_personality_v1 -// CHECK-EXIDX-NOT: Contents of section .ARM.extab.text.unusedfunc2: // CHECK-EXIDX: Contents of section .ARM.exidx: // 100d4 + f38 = 1100c = func1 -// 100dc + f34 = 11010 = func2 (100e0 + 1c = 100fc = .ARM.extab.text.func2) +// 100dc + f34 = 11010 = func2 (100e0 + 1c = 100fc = .ARM.extab) // CHECK-EXIDX-NEXT: 100d4 380f0000 08849780 340f0000 1c000000 // 100e4 + f30 = 11014 = __gxx_personality_v0 // 100ec + f2c = 11018 = 
__aeabi_unwind_cpp_pr0 // CHECK-EXIDX-NEXT: 100e4 300f0000 01000000 2c0f0000 01000000 // 100f4 + f28 = 1101c = __aeabi_unwind_cpp_pr0 + sizeof(__aeabi_unwind_cpp_pr0) // CHECK-EXIDX-NEXT: 100f4 280f0000 01000000 -// CHECK-EXIDX-NEXT: Contents of section .ARM.extab.text.func2: +// CHECK-EXIDX-NEXT: Contents of section .ARM.extab: // 100fc + f18 = 11014 = __gxx_personality_v0 // CHECK-EXIDX-NEXT: 100fc 180f0000 b0b0b000 diff --git a/test/ELF/arm-exidx-shared.s b/test/ELF/arm-exidx-shared.s index 13628405ed35..e06733352a37 100644 --- a/test/ELF/arm-exidx-shared.s +++ b/test/ELF/arm-exidx-shared.s @@ -1,45 +1,45 @@ // RUN: llvm-mc -filetype=obj -triple=armv7a-none-linux-gnueabi %s -o %t // RUN: ld.lld %t --shared -o %t2 2>&1 // RUN: llvm-readobj --relocations %t2 | FileCheck %s // RUN: llvm-objdump -s -triple=armv7a-none-linux-gnueabi %t2 | FileCheck -check-prefix=CHECK-EXTAB %s // REQUIRES: arm // Check that the relative R_ARM_PREL31 relocation can access a PLT entry // for when the personality routine is referenced from a shared library. // Also check that the R_ARM_NONE no-op relocation can be used in a shared // library. 
.syntax unified // Will produce an ARM.exidx entry with an R_ARM_NONE relocation to // __aeabi_unwind_cpp_pr0 .section .text.func1, "ax",%progbits .global func1 func1: .fnstart bx lr .fnend // Will produce a R_ARM_PREL31 relocation with respect to the PLT entry of // __gxx_personality_v0 .section .text.func2, "ax",%progbits .global func2 func2: .fnstart bx lr .personality __gxx_personality_v0 .handlerdata .long 0 .section .text.func2 .fnend .section .text.__aeabi_unwind_cpp_pr0, "ax", %progbits .global __aeabi_unwind_cpp_pr0 __aeabi_unwind_cpp_pr0: bx lr // CHECK: Relocations [ // CHECK-NEXT: Section (6) .rel.plt { // CHECK-NEXT: 0x200C R_ARM_JUMP_SLOT __gxx_personality_v0 -// CHECK-EXTAB: Contents of section .ARM.extab.text.func2: +// CHECK-EXTAB: Contents of section .ARM.extab: // 014c + 0ed8 = 0x1024 = __gxx_personality_v0(PLT) // CHECK-EXTAB-NEXT: 014c d80e0000 b0b0b000 00000000 diff --git a/test/ELF/basic-sparcv9.s b/test/ELF/basic-sparcv9.s new file mode 100644 index 000000000000..983224c52913 --- /dev/null +++ b/test/ELF/basic-sparcv9.s @@ -0,0 +1,200 @@ +# RUN: llvm-mc -filetype=obj -triple=sparc64-unknown-openbsd %s -o %t +# RUN: ld.lld %t -o %t2 +# RUN: llvm-readobj -file-headers -sections -program-headers -symbols %t2 \ +# RUN: | FileCheck %s +# REQUIRES: sparc + +# exits with return code 42 on OpenBSD/sparc64 +.global _start +_start: + mov 42, %o0 + mov 1, %g1 + ta 0 + +# CHECK: ElfHeader { +# CHECK-NEXT: Ident { +# CHECK-NEXT: Magic: (7F 45 4C 46) +# CHECK-NEXT: Class: 64-bit (0x2) +# CHECK-NEXT: DataEncoding: BigEndian (0x2) +# CHECK-NEXT: FileVersion: 1 +# CHECK-NEXT: OS/ABI: SystemV (0x0) +# CHECK-NEXT: ABIVersion: 0 +# CHECK-NEXT: Unused: (00 00 00 00 00 00 00) +# CHECK-NEXT: } +# CHECK-NEXT: Type: Executable (0x2) +# CHECK-NEXT: Machine: EM_SPARCV9 (0x2B) +# CHECK-NEXT: Version: 1 +# CHECK-NEXT: Entry: [[ENTRY:0x[0-9A-F]+]] +# CHECK-NEXT: ProgramHeaderOffset: 0x40 +# CHECK-NEXT: SectionHeaderOffset: 0x100080 +# CHECK-NEXT: Flags [ (0x0) +# 
CHECK-NEXT: ] +# CHECK-NEXT: HeaderSize: 64 +# CHECK-NEXT: ProgramHeaderEntrySize: 56 +# CHECK-NEXT: ProgramHeaderCount: 4 +# CHECK-NEXT: SectionHeaderEntrySize: 64 +# CHECK-NEXT: SectionHeaderCount: 6 +# CHECK-NEXT: StringTableSectionIndex: 4 +# CHECK-NEXT: } +# CHECK-NEXT: Sections [ +# CHECK-NEXT: Section { +# CHECK-NEXT: Index: 0 +# CHECK-NEXT: Name: (0) +# CHECK-NEXT: Type: SHT_NULL (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Address: 0x0 +# CHECK-NEXT: Offset: 0x0 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Link: 0 +# CHECK-NEXT: Info: 0 +# CHECK-NEXT: AddressAlignment: 0 +# CHECK-NEXT: EntrySize: 0 +# CHECK-NEXT: } +# CHECK-NEXT: Section { +# CHECK-NEXT: Index: 1 +# CHECK-NEXT: Name: .text +# CHECK-NEXT: Type: SHT_PROGBITS (0x1) +# CHECK-NEXT: Flags [ (0x6) +# CHECK-NEXT: SHF_ALLOC (0x2) +# CHECK-NEXT: SHF_EXECINSTR (0x4) +# CHECK-NEXT: ] +# CHECK-NEXT: Address: 0x200000 +# CHECK-NEXT: Offset: 0x100000 +# CHECK-NEXT: Size: 12 +# CHECK-NEXT: Link: 0 +# CHECK-NEXT: Info: 0 +# CHECK-NEXT: AddressAlignment: 4 +# CHECK-NEXT: EntrySize: 0 +# CHECK-NEXT: } +# CHECK-NEXT: Section { +# CHECK-NEXT: Index: 2 +# CHECK-NEXT: Name: .comment +# CHECK-NEXT: Type: SHT_PROGBITS (0x1) +# CHECK-NEXT: Flags [ (0x30) +# CHECK-NEXT: SHF_MERGE (0x10) +# CHECK-NEXT: SHF_STRINGS (0x20) +# CHECK-NEXT: ] +# CHECK-NEXT: Address: 0x0 +# CHECK-NEXT: Offset: 0x10000C +# CHECK-NEXT: Size: 8 +# CHECK-NEXT: Link: 0 +# CHECK-NEXT: Info: 0 +# CHECK-NEXT: AddressAlignment: 1 +# CHECK-NEXT: EntrySize: 0 +# CHECK-NEXT: } +# CHECK-NEXT: Section { +# CHECK-NEXT: Index: 3 +# CHECK-NEXT: Name: .symtab +# CHECK-NEXT: Type: SHT_SYMTAB (0x2) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Address: 0x0 +# CHECK-NEXT: Offset: 0x100018 +# CHECK-NEXT: Size: 48 +# CHECK-NEXT: Link: 5 +# CHECK-NEXT: Info: 1 +# CHECK-NEXT: AddressAlignment: 8 +# CHECK-NEXT: EntrySize: 24 +# CHECK-NEXT: } +# CHECK-NEXT: Section { +# CHECK-NEXT: Index: 4 +# CHECK-NEXT: Name: .shstrtab +# CHECK-NEXT: 
Type: SHT_STRTAB (0x3) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Address: 0x0 +# CHECK-NEXT: Offset: 0x100048 +# CHECK-NEXT: Size: 42 +# CHECK-NEXT: Link: 0 +# CHECK-NEXT: Info: 0 +# CHECK-NEXT: AddressAlignment: 1 +# CHECK-NEXT: EntrySize: 0 +# CHECK-NEXT: } +# CHECK-NEXT: Section { +# CHECK-NEXT: Index: 5 +# CHECK-NEXT: Name: .strtab +# CHECK-NEXT: Type: SHT_STRTAB (0x3) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Address: 0x0 +# CHECK-NEXT: Offset: 0x100072 +# CHECK-NEXT: Size: 8 +# CHECK-NEXT: Link: 0 +# CHECK-NEXT: Info: 0 +# CHECK-NEXT: AddressAlignment: 1 +# CHECK-NEXT: EntrySize: 0 +# CHECK-NEXT: } +# CHECK-NEXT: ] +# CHECK-NEXT: Symbols [ +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: (0) +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Local (0x0) +# CHECK-NEXT: Type: None (0x0) +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: Undefined (0x0) +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _start +# CHECK-NEXT: Value: [[ENTRY]] +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global (0x1) +# CHECK-NEXT: Type: None (0x0) +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: .text +# CHECK-NEXT: } +# CHECK-NEXT: ] +# CHECK-NEXT: ProgramHeaders [ +# CHECK-NEXT: ProgramHeader { +# CHECK-NEXT: Type: PT_PHDR (0x6) +# CHECK-NEXT: Offset: 0x40 +# CHECK-NEXT: VirtualAddress: 0x100040 +# CHECK-NEXT: PhysicalAddress: 0x100040 +# CHECK-NEXT: FileSize: 224 +# CHECK-NEXT: MemSize: 224 +# CHECK-NEXT: Flags [ (0x4) +# CHECK-NEXT: PF_R (0x4) +# CHECK-NEXT: ] +# CHECK-NEXT: Alignment: 8 +# CHECK-NEXT: } +# CHECK-NEXT: ProgramHeader { +# CHECK-NEXT: Type: PT_LOAD (0x1) +# CHECK-NEXT: Offset: 0x0 +# CHECK-NEXT: VirtualAddress: 0x100000 +# CHECK-NEXT: PhysicalAddress: 0x100000 +# CHECK-NEXT: FileSize: 288 +# CHECK-NEXT: MemSize: 288 +# CHECK-NEXT: Flags [ +# CHECK-NEXT: PF_R +# CHECK-NEXT: ] +# CHECK-NEXT: Alignment: 1048576 +# CHECK-NEXT: } +# CHECK-NEXT: ProgramHeader { +# CHECK-NEXT: Type: PT_LOAD (0x1) +# 
CHECK-NEXT: Offset: 0x100000 +# CHECK-NEXT: VirtualAddress: 0x200000 +# CHECK-NEXT: PhysicalAddress: 0x200000 +# CHECK-NEXT: FileSize: 12 +# CHECK-NEXT: MemSize: 12 +# CHECK-NEXT: Flags [ (0x5) +# CHECK-NEXT: PF_R (0x4) +# CHECK-NEXT: PF_X (0x1) +# CHECK-NEXT: ] +# CHECK-NEXT: Alignment: 1048576 +# CHECK-NEXT: } +# CHECK-NEXT: ProgramHeader { +# CHECK-NEXT: Type: PT_GNU_STACK +# CHECK-NEXT: Offset: 0x0 +# CHECK-NEXT: VirtualAddress: 0x0 +# CHECK-NEXT: PhysicalAddress: 0x0 +# CHECK-NEXT: FileSize: 0 +# CHECK-NEXT: MemSize: 0 +# CHECK-NEXT: Flags [ +# CHECK-NEXT: PF_R +# CHECK-NEXT: PF_W +# CHECK-NEXT: ] +# CHECK-NEXT: Alignment: 0 +# CHECK-NEXT: } +# CHECK-NEXT: ] diff --git a/test/ELF/dso-undef-size.s b/test/ELF/dso-undef-size.s new file mode 100644 index 000000000000..5a235565bbc9 --- /dev/null +++ b/test/ELF/dso-undef-size.s @@ -0,0 +1,32 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %S/Inputs/dso-undef-size.s -o %t1.o +# RUN: ld.lld -shared %t1.o -o %t1.so +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t2.o +# RUN: ld.lld -shared %t2.o %t1.so -o %t2.so +# RUN: llvm-readobj -symbols -dyn-symbols %t2.so + +# CHECK: Symbols [ +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: foo +# CHECK-NEXT: Value: +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: +# CHECK-NEXT: Type: +# CHECK-NEXT: Other: +# CHECK-NEXT: Section: Undefined +# CHECK-NEXT: } +# CHECK-NEXT: ] +# CHECK: DynamicSymbols [ +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: foo +# CHECK-NEXT: Value: +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: +# CHECK-NEXT: Type: +# CHECK-NEXT: Other: +# CHECK-NEXT: Section: Undefined +# CHECK-NEXT: } +# CHECK-NEXT: ] + +.text +.global foo diff --git a/test/ELF/linkerscript/data-commands-gc.s b/test/ELF/linkerscript/data-commands-gc.s index 46ce6a97cf75..1afcc9a3bb81 100644 --- a/test/ELF/linkerscript/data-commands-gc.s +++ b/test/ELF/linkerscript/data-commands-gc.s @@ -1,16 +1,17 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj 
-triple=x86_64-unknown-linux %s -o %t.o # RUN: echo "SECTIONS { .text : { *(.text*) QUAD(bar) } }" > %t.script -# RUN: ld.lld --gc-sections -o %t %t.o --script %t.script | FileCheck -allow-empty %s +# RUN: ld.lld --gc-sections -o %t %t.o --script %t.script +# RUN: llvm-objdump -t %t | FileCheck %s -# CHECK-NOT: unable to evaluate expression: input section .rodata.bar has no output section assigned +# CHECK: 0000000000000011 .rodata 00000000 bar .section .rodata.bar .quad 0x1122334455667788 .global bar bar: .section .text .global _start _start: nop diff --git a/test/ELF/relocatable-script.s b/test/ELF/relocatable-script.s new file mode 100644 index 000000000000..133d61f48056 --- /dev/null +++ b/test/ELF/relocatable-script.s @@ -0,0 +1,7 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux -o %t1.o %s +# RUN: echo "SECTIONS { .foo : { BYTE(0x0) } }" > %t.script +# RUN: ld.lld -r %t1.o -script %t.script -o %t2.o +# RUN: llvm-readobj -sections %t2.o | FileCheck %s + +# CHECK: Name: .foo diff --git a/test/ELF/version-script-symver.s b/test/ELF/version-script-symver.s new file mode 100644 index 000000000000..7798330b053d --- /dev/null +++ b/test/ELF/version-script-symver.s @@ -0,0 +1,11 @@ +# REQUIRES: x86 + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o +# RUN: echo "VERSION { global: *; };" > %t.map +# RUN: ld.lld %t.o --version-script %t.map -o %t + +.global _start +.global bar +.symver _start, bar@@VERSION +_start: + jmp bar diff --git a/test/lit.cfg b/test/lit.cfg index 0cb62d4cab97..cba56c642907 100644 --- a/test/lit.cfg +++ b/test/lit.cfg @@ -1,268 +1,270 @@ # -*- Python -*- import os import platform import re import subprocess import locale import lit.formats import lit.util # Configuration file for the 'lit' test runner. # name: The name of this test suite. config.name = 'lld' # Tweak PATH for Win32 if sys.platform in ['win32']: # Seek sane tools in directories and set to $PATH. 
path = getattr(config, 'lit_tools_dir', None) path = lit_config.getToolsPath(path, config.environment['PATH'], ['cmp.exe', 'grep.exe', 'sed.exe']) if path is not None: path = os.path.pathsep.join((path, config.environment['PATH'])) config.environment['PATH'] = path # Choose between lit's internal shell pipeline runner and a real shell. If # LIT_USE_INTERNAL_SHELL is in the environment, we use that as an override. use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL") if use_lit_shell: # 0 is external, "" is default, and everything else is internal. execute_external = (use_lit_shell == "0") else: # Otherwise we default to internal on Windows and external elsewhere, as # bash on Windows is usually very slow. execute_external = (not sys.platform in ['win32']) # testFormat: The test format to use to interpret tests. # # For now we require '&&' between commands, until they get globally killed and # the test runner updated. config.test_format = lit.formats.ShTest(execute_external) # suffixes: A list of file extensions to treat as test files. config.suffixes = ['.ll', '.s', '.test', '.yaml', '.objtxt'] # excludes: A list of directories to exclude from the testsuite. The 'Inputs' # subdirectories contain auxiliary inputs for various tests in their parent # directories. config.excludes = ['Inputs'] # test_source_root: The root path where tests are located. config.test_source_root = os.path.dirname(__file__) # test_exec_root: The root path where tests should be run. lld_obj_root = getattr(config, 'lld_obj_root', None) if lld_obj_root is not None: config.test_exec_root = os.path.join(lld_obj_root, 'test') # Set llvm_{src,obj}_root for use by others. config.llvm_src_root = getattr(config, 'llvm_src_root', None) config.llvm_obj_root = getattr(config, 'llvm_obj_root', None) # Tweak the PATH to include the tools dir and the scripts dir. 
if lld_obj_root is not None: lld_tools_dir = getattr(config, 'lld_tools_dir', None) if not lld_tools_dir: lit_config.fatal('No LLD tools dir set!') llvm_tools_dir = getattr(config, 'llvm_tools_dir', None) if not llvm_tools_dir: lit_config.fatal('No LLVM tools dir set!') path = os.path.pathsep.join((lld_tools_dir, llvm_tools_dir, config.environment['PATH'])) path = os.path.pathsep.join((os.path.join(getattr(config, 'llvm_src_root', None),'test','Scripts'),path)) config.environment['PATH'] = path lld_libs_dir = getattr(config, 'lld_libs_dir', None) if not lld_libs_dir: lit_config.fatal('No LLD libs dir set!') llvm_libs_dir = getattr(config, 'llvm_libs_dir', None) if not llvm_libs_dir: lit_config.fatal('No LLVM libs dir set!') path = os.path.pathsep.join((lld_libs_dir, llvm_libs_dir, config.environment.get('LD_LIBRARY_PATH',''))) config.environment['LD_LIBRARY_PATH'] = path # Propagate LLVM_SRC_ROOT into the environment. config.environment['LLVM_SRC_ROOT'] = getattr(config, 'llvm_src_root', '') # Propagate PYTHON_EXECUTABLE into the environment config.environment['PYTHON_EXECUTABLE'] = getattr(config, 'python_executable', '') ### # Check that the object root is known. if config.test_exec_root is None: # Otherwise, we haven't loaded the site specific configuration (the user is # probably trying to run on a test file directly, and either the site # configuration hasn't been created by the build system, or we are in an # out-of-tree build situation). # Check for 'lld_site_config' user parameter, and use that if available. site_cfg = lit_config.params.get('lld_site_config', None) if site_cfg and os.path.exists(site_cfg): lit_config.load_config(config, site_cfg) raise SystemExit # Try to detect the situation where we are using an out-of-tree build by # looking for 'llvm-config'. # # FIXME: I debated (i.e., wrote and threw away) adding logic to # automagically generate the lit.site.cfg if we are in some kind of fresh # build situation. 
This means knowing how to invoke the build system though, # and I decided it was too much magic. We should solve this by just having # the .cfg files generated during the configuration step. llvm_config = lit.util.which('llvm-config', config.environment['PATH']) if not llvm_config: lit_config.fatal('No site specific configuration available!') # Get the source and object roots. llvm_src_root = lit.util.capture(['llvm-config', '--src-root']).strip() llvm_obj_root = lit.util.capture(['llvm-config', '--obj-root']).strip() lld_src_root = os.path.join(llvm_src_root, "tools", "lld") lld_obj_root = os.path.join(llvm_obj_root, "tools", "lld") # Validate that we got a tree which points to here, using the standard # tools/lld layout. this_src_root = os.path.dirname(config.test_source_root) if os.path.realpath(lld_src_root) != os.path.realpath(this_src_root): lit_config.fatal('No site specific configuration available!') # Check that the site specific configuration exists. site_cfg = os.path.join(lld_obj_root, 'test', 'lit.site.cfg') if not os.path.exists(site_cfg): lit_config.fatal( 'No site specific configuration available! You may need to ' 'run "make test" in your lld build directory.') # Okay, that worked. Notify the user of the automagic, and reconfigure. lit_config.note('using out-of-tree build at %r' % lld_obj_root) lit_config.load_config(config, site_cfg) raise SystemExit # For each occurrence of a lld tool name as its own word, replace it # with the full path to the build directory holding that tool. This # ensures that we are testing the tools just built and not some random # tools that might happen to be in the user's PATH. # Regex assertions to reject neighbor hyphens/dots (seen in some tests). # For example, we want to prefix 'lld' and 'ld.lld' but not the 'lld' inside # of 'ld.lld'. NoPreJunk = r"(?