Index: vendor/lld/dist-release_90/CMakeLists.txt =================================================================== --- vendor/lld/dist-release_90/CMakeLists.txt (revision 351717) +++ vendor/lld/dist-release_90/CMakeLists.txt (revision 351718) @@ -1,226 +1,225 @@ # Check if lld is built as a standalone project. if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) project(lld) cmake_minimum_required(VERSION 3.4.3) set(CMAKE_INCLUDE_CURRENT_DIR ON) set(LLD_BUILT_STANDALONE TRUE) find_program(LLVM_CONFIG_PATH "llvm-config" DOC "Path to llvm-config binary") if(NOT LLVM_CONFIG_PATH) message(FATAL_ERROR "llvm-config not found: specify LLVM_CONFIG_PATH") endif() execute_process(COMMAND "${LLVM_CONFIG_PATH}" "--obj-root" "--includedir" "--cmakedir" "--src-root" RESULT_VARIABLE HAD_ERROR OUTPUT_VARIABLE LLVM_CONFIG_OUTPUT OUTPUT_STRIP_TRAILING_WHITESPACE) if(HAD_ERROR) message(FATAL_ERROR "llvm-config failed with status ${HAD_ERROR}") endif() string(REGEX REPLACE "[ \t]*[\r\n]+[ \t]*" ";" LLVM_CONFIG_OUTPUT "${LLVM_CONFIG_OUTPUT}") list(GET LLVM_CONFIG_OUTPUT 0 OBJ_ROOT) list(GET LLVM_CONFIG_OUTPUT 1 MAIN_INCLUDE_DIR) list(GET LLVM_CONFIG_OUTPUT 2 LLVM_CMAKE_PATH) list(GET LLVM_CONFIG_OUTPUT 3 MAIN_SRC_DIR) set(LLVM_OBJ_ROOT ${OBJ_ROOT} CACHE PATH "path to LLVM build tree") set(LLVM_MAIN_INCLUDE_DIR ${MAIN_INCLUDE_DIR} CACHE PATH "path to llvm/include") set(LLVM_MAIN_SRC_DIR ${MAIN_SRC_DIR} CACHE PATH "Path to LLVM source tree") file(TO_CMAKE_PATH ${LLVM_OBJ_ROOT} LLVM_BINARY_DIR) if(NOT EXISTS "${LLVM_CMAKE_PATH}/LLVMConfig.cmake") message(FATAL_ERROR "LLVMConfig.cmake not found") endif() include("${LLVM_CMAKE_PATH}/LLVMConfig.cmake") list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_PATH}") set(PACKAGE_VERSION "${LLVM_PACKAGE_VERSION}") include_directories("${LLVM_BINARY_DIR}/include" ${LLVM_INCLUDE_DIRS}) link_directories(${LLVM_LIBRARY_DIRS}) set(LLVM_LIBRARY_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/lib${LLVM_LIBDIR_SUFFIX}) set(LLVM_RUNTIME_OUTPUT_INTDIR 
${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin) find_program(LLVM_TABLEGEN_EXE "llvm-tblgen" ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH) include(AddLLVM) include(TableGen) include(HandleLLVMOptions) if(LLVM_INCLUDE_TESTS) - set(Python_ADDITIONAL_VERSIONS 2.7) include(FindPythonInterp) if(NOT PYTHONINTERP_FOUND) message(FATAL_ERROR "Unable to find Python interpreter, required for testing. Please install Python or specify the PYTHON_EXECUTABLE CMake variable.") endif() if(${PYTHON_VERSION_STRING} VERSION_LESS 2.7) message(FATAL_ERROR "Python 2.7 or newer is required") endif() # Check prebuilt llvm/utils. if(EXISTS ${LLVM_TOOLS_BINARY_DIR}/FileCheck${CMAKE_EXECUTABLE_SUFFIX} AND EXISTS ${LLVM_TOOLS_BINARY_DIR}/not${CMAKE_EXECUTABLE_SUFFIX}) set(LLVM_UTILS_PROVIDED ON) endif() if(EXISTS ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py) # Note: path not really used, except for checking if lit was found set(LLVM_LIT ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py) if(NOT LLVM_UTILS_PROVIDED) add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/FileCheck utils/FileCheck) add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/not utils/not) set(LLVM_UTILS_PROVIDED ON) set(LLD_TEST_DEPS FileCheck not) endif() set(UNITTEST_DIR ${LLVM_MAIN_SRC_DIR}/utils/unittest) if(EXISTS ${UNITTEST_DIR}/googletest/include/gtest/gtest.h AND NOT EXISTS ${LLVM_LIBRARY_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}gtest${CMAKE_STATIC_LIBRARY_SUFFIX} AND EXISTS ${UNITTEST_DIR}/CMakeLists.txt) add_subdirectory(${UNITTEST_DIR} utils/unittest) endif() else() # Seek installed Lit. find_program(LLVM_LIT NAMES llvm-lit lit.py lit PATHS "${LLVM_MAIN_SRC_DIR}/utils/lit" DOC "Path to lit.py") endif() if(LLVM_LIT) # Define the default arguments to use with 'lit', and an option for the user # to override. 
set(LIT_ARGS_DEFAULT "-sv") if (MSVC OR XCODE) set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar") endif() set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}" CACHE STRING "Default options for lit") # On Win32 hosts, provide an option to specify the path to the GnuWin32 tools. if(WIN32 AND NOT CYGWIN) set(LLVM_LIT_TOOLS_DIR "" CACHE PATH "Path to GnuWin32 tools") endif() else() set(LLVM_INCLUDE_TESTS OFF) endif() endif() endif() set(LLD_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(LLD_INCLUDE_DIR ${LLD_SOURCE_DIR}/include ) set(LLD_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) # Compute the LLD version from the LLVM version. string(REGEX MATCH "[0-9]+\\.[0-9]+(\\.[0-9]+)?" LLD_VERSION ${PACKAGE_VERSION}) message(STATUS "LLD version: ${LLD_VERSION}") string(REGEX REPLACE "([0-9]+)\\.[0-9]+(\\.[0-9]+)?" "\\1" LLD_VERSION_MAJOR ${LLD_VERSION}) string(REGEX REPLACE "[0-9]+\\.([0-9]+)(\\.[0-9]+)?" "\\1" LLD_VERSION_MINOR ${LLD_VERSION}) # Determine LLD revision and repository. # TODO: Figure out a way to get the revision and the repository on windows. 
if ( NOT CMAKE_SYSTEM_NAME MATCHES "Windows" ) execute_process(COMMAND ${CMAKE_SOURCE_DIR}/utils/GetSourceVersion ${LLD_SOURCE_DIR} OUTPUT_VARIABLE LLD_REVISION) execute_process(COMMAND ${CMAKE_SOURCE_DIR}/utils/GetRepositoryPath ${LLD_SOURCE_DIR} OUTPUT_VARIABLE LLD_REPOSITORY) if ( LLD_REPOSITORY ) # Replace newline characters with spaces string(REGEX REPLACE "(\r?\n)+" " " LLD_REPOSITORY ${LLD_REPOSITORY}) # Remove leading spaces STRING(REGEX REPLACE "^[ \t\r\n]+" "" LLD_REPOSITORY "${LLD_REPOSITORY}" ) # Remove trailing spaces string(REGEX REPLACE "(\ )+$" "" LLD_REPOSITORY ${LLD_REPOSITORY}) endif() if ( LLD_REVISION ) # Replace newline characters with spaces string(REGEX REPLACE "(\r?\n)+" " " LLD_REVISION ${LLD_REVISION}) # Remove leading spaces STRING(REGEX REPLACE "^[ \t\r\n]+" "" LLD_REVISION "${LLD_REVISION}" ) # Remove trailing spaces string(REGEX REPLACE "(\ )+$" "" LLD_REVISION ${LLD_REVISION}) endif() endif () # Configure the Version.inc file. configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/include/lld/Common/Version.inc.in ${CMAKE_CURRENT_BINARY_DIR}/include/lld/Common/Version.inc) if (CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR) message(FATAL_ERROR "In-source builds are not allowed. CMake would overwrite " "the makefiles distributed with LLVM. Please create a directory and run cmake " "from there, passing the path to this source directory as the last argument. " "This process created the file `CMakeCache.txt' and the directory " "`CMakeFiles'. Please delete them.") endif() list (APPEND CMAKE_MODULE_PATH "${LLD_SOURCE_DIR}/cmake/modules") include(AddLLD) option(LLD_USE_VTUNE "Enable VTune user task tracking." OFF) if (LLD_USE_VTUNE) find_package(VTune) if (VTUNE_FOUND) include_directories(${VTune_INCLUDE_DIRS}) list(APPEND LLVM_COMMON_LIBS ${VTune_LIBRARIES}) add_definitions(-DLLD_HAS_VTUNE) endif() endif() option(LLD_BUILD_TOOLS "Build the lld tools. If OFF, just generate build targets." 
ON) if (MSVC) add_definitions(-wd4530) # Suppress 'warning C4530: C++ exception handler used, but unwind semantics are not enabled.' add_definitions(-wd4062) # Suppress 'warning C4062: enumerator X in switch of enum Y is not handled' from system header. endif() include_directories(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/include ) if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) install(DIRECTORY include/ DESTINATION include FILES_MATCHING PATTERN "*.h" PATTERN ".svn" EXCLUDE ) endif() add_subdirectory(Common) add_subdirectory(lib) add_subdirectory(tools/lld) if (LLVM_INCLUDE_TESTS) add_subdirectory(test) add_subdirectory(unittests) endif() add_subdirectory(docs) add_subdirectory(COFF) add_subdirectory(ELF) add_subdirectory(MinGW) add_subdirectory(wasm) Index: vendor/lld/dist-release_90/COFF/Config.h =================================================================== --- vendor/lld/dist-release_90/COFF/Config.h (revision 351717) +++ vendor/lld/dist-release_90/COFF/Config.h (revision 351718) @@ -1,231 +1,232 @@ //===- Config.h -------------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLD_COFF_CONFIG_H #define LLD_COFF_CONFIG_H #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Object/COFF.h" #include "llvm/Support/CachePruning.h" #include #include #include #include namespace lld { namespace coff { using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN; using llvm::COFF::WindowsSubsystem; using llvm::StringRef; class DefinedAbsolute; class DefinedRelative; class StringChunk; class Symbol; class InputFile; // Short aliases. 
static const auto AMD64 = llvm::COFF::IMAGE_FILE_MACHINE_AMD64; static const auto ARM64 = llvm::COFF::IMAGE_FILE_MACHINE_ARM64; static const auto ARMNT = llvm::COFF::IMAGE_FILE_MACHINE_ARMNT; static const auto I386 = llvm::COFF::IMAGE_FILE_MACHINE_I386; // Represents an /export option. struct Export { StringRef name; // N in /export:N or /export:E=N StringRef extName; // E in /export:E=N Symbol *sym = nullptr; uint16_t ordinal = 0; bool noname = false; bool data = false; bool isPrivate = false; bool constant = false; // If an export is a form of /export:foo=dllname.bar, that means // that foo should be exported as an alias to bar in the DLL. // forwardTo is set to "dllname.bar" part. Usually empty. StringRef forwardTo; StringChunk *forwardChunk = nullptr; // True if this /export option was in .drectves section. bool directives = false; StringRef symbolName; StringRef exportName; // Name in DLL bool operator==(const Export &e) { return (name == e.name && extName == e.extName && ordinal == e.ordinal && noname == e.noname && data == e.data && isPrivate == e.isPrivate); } }; enum class DebugType { None = 0x0, CV = 0x1, /// CodeView PData = 0x2, /// Procedure Data Fixup = 0x4, /// Relocation Table }; enum class GuardCFLevel { Off, NoLongJmp, // Emit gfids but no longjmp tables Full, // Enable all protections. }; // Global configuration. 
struct Configuration { enum ManifestKind { SideBySide, Embed, No }; bool is64() { return machine == AMD64 || machine == ARM64; } llvm::COFF::MachineTypes machine = IMAGE_FILE_MACHINE_UNKNOWN; size_t wordsize; bool verbose = false; WindowsSubsystem subsystem = llvm::COFF::IMAGE_SUBSYSTEM_UNKNOWN; Symbol *entry = nullptr; bool noEntry = false; std::string outputFile; std::string importName; bool demangle = true; bool doGC = true; bool doICF = true; bool tailMerge; bool relocatable = true; bool forceMultiple = false; bool forceMultipleRes = false; bool forceUnresolved = false; bool debug = false; bool debugDwarf = false; bool debugGHashes = false; bool debugSymtab = false; bool showTiming = false; bool showSummary = false; unsigned debugTypes = static_cast(DebugType::None); std::vector natvisFiles; llvm::SmallString<128> pdbAltPath; llvm::SmallString<128> pdbPath; llvm::SmallString<128> pdbSourcePath; std::vector argv; // Symbols in this set are considered as live by the garbage collector. std::vector gcroot; std::set noDefaultLibs; bool noDefaultLibAll = false; // True if we are creating a DLL. bool dll = false; StringRef implib; std::vector exports; std::set delayLoads; std::map dllOrder; Symbol *delayLoadHelper = nullptr; bool saveTemps = false; // /guard:cf GuardCFLevel guardCF = GuardCFLevel::Off; // Used for SafeSEH. bool safeSEH = false; Symbol *sehTable = nullptr; Symbol *sehCount = nullptr; // Used for /opt:lldlto=N unsigned ltoo = 2; // Used for /opt:lldltojobs=N unsigned thinLTOJobs = 0; // Used for /opt:lldltopartitions=N unsigned ltoPartitions = 1; // Used for /opt:lldltocache=path StringRef ltoCache; // Used for /opt:lldltocachepolicy=policy llvm::CachePruningPolicy ltoCachePolicy; // Used for /merge:from=to (e.g. /merge:.rdata=.text) std::map merge; // Used for /section=.name,{DEKPRSW} to set section attributes. std::map section; // Options for manifest files. 
ManifestKind manifest = No; int manifestID = 1; StringRef manifestDependency; bool manifestUAC = true; std::vector manifestInput; StringRef manifestLevel = "'asInvoker'"; StringRef manifestUIAccess = "'false'"; StringRef manifestFile; // Used for /aligncomm. std::map alignComm; // Used for /failifmismatch. std::map> mustMatch; // Used for /alternatename. std::map alternateNames; // Used for /order. llvm::StringMap order; // Used for /lldmap. std::string mapFile; // Used for /thinlto-index-only: llvm::StringRef thinLTOIndexOnlyArg; // Used for /thinlto-object-prefix-replace: std::pair thinLTOPrefixReplace; // Used for /thinlto-object-suffix-replace: std::pair thinLTOObjectSuffixReplace; + uint64_t align = 4096; uint64_t imageBase = -1; uint64_t fileAlign = 512; uint64_t stackReserve = 1024 * 1024; uint64_t stackCommit = 4096; uint64_t heapReserve = 1024 * 1024; uint64_t heapCommit = 4096; uint32_t majorImageVersion = 0; uint32_t minorImageVersion = 0; uint32_t majorOSVersion = 6; uint32_t minorOSVersion = 0; uint32_t timestamp = 0; uint32_t functionPadMin = 0; bool dynamicBase = true; bool allowBind = true; bool nxCompat = true; bool allowIsolation = true; bool terminalServerAware = true; bool largeAddressAware = false; bool highEntropyVA = false; bool appContainer = false; bool mingw = false; bool warnMissingOrderSymbol = true; bool warnLocallyDefinedImported = true; bool warnDebugInfoUnusable = true; bool incremental = true; bool integrityCheck = false; bool killAt = false; bool repro = false; bool swaprunCD = false; bool swaprunNet = false; bool thinLTOEmitImportsFiles; bool thinLTOIndexOnly; }; extern Configuration *config; } // namespace coff } // namespace lld #endif Index: vendor/lld/dist-release_90/COFF/Driver.cpp =================================================================== --- vendor/lld/dist-release_90/COFF/Driver.cpp (revision 351717) +++ vendor/lld/dist-release_90/COFF/Driver.cpp (revision 351718) @@ -1,1893 +1,1917 @@ //===- Driver.cpp 
---------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "Driver.h" #include "Config.h" #include "DebugTypes.h" #include "ICF.h" #include "InputFiles.h" #include "MarkLive.h" #include "MinGW.h" #include "SymbolTable.h" #include "Symbols.h" #include "Writer.h" #include "lld/Common/Args.h" #include "lld/Common/Driver.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Filesystem.h" #include "lld/Common/Memory.h" #include "lld/Common/Threads.h" #include "lld/Common/Timer.h" #include "lld/Common/Version.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/BinaryFormat/Magic.h" #include "llvm/Object/ArchiveWriter.h" #include "llvm/Object/COFFImportFile.h" #include "llvm/Object/COFFModuleDefinition.h" #include "llvm/Object/WindowsMachineFlag.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" #include "llvm/Support/Debug.h" #include "llvm/Support/LEB128.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/TarWriter.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ToolDrivers/llvm-lib/LibDriver.h" #include #include #include using namespace llvm; using namespace llvm::object; using namespace llvm::COFF; using llvm::sys::Process; namespace lld { namespace coff { static Timer inputFileTimer("Input File Reading", Timer::root()); Configuration *config; LinkerDriver *driver; bool link(ArrayRef args, bool canExitEarly, raw_ostream &diag) { errorHandler().logName = args::getFilenameWithoutExe(args[0]); errorHandler().errorOS = &diag; errorHandler().colorDiagnostics = 
diag.has_colors(); errorHandler().errorLimitExceededMsg = "too many errors emitted, stopping now" " (use /errorlimit:0 to see all errors)"; errorHandler().exitEarly = canExitEarly; config = make(); symtab = make(); driver = make(); driver->link(args); // Call exit() if we can to avoid calling destructors. if (canExitEarly) exitLld(errorCount() ? 1 : 0); freeArena(); ObjFile::instances.clear(); ImportFile::instances.clear(); BitcodeFile::instances.clear(); memset(MergeChunk::instances, 0, sizeof(MergeChunk::instances)); return !errorCount(); } // Parse options of the form "old;new". static std::pair getOldNewOptions(opt::InputArgList &args, unsigned id) { auto *arg = args.getLastArg(id); if (!arg) return {"", ""}; StringRef s = arg->getValue(); std::pair ret = s.split(';'); if (ret.second.empty()) error(arg->getSpelling() + " expects 'old;new' format, but got " + s); return ret; } // Drop directory components and replace extension with ".exe" or ".dll". static std::string getOutputPath(StringRef path) { auto p = path.find_last_of("\\/"); StringRef s = (p == StringRef::npos) ? path : path.substr(p + 1); const char* e = config->dll ? ".dll" : ".exe"; return (s.substr(0, s.rfind('.')) + e).str(); } // Returns true if S matches /crtend.?\.o$/. static bool isCrtend(StringRef s) { if (!s.endswith(".o")) return false; s = s.drop_back(2); if (s.endswith("crtend")) return true; return !s.empty() && s.drop_back().endswith("crtend"); } // ErrorOr is not default constructible, so it cannot be used as the type // parameter of a future. // FIXME: We could open the file in createFutureForFile and avoid needing to // return an error here, but for the moment that would cost us a file descriptor // (a limited resource on Windows) for the duration that the future is pending. using MBErrPair = std::pair, std::error_code>; // Create a std::future that opens and maps a file using the best strategy for // the host platform. 
static std::future createFutureForFile(std::string path) { #if _WIN32 // On Windows, file I/O is relatively slow so it is best to do this // asynchronously. auto strategy = std::launch::async; #else auto strategy = std::launch::deferred; #endif return std::async(strategy, [=]() { auto mbOrErr = MemoryBuffer::getFile(path, /*FileSize*/ -1, /*RequiresNullTerminator*/ false); if (!mbOrErr) return MBErrPair{nullptr, mbOrErr.getError()}; return MBErrPair{std::move(*mbOrErr), std::error_code()}; }); } // Symbol names are mangled by prepending "_" on x86. static StringRef mangle(StringRef sym) { assert(config->machine != IMAGE_FILE_MACHINE_UNKNOWN); if (config->machine == I386) return saver.save("_" + sym); return sym; } static bool findUnderscoreMangle(StringRef sym) { Symbol *s = symtab->findMangle(mangle(sym)); return s && !isa(s); } MemoryBufferRef LinkerDriver::takeBuffer(std::unique_ptr mb) { MemoryBufferRef mbref = *mb; make>(std::move(mb)); // take ownership if (driver->tar) driver->tar->append(relativeToRoot(mbref.getBufferIdentifier()), mbref.getBuffer()); return mbref; } void LinkerDriver::addBuffer(std::unique_ptr mb, bool wholeArchive) { StringRef filename = mb->getBufferIdentifier(); MemoryBufferRef mbref = takeBuffer(std::move(mb)); filePaths.push_back(filename); // File type is detected by contents, not by file extension. 
switch (identify_magic(mbref.getBuffer())) { case file_magic::windows_resource: resources.push_back(mbref); break; case file_magic::archive: if (wholeArchive) { std::unique_ptr file = CHECK(Archive::create(mbref), filename + ": failed to parse archive"); Archive *archive = file.get(); make>(std::move(file)); // take ownership for (MemoryBufferRef m : getArchiveMembers(archive)) addArchiveBuffer(m, "", filename, 0); return; } symtab->addFile(make(mbref)); break; case file_magic::bitcode: symtab->addFile(make(mbref, "", 0)); break; case file_magic::coff_object: case file_magic::coff_import_library: symtab->addFile(make(mbref)); break; case file_magic::pdb: loadTypeServerSource(mbref); break; case file_magic::coff_cl_gl_object: error(filename + ": is not a native COFF file. Recompile without /GL"); break; case file_magic::pecoff_executable: if (filename.endswith_lower(".dll")) { error(filename + ": bad file type. Did you specify a DLL instead of an " "import library?"); break; } LLVM_FALLTHROUGH; default: error(mbref.getBufferIdentifier() + ": unknown file type"); break; } } void LinkerDriver::enqueuePath(StringRef path, bool wholeArchive) { auto future = std::make_shared>(createFutureForFile(path)); std::string pathStr = path; enqueueTask([=]() { auto mbOrErr = future->get(); if (mbOrErr.second) { std::string msg = "could not open '" + pathStr + "': " + mbOrErr.second.message(); // Check if the filename is a typo for an option flag. OptTable thinks // that all args that are not known options and that start with / are // filenames, but e.g. `/nodefaultlibs` is more likely a typo for // the option `/nodefaultlib` than a reference to a file in the root // directory. 
std::string nearest; if (COFFOptTable().findNearest(pathStr, nearest) > 1) error(msg); else error(msg + "; did you mean '" + nearest + "'"); } else driver->addBuffer(std::move(mbOrErr.first), wholeArchive); }); } void LinkerDriver::addArchiveBuffer(MemoryBufferRef mb, StringRef symName, StringRef parentName, uint64_t offsetInArchive) { file_magic magic = identify_magic(mb.getBuffer()); if (magic == file_magic::coff_import_library) { InputFile *imp = make(mb); imp->parentName = parentName; symtab->addFile(imp); return; } InputFile *obj; if (magic == file_magic::coff_object) { obj = make(mb); } else if (magic == file_magic::bitcode) { obj = make(mb, parentName, offsetInArchive); } else { error("unknown file type: " + mb.getBufferIdentifier()); return; } obj->parentName = parentName; symtab->addFile(obj); log("Loaded " + toString(obj) + " for " + symName); } void LinkerDriver::enqueueArchiveMember(const Archive::Child &c, - StringRef symName, + const Archive::Symbol &sym, StringRef parentName) { - auto reportBufferError = [=](Error &&e, - StringRef childName) { + auto reportBufferError = [=](Error &&e, StringRef childName) { fatal("could not get the buffer for the member defining symbol " + - symName + ": " + parentName + "(" + childName + "): " + + toCOFFString(sym) + ": " + parentName + "(" + childName + "): " + toString(std::move(e))); }; if (!c.getParent()->isThin()) { uint64_t offsetInArchive = c.getChildOffset(); Expected mbOrErr = c.getMemoryBufferRef(); if (!mbOrErr) reportBufferError(mbOrErr.takeError(), check(c.getFullName())); MemoryBufferRef mb = mbOrErr.get(); enqueueTask([=]() { - driver->addArchiveBuffer(mb, symName, parentName, offsetInArchive); + driver->addArchiveBuffer(mb, toCOFFString(sym), parentName, + offsetInArchive); }); return; } std::string childName = CHECK( c.getFullName(), "could not get the filename for the member defining symbol " + - symName); + toCOFFString(sym)); auto future = std::make_shared>( createFutureForFile(childName)); 
enqueueTask([=]() { auto mbOrErr = future->get(); if (mbOrErr.second) reportBufferError(errorCodeToError(mbOrErr.second), childName); - driver->addArchiveBuffer(takeBuffer(std::move(mbOrErr.first)), symName, - parentName, /* OffsetInArchive */ 0); + driver->addArchiveBuffer(takeBuffer(std::move(mbOrErr.first)), + toCOFFString(sym), parentName, + /*OffsetInArchive=*/0); }); } static bool isDecorated(StringRef sym) { return sym.startswith("@") || sym.contains("@@") || sym.startswith("?") || (!config->mingw && sym.contains('@')); } // Parses .drectve section contents and returns a list of files // specified by /defaultlib. void LinkerDriver::parseDirectives(InputFile *file) { StringRef s = file->getDirectives(); if (s.empty()) return; log("Directives: " + toString(file) + ": " + s); ArgParser parser; // .drectve is always tokenized using Windows shell rules. // /EXPORT: option can appear too many times, processing in fastpath. opt::InputArgList args; std::vector exports; std::tie(args, exports) = parser.parseDirectives(s); for (StringRef e : exports) { // If a common header file contains dllexported function // declarations, many object files may end up with having the // same /EXPORT options. In order to save cost of parsing them, // we dedup them first. 
if (!directivesExports.insert(e).second) continue; Export exp = parseExport(e); if (config->machine == I386 && config->mingw) { if (!isDecorated(exp.name)) exp.name = saver.save("_" + exp.name); if (!exp.extName.empty() && !isDecorated(exp.extName)) exp.extName = saver.save("_" + exp.extName); } exp.directives = true; config->exports.push_back(exp); } for (auto *arg : args) { switch (arg->getOption().getID()) { case OPT_aligncomm: parseAligncomm(arg->getValue()); break; case OPT_alternatename: parseAlternateName(arg->getValue()); break; case OPT_defaultlib: if (Optional path = findLib(arg->getValue())) enqueuePath(*path, false); break; case OPT_entry: config->entry = addUndefined(mangle(arg->getValue())); break; case OPT_failifmismatch: checkFailIfMismatch(arg->getValue(), file); break; case OPT_incl: addUndefined(arg->getValue()); break; case OPT_merge: parseMerge(arg->getValue()); break; case OPT_nodefaultlib: config->noDefaultLibs.insert(doFindLib(arg->getValue()).lower()); break; case OPT_section: parseSection(arg->getValue()); break; case OPT_subsystem: parseSubsystem(arg->getValue(), &config->subsystem, &config->majorOSVersion, &config->minorOSVersion); break; // Only add flags here that link.exe accepts in // `#pragma comment(linker, "/flag")`-generated sections. case OPT_editandcontinue: case OPT_guardsym: case OPT_throwingnew: break; default: error(arg->getSpelling() + " is not allowed in .drectve"); } } } // Find file from search paths. You can omit ".obj", this function takes // care of that. Note that the returned path is not guaranteed to exist. 
StringRef LinkerDriver::doFindFile(StringRef filename) { bool hasPathSep = (filename.find_first_of("/\\") != StringRef::npos); if (hasPathSep) return filename; bool hasExt = filename.contains('.'); for (StringRef dir : searchPaths) { SmallString<128> path = dir; sys::path::append(path, filename); if (sys::fs::exists(path.str())) return saver.save(path.str()); if (!hasExt) { path.append(".obj"); if (sys::fs::exists(path.str())) return saver.save(path.str()); } } return filename; } static Optional getUniqueID(StringRef path) { sys::fs::UniqueID ret; if (sys::fs::getUniqueID(path, ret)) return None; return ret; } // Resolves a file path. This never returns the same path // (in that case, it returns None). Optional LinkerDriver::findFile(StringRef filename) { StringRef path = doFindFile(filename); if (Optional id = getUniqueID(path)) { bool seen = !visitedFiles.insert(*id).second; if (seen) return None; } if (path.endswith_lower(".lib")) visitedLibs.insert(sys::path::filename(path)); return path; } // MinGW specific. If an embedded directive specified to link to // foo.lib, but it isn't found, try libfoo.a instead. StringRef LinkerDriver::doFindLibMinGW(StringRef filename) { if (filename.contains('/') || filename.contains('\\')) return filename; SmallString<128> s = filename; sys::path::replace_extension(s, ".a"); StringRef libName = saver.save("lib" + s.str()); return doFindFile(libName); } // Find library file from search path. StringRef LinkerDriver::doFindLib(StringRef filename) { // Add ".lib" to Filename if that has no file extension. bool hasExt = filename.contains('.'); if (!hasExt) filename = saver.save(filename + ".lib"); StringRef ret = doFindFile(filename); // For MinGW, if the find above didn't turn up anything, try // looking for a MinGW formatted library name. if (config->mingw && ret == filename) return doFindLibMinGW(filename); return ret; } // Resolves a library path. /nodefaultlib options are taken into // consideration. 
This never returns the same path (in that case, // it returns None). Optional LinkerDriver::findLib(StringRef filename) { if (config->noDefaultLibAll) return None; if (!visitedLibs.insert(filename.lower()).second) return None; StringRef path = doFindLib(filename); if (config->noDefaultLibs.count(path.lower())) return None; if (Optional id = getUniqueID(path)) if (!visitedFiles.insert(*id).second) return None; return path; } // Parses LIB environment which contains a list of search paths. void LinkerDriver::addLibSearchPaths() { Optional envOpt = Process::GetEnv("LIB"); if (!envOpt.hasValue()) return; StringRef env = saver.save(*envOpt); while (!env.empty()) { StringRef path; std::tie(path, env) = env.split(';'); searchPaths.push_back(path); } } Symbol *LinkerDriver::addUndefined(StringRef name) { Symbol *b = symtab->addUndefined(name); if (!b->isGCRoot) { b->isGCRoot = true; config->gcroot.push_back(b); } return b; } StringRef LinkerDriver::mangleMaybe(Symbol *s) { // If the plain symbol name has already been resolved, do nothing. Undefined *unmangled = dyn_cast(s); if (!unmangled) return ""; // Otherwise, see if a similar, mangled symbol exists in the symbol table. Symbol *mangled = symtab->findMangle(unmangled->getName()); if (!mangled) return ""; // If we find a similar mangled symbol, make this an alias to it and return // its name. log(unmangled->getName() + " aliased to " + mangled->getName()); unmangled->weakAlias = symtab->addUndefined(mangled->getName()); return mangled->getName(); } // Windows specific -- find default entry point name. // // There are four different entry point functions for Windows executables, // each of which corresponds to a user-defined "main" function. This function // infers an entry point from a user-defined "main" function. 
StringRef LinkerDriver::findDefaultEntry() { assert(config->subsystem != IMAGE_SUBSYSTEM_UNKNOWN && "must handle /subsystem before calling this"); if (config->mingw) return mangle(config->subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI ? "WinMainCRTStartup" : "mainCRTStartup"); if (config->subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI) { if (findUnderscoreMangle("wWinMain")) { if (!findUnderscoreMangle("WinMain")) return mangle("wWinMainCRTStartup"); warn("found both wWinMain and WinMain; using latter"); } return mangle("WinMainCRTStartup"); } if (findUnderscoreMangle("wmain")) { if (!findUnderscoreMangle("main")) return mangle("wmainCRTStartup"); warn("found both wmain and main; using latter"); } return mangle("mainCRTStartup"); } WindowsSubsystem LinkerDriver::inferSubsystem() { if (config->dll) return IMAGE_SUBSYSTEM_WINDOWS_GUI; if (config->mingw) return IMAGE_SUBSYSTEM_WINDOWS_CUI; // Note that link.exe infers the subsystem from the presence of these // functions even if /entry: or /nodefaultlib are passed which causes them // to not be called. bool haveMain = findUnderscoreMangle("main"); bool haveWMain = findUnderscoreMangle("wmain"); bool haveWinMain = findUnderscoreMangle("WinMain"); bool haveWWinMain = findUnderscoreMangle("wWinMain"); if (haveMain || haveWMain) { if (haveWinMain || haveWWinMain) { warn(std::string("found ") + (haveMain ? "main" : "wmain") + " and " + (haveWinMain ? "WinMain" : "wWinMain") + "; defaulting to /subsystem:console"); } return IMAGE_SUBSYSTEM_WINDOWS_CUI; } if (haveWinMain || haveWWinMain) return IMAGE_SUBSYSTEM_WINDOWS_GUI; return IMAGE_SUBSYSTEM_UNKNOWN; } static uint64_t getDefaultImageBase() { if (config->is64()) return config->dll ? 0x180000000 : 0x140000000; return config->dll ? 
0x10000000 : 0x400000; } static std::string createResponseFile(const opt::InputArgList &args, ArrayRef filePaths, ArrayRef searchPaths) { SmallString<0> data; raw_svector_ostream os(data); for (auto *arg : args) { switch (arg->getOption().getID()) { case OPT_linkrepro: case OPT_INPUT: case OPT_defaultlib: case OPT_libpath: case OPT_manifest: case OPT_manifest_colon: case OPT_manifestdependency: case OPT_manifestfile: case OPT_manifestinput: case OPT_manifestuac: break; case OPT_implib: case OPT_pdb: case OPT_out: os << arg->getSpelling() << sys::path::filename(arg->getValue()) << "\n"; break; default: os << toString(*arg) << "\n"; } } for (StringRef path : searchPaths) { std::string relPath = relativeToRoot(path); os << "/libpath:" << quote(relPath) << "\n"; } for (StringRef path : filePaths) os << quote(relativeToRoot(path)) << "\n"; return data.str(); } enum class DebugKind { Unknown, None, Full, FastLink, GHash, Dwarf, Symtab }; static DebugKind parseDebugKind(const opt::InputArgList &args) { auto *a = args.getLastArg(OPT_debug, OPT_debug_opt); if (!a) return DebugKind::None; if (a->getNumValues() == 0) return DebugKind::Full; DebugKind debug = StringSwitch(a->getValue()) .CaseLower("none", DebugKind::None) .CaseLower("full", DebugKind::Full) .CaseLower("fastlink", DebugKind::FastLink) // LLD extensions .CaseLower("ghash", DebugKind::GHash) .CaseLower("dwarf", DebugKind::Dwarf) .CaseLower("symtab", DebugKind::Symtab) .Default(DebugKind::Unknown); if (debug == DebugKind::FastLink) { warn("/debug:fastlink unsupported; using /debug:full"); return DebugKind::Full; } if (debug == DebugKind::Unknown) { error("/debug: unknown option: " + Twine(a->getValue())); return DebugKind::None; } return debug; } static unsigned parseDebugTypes(const opt::InputArgList &args) { unsigned debugTypes = static_cast(DebugType::None); if (auto *a = args.getLastArg(OPT_debugtype)) { SmallVector types; StringRef(a->getValue()) .split(types, ',', /*MaxSplit=*/-1, /*KeepEmpty=*/false); for 
(StringRef type : types) { unsigned v = StringSwitch(type.lower()) .Case("cv", static_cast(DebugType::CV)) .Case("pdata", static_cast(DebugType::PData)) .Case("fixup", static_cast(DebugType::Fixup)) .Default(0); if (v == 0) { warn("/debugtype: unknown option '" + type + "'"); continue; } debugTypes |= v; } return debugTypes; } // Default debug types debugTypes = static_cast(DebugType::CV); if (args.hasArg(OPT_driver)) debugTypes |= static_cast(DebugType::PData); if (args.hasArg(OPT_profile)) debugTypes |= static_cast(DebugType::Fixup); return debugTypes; } static std::string getMapFile(const opt::InputArgList &args) { auto *arg = args.getLastArg(OPT_lldmap, OPT_lldmap_file); if (!arg) return ""; if (arg->getOption().getID() == OPT_lldmap_file) return arg->getValue(); assert(arg->getOption().getID() == OPT_lldmap); StringRef outFile = config->outputFile; return (outFile.substr(0, outFile.rfind('.')) + ".map").str(); } static std::string getImplibPath() { if (!config->implib.empty()) return config->implib; SmallString<128> out = StringRef(config->outputFile); sys::path::replace_extension(out, ".lib"); return out.str(); } // // The import name is calculated as follows: // // | LIBRARY w/ ext | LIBRARY w/o ext | no LIBRARY // -----+----------------+---------------------+------------------ // LINK | {value} | {value}.{.dll/.exe} | {output name} // LIB | {value} | {value}.dll | {output name}.dll // static std::string getImportName(bool asLib) { SmallString<128> out; if (config->importName.empty()) { out.assign(sys::path::filename(config->outputFile)); if (asLib) sys::path::replace_extension(out, ".dll"); } else { out.assign(config->importName); if (!sys::path::has_extension(out)) sys::path::replace_extension(out, (config->dll || asLib) ?
".dll" : ".exe"); } return out.str(); } static void createImportLibrary(bool asLib) { std::vector exports; for (Export &e1 : config->exports) { COFFShortExport e2; e2.Name = e1.name; e2.SymbolName = e1.symbolName; e2.ExtName = e1.extName; e2.Ordinal = e1.ordinal; e2.Noname = e1.noname; e2.Data = e1.data; e2.Private = e1.isPrivate; e2.Constant = e1.constant; exports.push_back(e2); } auto handleError = [](Error &&e) { handleAllErrors(std::move(e), [](ErrorInfoBase &eib) { error(eib.message()); }); }; std::string libName = getImportName(asLib); std::string path = getImplibPath(); if (!config->incremental) { handleError(writeImportLibrary(libName, path, exports, config->machine, config->mingw)); return; } // If the import library already exists, replace it only if the contents // have changed. ErrorOr> oldBuf = MemoryBuffer::getFile( path, /*FileSize*/ -1, /*RequiresNullTerminator*/ false); if (!oldBuf) { handleError(writeImportLibrary(libName, path, exports, config->machine, config->mingw)); return; } SmallString<128> tmpName; if (std::error_code ec = sys::fs::createUniqueFile(path + ".tmp-%%%%%%%%.lib", tmpName)) fatal("cannot create temporary file for import library " + path + ": " + ec.message()); if (Error e = writeImportLibrary(libName, tmpName, exports, config->machine, config->mingw)) { handleError(std::move(e)); return; } std::unique_ptr newBuf = check(MemoryBuffer::getFile( tmpName, /*FileSize*/ -1, /*RequiresNullTerminator*/ false)); if ((*oldBuf)->getBuffer() != newBuf->getBuffer()) { oldBuf->reset(); handleError(errorCodeToError(sys::fs::rename(tmpName, path))); } else { sys::fs::remove(tmpName); } } static void parseModuleDefs(StringRef path) { std::unique_ptr mb = CHECK( MemoryBuffer::getFile(path, -1, false, true), "could not open " + path); COFFModuleDefinition m = check(parseCOFFModuleDefinition( mb->getMemBufferRef(), config->machine, config->mingw)); if (config->outputFile.empty()) config->outputFile = saver.save(m.OutputFile); config->importName = 
saver.save(m.ImportName); if (m.ImageBase) config->imageBase = m.ImageBase; if (m.StackReserve) config->stackReserve = m.StackReserve; if (m.StackCommit) config->stackCommit = m.StackCommit; if (m.HeapReserve) config->heapReserve = m.HeapReserve; if (m.HeapCommit) config->heapCommit = m.HeapCommit; if (m.MajorImageVersion) config->majorImageVersion = m.MajorImageVersion; if (m.MinorImageVersion) config->minorImageVersion = m.MinorImageVersion; if (m.MajorOSVersion) config->majorOSVersion = m.MajorOSVersion; if (m.MinorOSVersion) config->minorOSVersion = m.MinorOSVersion; for (COFFShortExport e1 : m.Exports) { Export e2; // In simple cases, only Name is set. Renamed exports are parsed // and set as "ExtName = Name". If Name has the form "OtherDll.Func", // it shouldn't be a normal exported function but a forward to another // DLL instead. This is supported by both MS and GNU linkers. if (e1.ExtName != e1.Name && StringRef(e1.Name).contains('.')) { e2.name = saver.save(e1.ExtName); e2.forwardTo = saver.save(e1.Name); config->exports.push_back(e2); continue; } e2.name = saver.save(e1.Name); e2.extName = saver.save(e1.ExtName); e2.ordinal = e1.Ordinal; e2.noname = e1.Noname; e2.data = e1.Data; e2.isPrivate = e1.Private; e2.constant = e1.Constant; config->exports.push_back(e2); } } void LinkerDriver::enqueueTask(std::function task) { taskQueue.push_back(std::move(task)); } bool LinkerDriver::run() { ScopedTimer t(inputFileTimer); bool didWork = !taskQueue.empty(); while (!taskQueue.empty()) { taskQueue.front()(); taskQueue.pop_front(); } return didWork; } // Parse an /order file. If an option is given, the linker places // COMDAT sections in the same order as their names appear in the // given file. static void parseOrderFile(StringRef arg) { // For some reason, the MSVC linker requires a filename to be // preceded by "@". if (!arg.startswith("@")) { error("malformed /order option: '@' missing"); return; } // Get a list of all comdat sections for error checking. 
DenseSet set; for (Chunk *c : symtab->getChunks()) if (auto *sec = dyn_cast(c)) if (sec->sym) set.insert(sec->sym->getName()); // Open a file. StringRef path = arg.substr(1); std::unique_ptr mb = CHECK( MemoryBuffer::getFile(path, -1, false, true), "could not open " + path); // Parse a file. An order file contains one symbol per line. // All symbols that were not present in a given order file are // considered to have the lowest priority 0 and are placed at // end of an output section. for (std::string s : args::getLines(mb->getMemBufferRef())) { if (config->machine == I386 && !isDecorated(s)) s = "_" + s; if (set.count(s) == 0) { if (config->warnMissingOrderSymbol) warn("/order:" + arg + ": missing symbol: " + s + " [LNK4037]"); } else config->order[s] = INT_MIN + config->order.size(); } } static void markAddrsig(Symbol *s) { if (auto *d = dyn_cast_or_null(s)) if (SectionChunk *c = dyn_cast_or_null(d->getChunk())) c->keepUnique = true; } static void findKeepUniqueSections() { // Exported symbols could be address-significant in other executables or DSOs, // so we conservatively mark them as address-significant. for (Export &r : config->exports) markAddrsig(r.sym); // Visit the address-significance table in each object file and mark each // referenced symbol as address-significant. 
for (ObjFile *obj : ObjFile::instances) { ArrayRef syms = obj->getSymbols(); if (obj->addrsigSec) { ArrayRef contents; cantFail( obj->getCOFFObj()->getSectionContents(obj->addrsigSec, contents)); const uint8_t *cur = contents.begin(); while (cur != contents.end()) { unsigned size; const char *err; uint64_t symIndex = decodeULEB128(cur, &size, contents.end(), &err); if (err) fatal(toString(obj) + ": could not decode addrsig section: " + err); if (symIndex >= syms.size()) fatal(toString(obj) + ": invalid symbol index in addrsig section"); markAddrsig(syms[symIndex]); cur += size; } } else { // If an object file does not have an address-significance table, // conservatively mark all of its symbols as address-significant. for (Symbol *s : syms) markAddrsig(s); } } } // link.exe replaces each %foo% in altPath with the contents of environment // variable foo, and adds the two magic env vars _PDB (expands to the basename // of pdb's output path) and _EXT (expands to the extension of the output // binary). // lld only supports %_PDB% and %_EXT% and warns on references to all other env // vars. static void parsePDBAltPath(StringRef altPath) { SmallString<128> buf; StringRef pdbBasename = sys::path::filename(config->pdbPath, sys::path::Style::windows); StringRef binaryExtension = sys::path::extension(config->outputFile, sys::path::Style::windows); if (!binaryExtension.empty()) binaryExtension = binaryExtension.substr(1); // %_EXT% does not include '.'. // Invariant: // +--------- cursor ('a...' might be the empty string). // | +----- firstMark // | | +- secondMark // v v v // a...%...%... size_t cursor = 0; while (cursor < altPath.size()) { size_t firstMark, secondMark; if ((firstMark = altPath.find('%', cursor)) == StringRef::npos || (secondMark = altPath.find('%', firstMark + 1)) == StringRef::npos) { // Didn't find another full fragment, treat rest of string as literal. buf.append(altPath.substr(cursor)); break; } // Found a full fragment. 
Append text in front of first %, and interpret // text between first and second % as variable name. buf.append(altPath.substr(cursor, firstMark - cursor)); StringRef var = altPath.substr(firstMark, secondMark - firstMark + 1); if (var.equals_lower("%_pdb%")) buf.append(pdbBasename); else if (var.equals_lower("%_ext%")) buf.append(binaryExtension); else { warn("only %_PDB% and %_EXT% supported in /pdbaltpath:, keeping " + var + " as literal"); buf.append(var); } cursor = secondMark + 1; } config->pdbAltPath = buf; } /// Check that at most one resource obj file was used. /// Call after ObjFile::instances is complete. static void diagnoseMultipleResourceObjFiles() { // The .rsrc$01 section in a resource obj file contains a tree description // of resources. Merging multiple resource obj files would require merging // the trees instead of using usual linker section merging semantics. // Since link.exe disallows linking more than one resource obj file with // LNK4078, mirror that. The normal use of resource files is to give the // linker many .res files, which are then converted to a single resource obj // file internally, so this is not a big restriction in practice. ObjFile *resourceObjFile = nullptr; for (ObjFile *f : ObjFile::instances) { if (!f->isResourceObjFile) continue; if (!resourceObjFile) { resourceObjFile = f; continue; } error(toString(f) + ": more than one resource obj file not allowed, already got " + toString(resourceObjFile)); } } // In MinGW, if no symbols are chosen to be exported, then all symbols are // automatically exported by default. This behavior can be forced by the // -export-all-symbols option, so that it happens even when exports are // explicitly specified. The automatic behavior can be disabled using the // -exclude-all-symbols option, so that lld-link behaves like link.exe rather // than MinGW in the case that nothing is explicitly exported.
void LinkerDriver::maybeExportMinGWSymbols(const opt::InputArgList &args) { if (!config->dll) return; if (!args.hasArg(OPT_export_all_symbols)) { if (!config->exports.empty()) return; if (args.hasArg(OPT_exclude_all_symbols)) return; } AutoExporter exporter; for (auto *arg : args.filtered(OPT_wholearchive_file)) if (Optional path = doFindFile(arg->getValue())) exporter.addWholeArchive(*path); symtab->forEachSymbol([&](Symbol *s) { auto *def = dyn_cast(s); if (!exporter.shouldExport(def)) return; Export e; e.name = def->getName(); e.sym = def; if (Chunk *c = def->getChunk()) if (!(c->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE)) e.data = true; config->exports.push_back(e); }); } +static const char *libcallRoutineNames[] = { +#define HANDLE_LIBCALL(code, name) name, +#include "llvm/IR/RuntimeLibcalls.def" +#undef HANDLE_LIBCALL +}; + void LinkerDriver::link(ArrayRef argsArr) { // Needed for LTO. InitializeAllTargetInfos(); InitializeAllTargets(); InitializeAllTargetMCs(); InitializeAllAsmParsers(); InitializeAllAsmPrinters(); // If the first command line argument is "/lib", link.exe acts like lib.exe. // We call our own implementation of lib.exe that understands bitcode files. if (argsArr.size() > 1 && StringRef(argsArr[1]).equals_lower("/lib")) { if (llvm::libDriverMain(argsArr.slice(1)) != 0) fatal("lib failed"); return; } // Parse command line options. ArgParser parser; opt::InputArgList args = parser.parseLINK(argsArr); // Parse and evaluate -mllvm options. std::vector v; v.push_back("lld-link (LLVM option parsing)"); for (auto *arg : args.filtered(OPT_mllvm)) v.push_back(arg->getValue()); cl::ParseCommandLineOptions(v.size(), v.data()); // Handle /errorlimit early, because error() depends on it. 
if (auto *arg = args.getLastArg(OPT_errorlimit)) { int n = 20; StringRef s = arg->getValue(); if (s.getAsInteger(10, n)) error(arg->getSpelling() + " number expected, but got " + s); errorHandler().errorLimit = n; } // Handle /help if (args.hasArg(OPT_help)) { printHelp(argsArr[0]); return; } lld::threadsEnabled = args.hasFlag(OPT_threads, OPT_threads_no, true); if (args.hasArg(OPT_show_timing)) config->showTiming = true; config->showSummary = args.hasArg(OPT_summary); ScopedTimer t(Timer::root()); // Handle --version, which is an lld extension. This option is a bit odd // because it doesn't start with "/", but we deliberately chose "--" to // avoid conflict with /version and for compatibility with clang-cl. if (args.hasArg(OPT_dash_dash_version)) { outs() << getLLDVersion() << "\n"; return; } // Handle /lldmingw early, since it can potentially affect how other // options are handled. config->mingw = args.hasArg(OPT_lldmingw); if (auto *arg = args.getLastArg(OPT_linkrepro)) { SmallString<64> path = StringRef(arg->getValue()); sys::path::append(path, "repro.tar"); Expected> errOrWriter = TarWriter::create(path, "repro"); if (errOrWriter) { tar = std::move(*errOrWriter); } else { error("/linkrepro: failed to open " + path + ": " + toString(errOrWriter.takeError())); } } if (!args.hasArg(OPT_INPUT)) { if (args.hasArg(OPT_deffile)) config->noEntry = true; else fatal("no input files"); } // Construct search path list. searchPaths.push_back(""); for (auto *arg : args.filtered(OPT_libpath)) searchPaths.push_back(arg->getValue()); addLibSearchPaths(); // Handle /ignore for (auto *arg : args.filtered(OPT_ignore)) { SmallVector vec; StringRef(arg->getValue()).split(vec, ','); for (StringRef s : vec) { if (s == "4037") config->warnMissingOrderSymbol = false; else if (s == "4099") config->warnDebugInfoUnusable = false; else if (s == "4217") config->warnLocallyDefinedImported = false; // Other warning numbers are ignored. 
} } // Handle /out if (auto *arg = args.getLastArg(OPT_out)) config->outputFile = arg->getValue(); // Handle /verbose if (args.hasArg(OPT_verbose)) config->verbose = true; errorHandler().verbose = config->verbose; // Handle /force or /force:unresolved if (args.hasArg(OPT_force, OPT_force_unresolved)) config->forceUnresolved = true; // Handle /force or /force:multiple if (args.hasArg(OPT_force, OPT_force_multiple)) config->forceMultiple = true; // Handle /force or /force:multipleres if (args.hasArg(OPT_force, OPT_force_multipleres)) config->forceMultipleRes = true; // Handle /debug DebugKind debug = parseDebugKind(args); if (debug == DebugKind::Full || debug == DebugKind::Dwarf || debug == DebugKind::GHash) { config->debug = true; config->incremental = true; } // Handle /demangle config->demangle = args.hasFlag(OPT_demangle, OPT_demangle_no); // Handle /debugtype config->debugTypes = parseDebugTypes(args); // Handle /pdb bool shouldCreatePDB = (debug == DebugKind::Full || debug == DebugKind::GHash); if (shouldCreatePDB) { if (auto *arg = args.getLastArg(OPT_pdb)) config->pdbPath = arg->getValue(); if (auto *arg = args.getLastArg(OPT_pdbaltpath)) config->pdbAltPath = arg->getValue(); if (args.hasArg(OPT_natvis)) config->natvisFiles = args.getAllArgValues(OPT_natvis); if (auto *arg = args.getLastArg(OPT_pdb_source_path)) config->pdbSourcePath = arg->getValue(); } // Handle /noentry if (args.hasArg(OPT_noentry)) { if (args.hasArg(OPT_dll)) config->noEntry = true; else error("/noentry must be specified with /dll"); } // Handle /dll if (args.hasArg(OPT_dll)) { config->dll = true; config->manifestID = 2; } // Handle /dynamicbase and /fixed. We can't use hasFlag for /dynamicbase // because we need to explicitly check whether that option or its inverse was // present in the argument list in order to handle /fixed. 
auto *dynamicBaseArg = args.getLastArg(OPT_dynamicbase, OPT_dynamicbase_no); if (dynamicBaseArg && dynamicBaseArg->getOption().getID() == OPT_dynamicbase_no) config->dynamicBase = false; // MSDN claims "/FIXED:NO is the default setting for a DLL, and /FIXED is the // default setting for any other project type.", but link.exe defaults to // /FIXED:NO for exe outputs as well. Match behavior, not docs. bool fixed = args.hasFlag(OPT_fixed, OPT_fixed_no, false); if (fixed) { if (dynamicBaseArg && dynamicBaseArg->getOption().getID() == OPT_dynamicbase) { error("/fixed must not be specified with /dynamicbase"); } else { config->relocatable = false; config->dynamicBase = false; } } // Handle /appcontainer config->appContainer = args.hasFlag(OPT_appcontainer, OPT_appcontainer_no, false); // Handle /machine if (auto *arg = args.getLastArg(OPT_machine)) { config->machine = getMachineType(arg->getValue()); if (config->machine == IMAGE_FILE_MACHINE_UNKNOWN) fatal(Twine("unknown /machine argument: ") + arg->getValue()); } // Handle /nodefaultlib: for (auto *arg : args.filtered(OPT_nodefaultlib)) config->noDefaultLibs.insert(doFindLib(arg->getValue()).lower()); // Handle /nodefaultlib if (args.hasArg(OPT_nodefaultlib_all)) config->noDefaultLibAll = true; // Handle /base if (auto *arg = args.getLastArg(OPT_base)) parseNumbers(arg->getValue(), &config->imageBase); // Handle /filealign if (auto *arg = args.getLastArg(OPT_filealign)) { parseNumbers(arg->getValue(), &config->fileAlign); if (!isPowerOf2_64(config->fileAlign)) error("/filealign: not a power of two: " + Twine(config->fileAlign)); } // Handle /stack if (auto *arg = args.getLastArg(OPT_stack)) parseNumbers(arg->getValue(), &config->stackReserve, &config->stackCommit); // Handle /guard:cf if (auto *arg = args.getLastArg(OPT_guard)) parseGuard(arg->getValue()); // Handle /heap if (auto *arg = args.getLastArg(OPT_heap)) parseNumbers(arg->getValue(), &config->heapReserve, &config->heapCommit); // Handle /version if (auto *arg 
= args.getLastArg(OPT_version)) parseVersion(arg->getValue(), &config->majorImageVersion, &config->minorImageVersion); // Handle /subsystem if (auto *arg = args.getLastArg(OPT_subsystem)) parseSubsystem(arg->getValue(), &config->subsystem, &config->majorOSVersion, &config->minorOSVersion); // Handle /timestamp if (llvm::opt::Arg *arg = args.getLastArg(OPT_timestamp, OPT_repro)) { if (arg->getOption().getID() == OPT_repro) { config->timestamp = 0; config->repro = true; } else { config->repro = false; StringRef value(arg->getValue()); if (value.getAsInteger(0, config->timestamp)) fatal(Twine("invalid timestamp: ") + value + ". Expected 32-bit integer"); } } else { config->repro = false; config->timestamp = time(nullptr); } // Handle /alternatename for (auto *arg : args.filtered(OPT_alternatename)) parseAlternateName(arg->getValue()); // Handle /include for (auto *arg : args.filtered(OPT_incl)) addUndefined(arg->getValue()); // Handle /implib if (auto *arg = args.getLastArg(OPT_implib)) config->implib = arg->getValue(); // Handle /opt. bool doGC = debug == DebugKind::None || args.hasArg(OPT_profile); unsigned icfLevel = args.hasArg(OPT_profile) ? 
0 : 1; // 0: off, 1: limited, 2: on unsigned tailMerge = 1; for (auto *arg : args.filtered(OPT_opt)) { std::string str = StringRef(arg->getValue()).lower(); SmallVector vec; StringRef(str).split(vec, ','); for (StringRef s : vec) { if (s == "ref") { doGC = true; } else if (s == "noref") { doGC = false; } else if (s == "icf" || s.startswith("icf=")) { icfLevel = 2; } else if (s == "noicf") { icfLevel = 0; } else if (s == "lldtailmerge") { tailMerge = 2; } else if (s == "nolldtailmerge") { tailMerge = 0; } else if (s.startswith("lldlto=")) { StringRef optLevel = s.substr(7); if (optLevel.getAsInteger(10, config->ltoo) || config->ltoo > 3) error("/opt:lldlto: invalid optimization level: " + optLevel); } else if (s.startswith("lldltojobs=")) { StringRef jobs = s.substr(11); if (jobs.getAsInteger(10, config->thinLTOJobs) || config->thinLTOJobs == 0) error("/opt:lldltojobs: invalid job count: " + jobs); } else if (s.startswith("lldltopartitions=")) { StringRef n = s.substr(17); if (n.getAsInteger(10, config->ltoPartitions) || config->ltoPartitions == 0) error("/opt:lldltopartitions: invalid partition count: " + n); } else if (s != "lbr" && s != "nolbr") error("/opt: unknown option: " + s); } } // Limited ICF is enabled if GC is enabled and ICF was never mentioned // explicitly. // FIXME: LLD only implements "limited" ICF, i.e. it only merges identical // code. If the user passes /OPT:ICF explicitly, LLD should merge identical // comdat readonly data. 
if (icfLevel == 1 && !doGC) icfLevel = 0; config->doGC = doGC; config->doICF = icfLevel > 0; config->tailMerge = (tailMerge == 1 && config->doICF) || tailMerge == 2; // Handle /lldsavetemps if (args.hasArg(OPT_lldsavetemps)) config->saveTemps = true; // Handle /kill-at if (args.hasArg(OPT_kill_at)) config->killAt = true; // Handle /lldltocache if (auto *arg = args.getLastArg(OPT_lldltocache)) config->ltoCache = arg->getValue(); // Handle /lldltocachepolicy if (auto *arg = args.getLastArg(OPT_lldltocachepolicy)) config->ltoCachePolicy = CHECK( parseCachePruningPolicy(arg->getValue()), Twine("/lldltocachepolicy: invalid cache policy: ") + arg->getValue()); // Handle /failifmismatch for (auto *arg : args.filtered(OPT_failifmismatch)) checkFailIfMismatch(arg->getValue(), nullptr); // Handle /merge for (auto *arg : args.filtered(OPT_merge)) parseMerge(arg->getValue()); // Add default section merging rules after user rules. User rules take // precedence, but we will emit a warning if there is a conflict. parseMerge(".idata=.rdata"); parseMerge(".didat=.rdata"); parseMerge(".edata=.rdata"); parseMerge(".xdata=.rdata"); parseMerge(".bss=.data"); if (config->mingw) { parseMerge(".ctors=.rdata"); parseMerge(".dtors=.rdata"); parseMerge(".CRT=.rdata"); } // Handle /section for (auto *arg : args.filtered(OPT_section)) parseSection(arg->getValue()); + // Handle /align + if (auto *arg = args.getLastArg(OPT_align)) { + parseNumbers(arg->getValue(), &config->align); + if (!isPowerOf2_64(config->align)) + error("/align: not a power of two: " + StringRef(arg->getValue())); + } + // Handle /aligncomm for (auto *arg : args.filtered(OPT_aligncomm)) parseAligncomm(arg->getValue()); // Handle /manifestdependency. This enables /manifest unless /manifest:no is // also passed.
if (auto *arg = args.getLastArg(OPT_manifestdependency)) { config->manifestDependency = arg->getValue(); config->manifest = Configuration::SideBySide; } // Handle /manifest and /manifest: if (auto *arg = args.getLastArg(OPT_manifest, OPT_manifest_colon)) { if (arg->getOption().getID() == OPT_manifest) config->manifest = Configuration::SideBySide; else parseManifest(arg->getValue()); } // Handle /manifestuac if (auto *arg = args.getLastArg(OPT_manifestuac)) parseManifestUAC(arg->getValue()); // Handle /manifestfile if (auto *arg = args.getLastArg(OPT_manifestfile)) config->manifestFile = arg->getValue(); // Handle /manifestinput for (auto *arg : args.filtered(OPT_manifestinput)) config->manifestInput.push_back(arg->getValue()); if (!config->manifestInput.empty() && config->manifest != Configuration::Embed) { fatal("/manifestinput: requires /manifest:embed"); } config->thinLTOEmitImportsFiles = args.hasArg(OPT_thinlto_emit_imports_files); config->thinLTOIndexOnly = args.hasArg(OPT_thinlto_index_only) || args.hasArg(OPT_thinlto_index_only_arg); config->thinLTOIndexOnlyArg = args.getLastArgValue(OPT_thinlto_index_only_arg); config->thinLTOPrefixReplace = getOldNewOptions(args, OPT_thinlto_prefix_replace); config->thinLTOObjectSuffixReplace = getOldNewOptions(args, OPT_thinlto_object_suffix_replace); // Handle miscellaneous boolean flags. 
config->allowBind = args.hasFlag(OPT_allowbind, OPT_allowbind_no, true); config->allowIsolation = args.hasFlag(OPT_allowisolation, OPT_allowisolation_no, true); config->incremental = args.hasFlag(OPT_incremental, OPT_incremental_no, !config->doGC && !config->doICF && !args.hasArg(OPT_order) && !args.hasArg(OPT_profile)); config->integrityCheck = args.hasFlag(OPT_integritycheck, OPT_integritycheck_no, false); config->nxCompat = args.hasFlag(OPT_nxcompat, OPT_nxcompat_no, true); for (auto *arg : args.filtered(OPT_swaprun)) parseSwaprun(arg->getValue()); config->terminalServerAware = !config->dll && args.hasFlag(OPT_tsaware, OPT_tsaware_no, true); config->debugDwarf = debug == DebugKind::Dwarf; config->debugGHashes = debug == DebugKind::GHash; config->debugSymtab = debug == DebugKind::Symtab; config->mapFile = getMapFile(args); if (config->incremental && args.hasArg(OPT_profile)) { warn("ignoring '/incremental' due to '/profile' specification"); config->incremental = false; } if (config->incremental && args.hasArg(OPT_order)) { warn("ignoring '/incremental' due to '/order' specification"); config->incremental = false; } if (config->incremental && config->doGC) { warn("ignoring '/incremental' because REF is enabled; use '/opt:noref' to " "disable"); config->incremental = false; } if (config->incremental && config->doICF) { warn("ignoring '/incremental' because ICF is enabled; use '/opt:noicf' to " "disable"); config->incremental = false; } if (errorCount()) return; std::set wholeArchives; for (auto *arg : args.filtered(OPT_wholearchive_file)) if (Optional path = doFindFile(arg->getValue())) if (Optional id = getUniqueID(*path)) wholeArchives.insert(*id); // A predicate returning true if a given path is an argument for // /wholearchive:, or /wholearchive is enabled globally. // This function is a bit tricky because "foo.obj /wholearchive:././foo.obj" // needs to be handled as "/wholearchive:foo.obj foo.obj". 
auto isWholeArchive = [&](StringRef path) -> bool { if (args.hasArg(OPT_wholearchive_flag)) return true; if (Optional id = getUniqueID(path)) return wholeArchives.count(*id); return false; }; // Create a list of input files. Files can be given as arguments // for /defaultlib option. for (auto *arg : args.filtered(OPT_INPUT, OPT_wholearchive_file)) if (Optional path = findFile(arg->getValue())) enqueuePath(*path, isWholeArchive(*path)); for (auto *arg : args.filtered(OPT_defaultlib)) if (Optional path = findLib(arg->getValue())) enqueuePath(*path, false); // Windows specific -- Create a resource file containing a manifest file. if (config->manifest == Configuration::Embed) addBuffer(createManifestRes(), false); // Read all input files given via the command line. run(); if (errorCount()) return; // We should have inferred a machine type by now from the input files, but if // not we assume x64. if (config->machine == IMAGE_FILE_MACHINE_UNKNOWN) { warn("/machine is not specified. x64 is assumed"); config->machine = AMD64; } config->wordsize = config->is64() ? 8 : 4; // Handle /safeseh, x86 only, on by default, except for mingw. if (config->machine == I386 && args.hasFlag(OPT_safeseh, OPT_safeseh_no, !config->mingw)) config->safeSEH = true; // Handle /functionpadmin for (auto *arg : args.filtered(OPT_functionpadmin, OPT_functionpadmin_opt)) parseFunctionPadMin(arg, config->machine); // Input files can be Windows resource files (.res files). We use // WindowsResource to convert resource files to a regular COFF file, // then link the resulting file normally. 
if (!resources.empty()) symtab->addFile(make(convertResToCOFF(resources))); if (tar) tar->append("response.txt", createResponseFile(args, filePaths, ArrayRef(searchPaths).slice(1))); // Handle /largeaddressaware config->largeAddressAware = args.hasFlag( OPT_largeaddressaware, OPT_largeaddressaware_no, config->is64()); // Handle /highentropyva config->highEntropyVA = config->is64() && args.hasFlag(OPT_highentropyva, OPT_highentropyva_no, true); if (!config->dynamicBase && (config->machine == ARMNT || config->machine == ARM64)) error("/dynamicbase:no is not compatible with " + machineToStr(config->machine)); // Handle /export for (auto *arg : args.filtered(OPT_export)) { Export e = parseExport(arg->getValue()); if (config->machine == I386) { if (!isDecorated(e.name)) e.name = saver.save("_" + e.name); if (!e.extName.empty() && !isDecorated(e.extName)) e.extName = saver.save("_" + e.extName); } config->exports.push_back(e); } // Handle /def if (auto *arg = args.getLastArg(OPT_deffile)) { // parseModuleDefs mutates Config object. parseModuleDefs(arg->getValue()); } // Handle generation of import library from a def file. if (!args.hasArg(OPT_INPUT)) { fixupExports(); createImportLibrary(/*asLib=*/true); return; } // Windows specific -- if no /subsystem is given, we need to infer // that from entry point name. Must happen before /entry handling, // and after the early return when just writing an import library. if (config->subsystem == IMAGE_SUBSYSTEM_UNKNOWN) { config->subsystem = inferSubsystem(); if (config->subsystem == IMAGE_SUBSYSTEM_UNKNOWN) fatal("subsystem must be defined"); } // Handle /entry and /dll if (auto *arg = args.getLastArg(OPT_entry)) { config->entry = addUndefined(mangle(arg->getValue())); } else if (!config->entry && !config->noEntry) { if (args.hasArg(OPT_dll)) { StringRef s = (config->machine == I386) ? 
"__DllMainCRTStartup@12" : "_DllMainCRTStartup"; config->entry = addUndefined(s); } else { // Windows specific -- If entry point name is not given, we need to // infer that from user-defined entry name. StringRef s = findDefaultEntry(); if (s.empty()) fatal("entry point must be defined"); config->entry = addUndefined(s); log("Entry name inferred: " + s); } } // Handle /delayload for (auto *arg : args.filtered(OPT_delayload)) { config->delayLoads.insert(StringRef(arg->getValue()).lower()); if (config->machine == I386) { config->delayLoadHelper = addUndefined("___delayLoadHelper2@8"); } else { config->delayLoadHelper = addUndefined("__delayLoadHelper2"); } } // Set default image name if neither /out or /def set it. if (config->outputFile.empty()) { config->outputFile = getOutputPath((*args.filtered(OPT_INPUT).begin())->getValue()); } // Fail early if an output file is not writable. if (auto e = tryCreateFile(config->outputFile)) { error("cannot open output file " + config->outputFile + ": " + e.message()); return; } if (shouldCreatePDB) { // Put the PDB next to the image if no /pdb flag was passed. if (config->pdbPath.empty()) { config->pdbPath = config->outputFile; sys::path::replace_extension(config->pdbPath, ".pdb"); } // The embedded PDB path should be the absolute path to the PDB if no // /pdbaltpath flag was passed. if (config->pdbAltPath.empty()) { config->pdbAltPath = config->pdbPath; // It's important to make the path absolute and remove dots. This path // will eventually be written into the PE header, and certain Microsoft // tools won't work correctly if these assumptions are not held. sys::fs::make_absolute(config->pdbAltPath); sys::path::remove_dots(config->pdbAltPath); } else { // Don't do this earlier, so that Config->OutputFile is ready. parsePDBAltPath(config->pdbAltPath); } } // Set default image base if /base is not given. 
if (config->imageBase == uint64_t(-1)) config->imageBase = getDefaultImageBase(); symtab->addSynthetic(mangle("__ImageBase"), nullptr); if (config->machine == I386) { symtab->addAbsolute("___safe_se_handler_table", 0); symtab->addAbsolute("___safe_se_handler_count", 0); } symtab->addAbsolute(mangle("__guard_fids_count"), 0); symtab->addAbsolute(mangle("__guard_fids_table"), 0); symtab->addAbsolute(mangle("__guard_flags"), 0); symtab->addAbsolute(mangle("__guard_iat_count"), 0); symtab->addAbsolute(mangle("__guard_iat_table"), 0); symtab->addAbsolute(mangle("__guard_longjmp_count"), 0); symtab->addAbsolute(mangle("__guard_longjmp_table"), 0); // Needed for MSVC 2017 15.5 CRT. symtab->addAbsolute(mangle("__enclave_config"), 0); if (config->mingw) { symtab->addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST__"), 0); symtab->addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST_END__"), 0); symtab->addAbsolute(mangle("__CTOR_LIST__"), 0); symtab->addAbsolute(mangle("__DTOR_LIST__"), 0); } // This code may add new undefined symbols to the link, which may enqueue more // symbol resolution tasks, so we need to continue executing tasks until we // converge. do { // Windows specific -- if entry point is not found, // search for its mangled names. if (config->entry) mangleMaybe(config->entry); // Windows specific -- Make sure we resolve all dllexported symbols. for (Export &e : config->exports) { if (!e.forwardTo.empty()) continue; e.sym = addUndefined(e.name); if (!e.directives) e.symbolName = mangleMaybe(e.sym); } // Add weak aliases. Weak aliases is a mechanism to give remaining // undefined symbols final chance to be resolved successfully. 
for (auto pair : config->alternateNames) { StringRef from = pair.first; StringRef to = pair.second; Symbol *sym = symtab->find(from); if (!sym) continue; if (auto *u = dyn_cast(sym)) if (!u->weakAlias) u->weakAlias = symtab->addUndefined(to); } + + // If any inputs are bitcode files, the LTO code generator may create + // references to library functions that are not explicit in the bitcode + // file's symbol table. If any of those library functions are defined in a + // bitcode file in an archive member, we need to arrange to use LTO to + // compile those archive members by adding them to the link beforehand. + if (!BitcodeFile::instances.empty()) + for (const char *s : libcallRoutineNames) + symtab->addLibcall(s); // Windows specific -- if __load_config_used can be resolved, resolve it. if (symtab->findUnderscore("_load_config_used")) addUndefined(mangle("_load_config_used")); } while (run()); if (errorCount()) return; // Do LTO by compiling bitcode input files to a set of native COFF files then // link those files (unless -thinlto-index-only was given, in which case we // resolve symbols and write indices, but don't generate native code or link). symtab->addCombinedLTOObjects(); // If -thinlto-index-only is given, we should create only "index // files" and not object files. Index file creation is already done // in addCombinedLTOObject, so we are done if that's the case. if (config->thinLTOIndexOnly) return; // If we generated native object files from bitcode files, this resolves // references to the symbols we use from them. run(); if (args.hasArg(OPT_include_optional)) { // Handle /includeoptional for (auto *arg : args.filtered(OPT_include_optional)) if (dyn_cast_or_null(symtab->find(arg->getValue()))) addUndefined(arg->getValue()); while (run()); } if (config->mingw) { // Load any further object files that might be needed for doing automatic // imports. 
// // For cases with no automatically imported symbols, this iterates once // over the symbol table and doesn't do anything. // // For the normal case with a few automatically imported symbols, this // should only need to be run once, since each new object file imported // is an import library and wouldn't add any new undefined references, // but there's nothing stopping the __imp_ symbols from coming from a // normal object file as well (although that won't be used for the // actual autoimport later on). If this pass adds new undefined references, // we won't iterate further to resolve them. symtab->loadMinGWAutomaticImports(); run(); } // Make sure we have resolved all symbols. symtab->reportRemainingUndefines(); if (errorCount()) return; if (config->mingw) { // In MinGW, all symbols are automatically exported if no symbols // are chosen to be exported. maybeExportMinGWSymbols(args); // Make sure the crtend.o object is the last object file. This object // file can contain terminating section chunks that need to be placed // last. GNU ld processes files and static libraries explicitly in the // order provided on the command line, while lld will pull in needed // files from static libraries only after the last object file on the // command line. for (auto i = ObjFile::instances.begin(), e = ObjFile::instances.end(); i != e; i++) { ObjFile *file = *i; if (isCrtend(file->getName())) { ObjFile::instances.erase(i); ObjFile::instances.push_back(file); break; } } } // Windows specific -- when we are creating a .dll file, we also // need to create a .lib file. if (!config->exports.empty() || config->dll) { fixupExports(); createImportLibrary(/*asLib=*/false); assignExportOrdinals(); } // Handle /output-def (MinGW specific). 
if (auto *arg = args.getLastArg(OPT_output_def)) writeDefFile(arg->getValue()); // Set extra alignment for .comm symbols for (auto pair : config->alignComm) { StringRef name = pair.first; uint32_t alignment = pair.second; Symbol *sym = symtab->find(name); if (!sym) { warn("/aligncomm symbol " + name + " not found"); continue; } // If the symbol isn't common, it must have been replaced with a regular // symbol, which will carry its own alignment. auto *dc = dyn_cast(sym); if (!dc) continue; CommonChunk *c = dc->getChunk(); c->setAlignment(std::max(c->getAlignment(), alignment)); } // Windows specific -- Create a side-by-side manifest file. if (config->manifest == Configuration::SideBySide) createSideBySideManifest(); // Handle /order. We want to do this at this moment because we // need a complete list of comdat sections to warn on nonexistent // functions. if (auto *arg = args.getLastArg(OPT_order)) parseOrderFile(arg->getValue()); // Identify unreferenced COMDAT sections. if (config->doGC) markLive(symtab->getChunks()); // Needs to happen after the last call to addFile(). diagnoseMultipleResourceObjFiles(); // Identify identical COMDAT sections to merge them. if (config->doICF) { findKeepUniqueSections(); doICF(symtab->getChunks()); } // Write the result. writeResult(); // Stop early so we can print the results. Timer::root().stop(); if (config->showTiming) Timer::root().print(); } } // namespace coff } // namespace lld Index: vendor/lld/dist-release_90/COFF/Driver.h =================================================================== --- vendor/lld/dist-release_90/COFF/Driver.h (revision 351717) +++ vendor/lld/dist-release_90/COFF/Driver.h (revision 351718) @@ -1,202 +1,202 @@ //===- Driver.h -------------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLD_COFF_DRIVER_H #define LLD_COFF_DRIVER_H #include "Config.h" #include "SymbolTable.h" #include "lld/Common/LLVM.h" #include "lld/Common/Reproduce.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/TarWriter.h" #include #include #include namespace lld { namespace coff { class LinkerDriver; extern LinkerDriver *driver; using llvm::COFF::MachineTypes; using llvm::COFF::WindowsSubsystem; using llvm::Optional; class COFFOptTable : public llvm::opt::OptTable { public: COFFOptTable(); }; class ArgParser { public: // Concatenate LINK environment variable and given arguments and parse them. llvm::opt::InputArgList parseLINK(std::vector args); // Tokenizes a given string and then parses as command line options. llvm::opt::InputArgList parse(StringRef s) { return parse(tokenize(s)); } // Tokenizes a given string and then parses as command line options in // .drectve section. /EXPORT options are returned in second element // to be processed in fastpath. std::pair> parseDirectives(StringRef s); private: // Parses command line options. llvm::opt::InputArgList parse(llvm::ArrayRef args); std::vector tokenize(StringRef s); COFFOptTable table; }; class LinkerDriver { public: void link(llvm::ArrayRef args); // Used by the resolver to parse .drectve section contents. void parseDirectives(InputFile *file); // Used by ArchiveFile to enqueue members. 
- void enqueueArchiveMember(const Archive::Child &c, StringRef symName, + void enqueueArchiveMember(const Archive::Child &c, const Archive::Symbol &sym, StringRef parentName); MemoryBufferRef takeBuffer(std::unique_ptr mb); void enqueuePath(StringRef path, bool wholeArchive); private: std::unique_ptr tar; // for /linkrepro // Opens a file. Path has to be resolved already. MemoryBufferRef openFile(StringRef path); // Searches a file from search paths. Optional findFile(StringRef filename); Optional findLib(StringRef filename); StringRef doFindFile(StringRef filename); StringRef doFindLib(StringRef filename); StringRef doFindLibMinGW(StringRef filename); // Parses LIB environment which contains a list of search paths. void addLibSearchPaths(); // Library search path. The first element is always "" (current directory). std::vector searchPaths; void maybeExportMinGWSymbols(const llvm::opt::InputArgList &args); // We don't want to add the same file more than once. // Files are uniquified by their filesystem and file number. std::set visitedFiles; std::set visitedLibs; Symbol *addUndefined(StringRef sym); StringRef mangleMaybe(Symbol *s); // Windows specific -- "main" is not the only main function in Windows. // You can choose one from these four -- {w,}{WinMain,main}. // There are four different entry point functions for them, // {w,}{WinMain,main}CRTStartup, respectively. The linker needs to // choose the right one depending on which "main" function is defined. // This function looks up the symbol table and resolve corresponding // entry point name. 
StringRef findDefaultEntry(); WindowsSubsystem inferSubsystem(); void addBuffer(std::unique_ptr mb, bool wholeArchive); void addArchiveBuffer(MemoryBufferRef mbref, StringRef symName, StringRef parentName, uint64_t offsetInArchive); void enqueueTask(std::function task); bool run(); std::list> taskQueue; std::vector filePaths; std::vector resources; llvm::StringSet<> directivesExports; }; // Functions below this line are defined in DriverUtils.cpp. void printHelp(const char *argv0); // Parses a string in the form of "[,]". void parseNumbers(StringRef arg, uint64_t *addr, uint64_t *size = nullptr); void parseGuard(StringRef arg); // Parses a string in the form of "[.]". // Minor's default value is 0. void parseVersion(StringRef arg, uint32_t *major, uint32_t *minor); // Parses a string in the form of "[,[.]]". void parseSubsystem(StringRef arg, WindowsSubsystem *sys, uint32_t *major, uint32_t *minor); void parseAlternateName(StringRef); void parseMerge(StringRef); void parseSection(StringRef); void parseAligncomm(StringRef); // Parses a string in the form of "[:]" void parseFunctionPadMin(llvm::opt::Arg *a, llvm::COFF::MachineTypes machine); // Parses a string in the form of "EMBED[,=]|NO". void parseManifest(StringRef arg); // Parses a string in the form of "level=|uiAccess=" void parseManifestUAC(StringRef arg); // Parses a string in the form of "cd|net[,(cd|net)]*" void parseSwaprun(StringRef arg); // Create a resource file containing a manifest XML. std::unique_ptr createManifestRes(); void createSideBySideManifest(); // Used for dllexported symbols. Export parseExport(StringRef arg); void fixupExports(); void assignExportOrdinals(); // Parses a string in the form of "key=value" and check // if value matches previous values for the key. // This feature used in the directive section to reject // incompatible objects. void checkFailIfMismatch(StringRef arg, InputFile *source); // Convert Windows resource files (.res files) to a .obj file. 
MemoryBufferRef convertResToCOFF(ArrayRef mbs); void runMSVCLinker(std::string rsp, ArrayRef objects); // Create enum with OPT_xxx values for each option in Options.td enum { OPT_INVALID = 0, #define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11, _12) OPT_##ID, #include "Options.inc" #undef OPTION }; } // namespace coff } // namespace lld #endif Index: vendor/lld/dist-release_90/COFF/InputFiles.cpp =================================================================== --- vendor/lld/dist-release_90/COFF/InputFiles.cpp (revision 351717) +++ vendor/lld/dist-release_90/COFF/InputFiles.cpp (revision 351718) @@ -1,881 +1,881 @@ //===- InputFiles.cpp -----------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "InputFiles.h" #include "Chunks.h" #include "Config.h" #include "DebugTypes.h" #include "Driver.h" #include "SymbolTable.h" #include "Symbols.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "llvm-c/lto.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/Object/Binary.h" #include "llvm/Object/COFF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Target/TargetOptions.h" #include #include #include using namespace llvm; using namespace llvm::COFF; using namespace 
llvm::codeview; using namespace llvm::object; using namespace llvm::support::endian; using llvm::Triple; using llvm::support::ulittle32_t; namespace lld { namespace coff { std::vector ObjFile::instances; std::vector ImportFile::instances; std::vector BitcodeFile::instances; /// Checks that Source is compatible with being a weak alias to Target. /// If Source is Undefined and has no weak alias set, makes it a weak /// alias to Target. static void checkAndSetWeakAlias(SymbolTable *symtab, InputFile *f, Symbol *source, Symbol *target) { if (auto *u = dyn_cast(source)) { if (u->weakAlias && u->weakAlias != target) { // Weak aliases as produced by GCC are named in the form // .weak.., where is the name // of another symbol emitted near the weak symbol. // Just use the definition from the first object file that defined // this weak symbol. if (config->mingw) return; symtab->reportDuplicate(source, f); } u->weakAlias = target; } } ArchiveFile::ArchiveFile(MemoryBufferRef m) : InputFile(ArchiveKind, m) {} void ArchiveFile::parse() { // Parse a MemoryBufferRef as an archive file. file = CHECK(Archive::create(mb), this); // Read the symbol table to construct Lazy objects. for (const Archive::Symbol &sym : file->symbols()) symtab->addLazy(this, sym); } // Returns a buffer pointing to a member file containing a given symbol. -void ArchiveFile::addMember(const Archive::Symbol *sym) { +void ArchiveFile::addMember(const Archive::Symbol &sym) { const Archive::Child &c = - CHECK(sym->getMember(), - "could not get the member for symbol " + sym->getName()); + CHECK(sym.getMember(), + "could not get the member for symbol " + toCOFFString(sym)); // Return an empty buffer if we have already returned the same buffer. 
if (!seen.insert(c.getChildOffset()).second) return; - driver->enqueueArchiveMember(c, sym->getName(), getName()); + driver->enqueueArchiveMember(c, sym, getName()); } std::vector getArchiveMembers(Archive *file) { std::vector v; Error err = Error::success(); for (const ErrorOr &cOrErr : file->children(err)) { Archive::Child c = CHECK(cOrErr, file->getFileName() + ": could not get the child of the archive"); MemoryBufferRef mbref = CHECK(c.getMemoryBufferRef(), file->getFileName() + ": could not get the buffer for a child of the archive"); v.push_back(mbref); } if (err) fatal(file->getFileName() + ": Archive::children failed: " + toString(std::move(err))); return v; } void ObjFile::parse() { // Parse a memory buffer as a COFF file. std::unique_ptr bin = CHECK(createBinary(mb), this); if (auto *obj = dyn_cast(bin.get())) { bin.release(); coffObj.reset(obj); } else { fatal(toString(this) + " is not a COFF file"); } // Read section and symbol tables. initializeChunks(); initializeSymbols(); initializeFlags(); initializeDependencies(); } const coff_section* ObjFile::getSection(uint32_t i) { const coff_section *sec; if (auto ec = coffObj->getSection(i, sec)) fatal("getSection failed: #" + Twine(i) + ": " + ec.message()); return sec; } // We set SectionChunk pointers in the SparseChunks vector to this value // temporarily to mark comdat sections as having an unknown resolution. As we // walk the object file's symbol table, once we visit either a leader symbol or // an associative section definition together with the parent comdat's leader, // we set the pointer to either nullptr (to mark the section as discarded) or a // valid SectionChunk for that section. 
static SectionChunk *const pendingComdat = reinterpret_cast(1); void ObjFile::initializeChunks() { uint32_t numSections = coffObj->getNumberOfSections(); chunks.reserve(numSections); sparseChunks.resize(numSections + 1); for (uint32_t i = 1; i < numSections + 1; ++i) { const coff_section *sec = getSection(i); if (sec->Characteristics & IMAGE_SCN_LNK_COMDAT) sparseChunks[i] = pendingComdat; else sparseChunks[i] = readSection(i, nullptr, ""); } } SectionChunk *ObjFile::readSection(uint32_t sectionNumber, const coff_aux_section_definition *def, StringRef leaderName) { const coff_section *sec = getSection(sectionNumber); StringRef name; if (Expected e = coffObj->getSectionName(sec)) name = *e; else fatal("getSectionName failed: #" + Twine(sectionNumber) + ": " + toString(e.takeError())); if (name == ".drectve") { ArrayRef data; cantFail(coffObj->getSectionContents(sec, data)); directives = StringRef((const char *)data.data(), data.size()); return nullptr; } if (name == ".llvm_addrsig") { addrsigSec = sec; return nullptr; } // Object files may have DWARF debug info or MS CodeView debug info // (or both). // // DWARF sections don't need any special handling from the perspective // of the linker; they are just a data section containing relocations. // We can just link them to complete debug info. // // CodeView needs linker support. We need to interpret debug info, // and then write it to a separate .pdb file. // Ignore DWARF debug info unless /debug is given. if (!config->debug && name.startswith(".debug_")) return nullptr; if (sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE) return nullptr; auto *c = make(this, sec); if (def) c->checksum = def->CheckSum; // link.exe uses the presence of .rsrc$01 for LNK4078, so match that. if (name == ".rsrc$01") isResourceObjFile = true; // CodeView sections are stored to a different vector because they are not // linked in the regular manner. 
if (c->isCodeView()) debugChunks.push_back(c); else if (name == ".gfids$y") guardFidChunks.push_back(c); else if (name == ".gljmp$y") guardLJmpChunks.push_back(c); else if (name == ".sxdata") sXDataChunks.push_back(c); else if (config->tailMerge && sec->NumberOfRelocations == 0 && name == ".rdata" && leaderName.startswith("??_C@")) // COFF sections that look like string literal sections (i.e. no // relocations, in .rdata, leader symbol name matches the MSVC name mangling // for string literals) are subject to string tail merging. MergeChunk::addSection(c); else chunks.push_back(c); return c; } void ObjFile::readAssociativeDefinition( COFFSymbolRef sym, const coff_aux_section_definition *def) { readAssociativeDefinition(sym, def, def->getNumber(sym.isBigObj())); } void ObjFile::readAssociativeDefinition(COFFSymbolRef sym, const coff_aux_section_definition *def, uint32_t parentIndex) { SectionChunk *parent = sparseChunks[parentIndex]; int32_t sectionNumber = sym.getSectionNumber(); auto diag = [&]() { StringRef name, parentName; coffObj->getSymbolName(sym, name); const coff_section *parentSec = getSection(parentIndex); if (Expected e = coffObj->getSectionName(parentSec)) parentName = *e; error(toString(this) + ": associative comdat " + name + " (sec " + Twine(sectionNumber) + ") has invalid reference to section " + parentName + " (sec " + Twine(parentIndex) + ")"); }; if (parent == pendingComdat) { // This can happen if an associative comdat refers to another associative // comdat that appears after it (invalid per COFF spec) or to a section // without any symbols. diag(); return; } // Check whether the parent is prevailing. If it is, so are we, and we read // the section; otherwise mark it as discarded. 
if (parent) { SectionChunk *c = readSection(sectionNumber, def, ""); sparseChunks[sectionNumber] = c; if (c) { c->selection = IMAGE_COMDAT_SELECT_ASSOCIATIVE; parent->addAssociative(c); } } else { sparseChunks[sectionNumber] = nullptr; } } void ObjFile::recordPrevailingSymbolForMingw( COFFSymbolRef sym, DenseMap &prevailingSectionMap) { // For comdat symbols in executable sections, where this is the copy // of the section chunk we actually include instead of discarding it, // add the symbol to a map to allow using it for implicitly // associating .[px]data$ sections to it. int32_t sectionNumber = sym.getSectionNumber(); SectionChunk *sc = sparseChunks[sectionNumber]; if (sc && sc->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) { StringRef name; coffObj->getSymbolName(sym, name); if (getMachineType() == I386) name.consume_front("_"); prevailingSectionMap[name] = sectionNumber; } } void ObjFile::maybeAssociateSEHForMingw( COFFSymbolRef sym, const coff_aux_section_definition *def, const DenseMap &prevailingSectionMap) { StringRef name; coffObj->getSymbolName(sym, name); if (name.consume_front(".pdata$") || name.consume_front(".xdata$") || name.consume_front(".eh_frame$")) { // For MinGW, treat .[px]data$ and .eh_frame$ as implicitly // associative to the symbol . auto parentSym = prevailingSectionMap.find(name); if (parentSym != prevailingSectionMap.end()) readAssociativeDefinition(sym, def, parentSym->second); } } Symbol *ObjFile::createRegular(COFFSymbolRef sym) { SectionChunk *sc = sparseChunks[sym.getSectionNumber()]; if (sym.isExternal()) { StringRef name; coffObj->getSymbolName(sym, name); if (sc) return symtab->addRegular(this, name, sym.getGeneric(), sc); // For MinGW symbols named .weak.* that point to a discarded section, // don't create an Undefined symbol. If nothing ever refers to the symbol, // everything should be fine. If something actually refers to the symbol // (e.g. 
the undefined weak alias), linking will fail due to undefined // references at the end. if (config->mingw && name.startswith(".weak.")) return nullptr; return symtab->addUndefined(name, this, false); } if (sc) return make(this, /*Name*/ "", /*IsCOMDAT*/ false, /*IsExternal*/ false, sym.getGeneric(), sc); return nullptr; } void ObjFile::initializeSymbols() { uint32_t numSymbols = coffObj->getNumberOfSymbols(); symbols.resize(numSymbols); SmallVector, 8> weakAliases; std::vector pendingIndexes; pendingIndexes.reserve(numSymbols); DenseMap prevailingSectionMap; std::vector comdatDefs( coffObj->getNumberOfSections() + 1); for (uint32_t i = 0; i < numSymbols; ++i) { COFFSymbolRef coffSym = check(coffObj->getSymbol(i)); bool prevailingComdat; if (coffSym.isUndefined()) { symbols[i] = createUndefined(coffSym); } else if (coffSym.isWeakExternal()) { symbols[i] = createUndefined(coffSym); uint32_t tagIndex = coffSym.getAux()->TagIndex; weakAliases.emplace_back(symbols[i], tagIndex); } else if (Optional optSym = createDefined(coffSym, comdatDefs, prevailingComdat)) { symbols[i] = *optSym; if (config->mingw && prevailingComdat) recordPrevailingSymbolForMingw(coffSym, prevailingSectionMap); } else { // createDefined() returns None if a symbol belongs to a section that // was pending at the point when the symbol was read. This can happen in // two cases: // 1) section definition symbol for a comdat leader; // 2) symbol belongs to a comdat section associated with another section. // In both of these cases, we can expect the section to be resolved by // the time we finish visiting the remaining symbols in the symbol // table. So we postpone the handling of this symbol until that time. 
pendingIndexes.push_back(i); } i += coffSym.getNumberOfAuxSymbols(); } for (uint32_t i : pendingIndexes) { COFFSymbolRef sym = check(coffObj->getSymbol(i)); if (const coff_aux_section_definition *def = sym.getSectionDefinition()) { if (def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) readAssociativeDefinition(sym, def); else if (config->mingw) maybeAssociateSEHForMingw(sym, def, prevailingSectionMap); } if (sparseChunks[sym.getSectionNumber()] == pendingComdat) { StringRef name; coffObj->getSymbolName(sym, name); log("comdat section " + name + " without leader and unassociated, discarding"); continue; } symbols[i] = createRegular(sym); } for (auto &kv : weakAliases) { Symbol *sym = kv.first; uint32_t idx = kv.second; checkAndSetWeakAlias(symtab, this, sym, symbols[idx]); } } Symbol *ObjFile::createUndefined(COFFSymbolRef sym) { StringRef name; coffObj->getSymbolName(sym, name); return symtab->addUndefined(name, this, sym.isWeakExternal()); } void ObjFile::handleComdatSelection(COFFSymbolRef sym, COMDATType &selection, bool &prevailing, DefinedRegular *leader) { if (prevailing) return; // There's already an existing comdat for this symbol: `Leader`. // Use the comdats's selection field to determine if the new // symbol in `Sym` should be discarded, produce a duplicate symbol // error, etc. SectionChunk *leaderChunk = nullptr; COMDATType leaderSelection = IMAGE_COMDAT_SELECT_ANY; if (leader->data) { leaderChunk = leader->getChunk(); leaderSelection = leaderChunk->selection; } else { // FIXME: comdats from LTO files don't know their selection; treat them // as "any". selection = leaderSelection; } if ((selection == IMAGE_COMDAT_SELECT_ANY && leaderSelection == IMAGE_COMDAT_SELECT_LARGEST) || (selection == IMAGE_COMDAT_SELECT_LARGEST && leaderSelection == IMAGE_COMDAT_SELECT_ANY)) { // cl.exe picks "any" for vftables when building with /GR- and // "largest" when building with /GR. 
To be able to link object files // compiled with each flag, "any" and "largest" are merged as "largest". leaderSelection = selection = IMAGE_COMDAT_SELECT_LARGEST; } // Other than that, comdat selections must match. This is a bit more // strict than link.exe which allows merging "any" and "largest" if "any" // is the first symbol the linker sees, and it allows merging "largest" // with everything (!) if "largest" is the first symbol the linker sees. // Making this symmetric independent of which selection is seen first // seems better though. // (This behavior matches ModuleLinker::getComdatResult().) if (selection != leaderSelection) { log(("conflicting comdat type for " + toString(*leader) + ": " + Twine((int)leaderSelection) + " in " + toString(leader->getFile()) + " and " + Twine((int)selection) + " in " + toString(this)) .str()); symtab->reportDuplicate(leader, this); return; } switch (selection) { case IMAGE_COMDAT_SELECT_NODUPLICATES: symtab->reportDuplicate(leader, this); break; case IMAGE_COMDAT_SELECT_ANY: // Nothing to do. break; case IMAGE_COMDAT_SELECT_SAME_SIZE: if (leaderChunk->getSize() != getSection(sym)->SizeOfRawData) symtab->reportDuplicate(leader, this); break; case IMAGE_COMDAT_SELECT_EXACT_MATCH: { SectionChunk newChunk(this, getSection(sym)); // link.exe only compares section contents here and doesn't complain // if the two comdat sections have e.g. different alignment. // Match that. if (leaderChunk->getContents() != newChunk.getContents()) symtab->reportDuplicate(leader, this); break; } case IMAGE_COMDAT_SELECT_ASSOCIATIVE: // createDefined() is never called for IMAGE_COMDAT_SELECT_ASSOCIATIVE. // (This means lld-link doesn't produce duplicate symbol errors for // associative comdats while link.exe does, but associate comdats // are never extern in practice.) 
llvm_unreachable("createDefined not called for associative comdats"); case IMAGE_COMDAT_SELECT_LARGEST: if (leaderChunk->getSize() < getSection(sym)->SizeOfRawData) { // Replace the existing comdat symbol with the new one. StringRef name; coffObj->getSymbolName(sym, name); // FIXME: This is incorrect: With /opt:noref, the previous sections // make it into the final executable as well. Correct handling would // be to undo reading of the whole old section that's being replaced, // or doing one pass that determines what the final largest comdat // is for all IMAGE_COMDAT_SELECT_LARGEST comdats and then reading // only the largest one. replaceSymbol(leader, this, name, /*IsCOMDAT*/ true, /*IsExternal*/ true, sym.getGeneric(), nullptr); prevailing = true; } break; case IMAGE_COMDAT_SELECT_NEWEST: llvm_unreachable("should have been rejected earlier"); } } Optional ObjFile::createDefined( COFFSymbolRef sym, std::vector &comdatDefs, bool &prevailing) { prevailing = false; auto getName = [&]() { StringRef s; coffObj->getSymbolName(sym, s); return s; }; if (sym.isCommon()) { auto *c = make(sym); chunks.push_back(c); return symtab->addCommon(this, getName(), sym.getValue(), sym.getGeneric(), c); } if (sym.isAbsolute()) { StringRef name = getName(); // Skip special symbols. if (name == "@comp.id") return nullptr; if (name == "@feat.00") { feat00Flags = sym.getValue(); return nullptr; } if (sym.isExternal()) return symtab->addAbsolute(name, sym); return make(name, sym); } int32_t sectionNumber = sym.getSectionNumber(); if (sectionNumber == llvm::COFF::IMAGE_SYM_DEBUG) return nullptr; if (llvm::COFF::isReservedSectionNumber(sectionNumber)) fatal(toString(this) + ": " + getName() + " should not refer to special section " + Twine(sectionNumber)); if ((uint32_t)sectionNumber >= sparseChunks.size()) fatal(toString(this) + ": " + getName() + " should not refer to non-existent section " + Twine(sectionNumber)); // Comdat handling. 
// A comdat symbol consists of two symbol table entries. // The first symbol entry has the name of the section (e.g. .text), fixed // values for the other fields, and one auxilliary record. // The second symbol entry has the name of the comdat symbol, called the // "comdat leader". // When this function is called for the first symbol entry of a comdat, // it sets comdatDefs and returns None, and when it's called for the second // symbol entry it reads comdatDefs and then sets it back to nullptr. // Handle comdat leader. if (const coff_aux_section_definition *def = comdatDefs[sectionNumber]) { comdatDefs[sectionNumber] = nullptr; DefinedRegular *leader; if (sym.isExternal()) { std::tie(leader, prevailing) = symtab->addComdat(this, getName(), sym.getGeneric()); } else { leader = make(this, /*Name*/ "", /*IsCOMDAT*/ false, /*IsExternal*/ false, sym.getGeneric()); prevailing = true; } if (def->Selection < (int)IMAGE_COMDAT_SELECT_NODUPLICATES || // Intentionally ends at IMAGE_COMDAT_SELECT_LARGEST: link.exe // doesn't understand IMAGE_COMDAT_SELECT_NEWEST either. def->Selection > (int)IMAGE_COMDAT_SELECT_LARGEST) { fatal("unknown comdat type " + std::to_string((int)def->Selection) + " for " + getName() + " in " + toString(this)); } COMDATType selection = (COMDATType)def->Selection; if (leader->isCOMDAT) handleComdatSelection(sym, selection, prevailing, leader); if (prevailing) { SectionChunk *c = readSection(sectionNumber, def, getName()); sparseChunks[sectionNumber] = c; c->sym = cast(leader); c->selection = selection; cast(leader)->data = &c->repl; } else { sparseChunks[sectionNumber] = nullptr; } return leader; } // Prepare to handle the comdat leader symbol by setting the section's // ComdatDefs pointer if we encounter a non-associative comdat. 
if (sparseChunks[sectionNumber] == pendingComdat) { if (const coff_aux_section_definition *def = sym.getSectionDefinition()) { if (def->Selection != IMAGE_COMDAT_SELECT_ASSOCIATIVE) comdatDefs[sectionNumber] = def; } return None; } return createRegular(sym); } MachineTypes ObjFile::getMachineType() { if (coffObj) return static_cast(coffObj->getMachine()); return IMAGE_FILE_MACHINE_UNKNOWN; } ArrayRef ObjFile::getDebugSection(StringRef secName) { if (SectionChunk *sec = SectionChunk::findByName(debugChunks, secName)) return sec->consumeDebugMagic(); return {}; } // OBJ files systematically store critical informations in a .debug$S stream, // even if the TU was compiled with no debug info. At least two records are // always there. S_OBJNAME stores a 32-bit signature, which is loaded into the // PCHSignature member. S_COMPILE3 stores compile-time cmd-line flags. This is // currently used to initialize the hotPatchable member. void ObjFile::initializeFlags() { ArrayRef data = getDebugSection(".debug$S"); if (data.empty()) return; DebugSubsectionArray subsections; BinaryStreamReader reader(data, support::little); ExitOnError exitOnErr; exitOnErr(reader.readArray(subsections, data.size())); for (const DebugSubsectionRecord &ss : subsections) { if (ss.kind() != DebugSubsectionKind::Symbols) continue; unsigned offset = 0; // Only parse the first two records. We are only looking for S_OBJNAME // and S_COMPILE3, and they usually appear at the beginning of the // stream. 
for (unsigned i = 0; i < 2; ++i) { Expected sym = readSymbolFromStream(ss.getRecordData(), offset); if (!sym) { consumeError(sym.takeError()); return; } if (sym->kind() == SymbolKind::S_COMPILE3) { auto cs = cantFail(SymbolDeserializer::deserializeAs(sym.get())); hotPatchable = (cs.Flags & CompileSym3Flags::HotPatch) != CompileSym3Flags::None; } if (sym->kind() == SymbolKind::S_OBJNAME) { auto objName = cantFail(SymbolDeserializer::deserializeAs( sym.get())); pchSignature = objName.Signature; } offset += sym->length(); } } } // Depending on the compilation flags, OBJs can refer to external files, // necessary to merge this OBJ into the final PDB. We currently support two // types of external files: Precomp/PCH OBJs, when compiling with /Yc and /Yu. // And PDB type servers, when compiling with /Zi. This function extracts these // dependencies and makes them available as a TpiSource interface (see // DebugTypes.h). Both cases only happen with cl.exe: clang-cl produces regular // output even with /Yc and /Yu and with /Zi. 
void ObjFile::initializeDependencies() { if (!config->debug) return; bool isPCH = false; ArrayRef data = getDebugSection(".debug$P"); if (!data.empty()) isPCH = true; else data = getDebugSection(".debug$T"); if (data.empty()) return; CVTypeArray types; BinaryStreamReader reader(data, support::little); cantFail(reader.readArray(types, reader.getLength())); CVTypeArray::Iterator firstType = types.begin(); if (firstType == types.end()) return; debugTypes.emplace(types); if (isPCH) { debugTypesObj = makePrecompSource(this); return; } if (firstType->kind() == LF_TYPESERVER2) { TypeServer2Record ts = cantFail( TypeDeserializer::deserializeAs(firstType->data())); debugTypesObj = makeUseTypeServerSource(this, &ts); return; } if (firstType->kind() == LF_PRECOMP) { PrecompRecord precomp = cantFail( TypeDeserializer::deserializeAs(firstType->data())); debugTypesObj = makeUsePrecompSource(this, &precomp); return; } debugTypesObj = makeTpiSource(this); } StringRef ltrim1(StringRef s, const char *chars) { if (!s.empty() && strchr(chars, s[0])) return s.substr(1); return s; } void ImportFile::parse() { const char *buf = mb.getBufferStart(); const auto *hdr = reinterpret_cast(buf); // Check if the total size is valid. if (mb.getBufferSize() != sizeof(*hdr) + hdr->SizeOfData) fatal("broken import library"); // Read names and create an __imp_ symbol. 
StringRef name = saver.save(StringRef(buf + sizeof(*hdr))); StringRef impName = saver.save("__imp_" + name); const char *nameStart = buf + sizeof(coff_import_header) + name.size() + 1; dllName = StringRef(nameStart); StringRef extName; switch (hdr->getNameType()) { case IMPORT_ORDINAL: extName = ""; break; case IMPORT_NAME: extName = name; break; case IMPORT_NAME_NOPREFIX: extName = ltrim1(name, "?@_"); break; case IMPORT_NAME_UNDECORATE: extName = ltrim1(name, "?@_"); extName = extName.substr(0, extName.find('@')); break; } this->hdr = hdr; externalName = extName; impSym = symtab->addImportData(impName, this); // If this was a duplicate, we logged an error but may continue; // in this case, impSym is nullptr. if (!impSym) return; if (hdr->getType() == llvm::COFF::IMPORT_CONST) static_cast(symtab->addImportData(name, this)); // If type is function, we need to create a thunk which jump to an // address pointed by the __imp_ symbol. (This allows you to call // DLL functions just like regular non-DLL functions.) if (hdr->getType() == llvm::COFF::IMPORT_CODE) thunkSym = symtab->addImportThunk( name, cast_or_null(impSym), hdr->Machine); } BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName, uint64_t offsetInArchive) : InputFile(BitcodeKind, mb) { std::string path = mb.getBufferIdentifier().str(); if (config->thinLTOIndexOnly) path = replaceThinLTOSuffix(mb.getBufferIdentifier()); // ThinLTO assumes that all MemoryBufferRefs given to it have a unique // name. If two archives define two members with the same name, this // causes a collision which result in only one of the objects being taken // into consideration at LTO time (which very likely causes undefined // symbols later in the link stage). So we append file offset to make // filename unique. MemoryBufferRef mbref( mb.getBuffer(), saver.save(archiveName + path + (archiveName.empty() ? 
"" : utostr(offsetInArchive)))); obj = check(lto::InputFile::create(mbref)); } void BitcodeFile::parse() { std::vector> comdat(obj->getComdatTable().size()); for (size_t i = 0; i != obj->getComdatTable().size(); ++i) // FIXME: lto::InputFile doesn't keep enough data to do correct comdat // selection handling. comdat[i] = symtab->addComdat(this, saver.save(obj->getComdatTable()[i])); for (const lto::InputFile::Symbol &objSym : obj->symbols()) { StringRef symName = saver.save(objSym.getName()); int comdatIndex = objSym.getComdatIndex(); Symbol *sym; if (objSym.isUndefined()) { sym = symtab->addUndefined(symName, this, false); } else if (objSym.isCommon()) { sym = symtab->addCommon(this, symName, objSym.getCommonSize()); } else if (objSym.isWeak() && objSym.isIndirect()) { // Weak external. sym = symtab->addUndefined(symName, this, true); std::string fallback = objSym.getCOFFWeakExternalFallback(); Symbol *alias = symtab->addUndefined(saver.save(fallback)); checkAndSetWeakAlias(symtab, this, sym, alias); } else if (comdatIndex != -1) { if (symName == obj->getComdatTable()[comdatIndex]) sym = comdat[comdatIndex].first; else if (comdat[comdatIndex].second) sym = symtab->addRegular(this, symName); else sym = symtab->addUndefined(symName, this, false); } else { sym = symtab->addRegular(this, symName); } symbols.push_back(sym); if (objSym.isUsed()) config->gcroot.push_back(sym); } directives = obj->getCOFFLinkerOpts(); } MachineTypes BitcodeFile::getMachineType() { switch (Triple(obj->getTargetTriple()).getArch()) { case Triple::x86_64: return AMD64; case Triple::x86: return I386; case Triple::arm: return ARMNT; case Triple::aarch64: return ARM64; default: return IMAGE_FILE_MACHINE_UNKNOWN; } } std::string replaceThinLTOSuffix(StringRef path) { StringRef suffix = config->thinLTOObjectSuffixReplace.first; StringRef repl = config->thinLTOObjectSuffixReplace.second; if (path.consume_back(suffix)) return (path + repl).str(); return path; } } // namespace coff } // namespace 
lld // Returns the last element of a path, which is supposed to be a filename. static StringRef getBasename(StringRef path) { return sys::path::filename(path, sys::path::Style::windows); } // Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)". std::string lld::toString(const coff::InputFile *file) { if (!file) return ""; if (file->parentName.empty() || file->kind() == coff::InputFile::ImportKind) return file->getName(); return (getBasename(file->parentName) + "(" + getBasename(file->getName()) + ")") .str(); } Index: vendor/lld/dist-release_90/COFF/InputFiles.h =================================================================== --- vendor/lld/dist-release_90/COFF/InputFiles.h (revision 351717) +++ vendor/lld/dist-release_90/COFF/InputFiles.h (revision 351718) @@ -1,321 +1,321 @@ //===- InputFiles.h ---------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLD_COFF_INPUT_FILES_H #define LLD_COFF_INPUT_FILES_H #include "Config.h" #include "lld/Common/LLVM.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/LTO/LTO.h" #include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" #include "llvm/Support/StringSaver.h" #include #include #include namespace llvm { namespace pdb { class DbiModuleDescriptorBuilder; } } namespace lld { namespace coff { std::vector getArchiveMembers(llvm::object::Archive *file); using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN; using llvm::COFF::MachineTypes; using llvm::object::Archive; using llvm::object::COFFObjectFile; using llvm::object::COFFSymbolRef; using llvm::object::coff_import_header; using llvm::object::coff_section; class Chunk; class Defined; class DefinedImportData; class DefinedImportThunk; class DefinedRegular; class Lazy; class SectionChunk; class Symbol; class Undefined; class TpiSource; // The root class of input files. class InputFile { public: enum Kind { ArchiveKind, ObjectKind, ImportKind, BitcodeKind }; Kind kind() const { return fileKind; } virtual ~InputFile() {} // Returns the filename. StringRef getName() const { return mb.getBufferIdentifier(); } // Reads a file (the constructor doesn't do that). virtual void parse() = 0; // Returns the CPU type this file was compiled to. virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; } MemoryBufferRef mb; // An archive file name if this file is created from an archive. StringRef parentName; // Returns .drectve section contents if exist. StringRef getDirectives() { return directives; } protected: InputFile(Kind k, MemoryBufferRef m) : mb(m), fileKind(k) {} StringRef directives; private: const Kind fileKind; }; // .lib or .a file. 
class ArchiveFile : public InputFile { public: explicit ArchiveFile(MemoryBufferRef m); static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } void parse() override; // Enqueues an archive member load for the given symbol. If we've already // enqueued a load for the same archive member, this function does nothing, // which ensures that we don't load the same member more than once. - void addMember(const Archive::Symbol *sym); + void addMember(const Archive::Symbol &sym); private: std::unique_ptr file; llvm::DenseSet seen; }; // .obj or .o file. This may be a member of an archive file. class ObjFile : public InputFile { public: explicit ObjFile(MemoryBufferRef m) : InputFile(ObjectKind, m) {} static bool classof(const InputFile *f) { return f->kind() == ObjectKind; } void parse() override; MachineTypes getMachineType() override; ArrayRef getChunks() { return chunks; } ArrayRef getDebugChunks() { return debugChunks; } ArrayRef getSXDataChunks() { return sXDataChunks; } ArrayRef getGuardFidChunks() { return guardFidChunks; } ArrayRef getGuardLJmpChunks() { return guardLJmpChunks; } ArrayRef getSymbols() { return symbols; } ArrayRef getDebugSection(StringRef secName); // Returns a Symbol object for the symbolIndex'th symbol in the // underlying object file. Symbol *getSymbol(uint32_t symbolIndex) { return symbols[symbolIndex]; } // Returns the underlying COFF file. COFFObjectFile *getCOFFObj() { return coffObj.get(); } // Add a symbol for a range extension thunk. Return the new symbol table // index. This index can be used to modify a relocation. uint32_t addRangeThunkSymbol(Symbol *thunk) { symbols.push_back(thunk); return symbols.size() - 1; } static std::vector instances; // Flags in the absolute @feat.00 symbol if it is present. These usually // indicate if an object was compiled with certain security features enabled // like stack guard, safeseh, /guard:cf, or other things. 
uint32_t feat00Flags = 0; // True if this object file is compatible with SEH. COFF-specific and // x86-only. COFF spec 5.10.1. The .sxdata section. bool hasSafeSEH() { return feat00Flags & 0x1; } // True if this file was compiled with /guard:cf. bool hasGuardCF() { return feat00Flags & 0x800; } // Pointer to the PDB module descriptor builder. Various debug info records // will reference object files by "module index", which is here. Things like // source files and section contributions are also recorded here. Will be null // if we are not producing a PDB. llvm::pdb::DbiModuleDescriptorBuilder *moduleDBI = nullptr; const coff_section *addrsigSec = nullptr; // When using Microsoft precompiled headers, this is the PCH's key. // The same key is used by both the precompiled object, and objects using the // precompiled object. Any difference indicates out-of-date objects. llvm::Optional pchSignature; // Whether this is an object file created from .res files. bool isResourceObjFile = false; // Whether this file was compiled with /hotpatch. bool hotPatchable = false; // Whether the object was already merged into the final PDB. bool mergedIntoPDB = false; // If the OBJ has a .debug$T stream, this tells how it will be handled. TpiSource *debugTypesObj = nullptr; // The .debug$T stream if there's one. 
llvm::Optional debugTypes; private: const coff_section* getSection(uint32_t i); const coff_section *getSection(COFFSymbolRef sym) { return getSection(sym.getSectionNumber()); } void initializeChunks(); void initializeSymbols(); void initializeFlags(); void initializeDependencies(); SectionChunk * readSection(uint32_t sectionNumber, const llvm::object::coff_aux_section_definition *def, StringRef leaderName); void readAssociativeDefinition( COFFSymbolRef coffSym, const llvm::object::coff_aux_section_definition *def); void readAssociativeDefinition( COFFSymbolRef coffSym, const llvm::object::coff_aux_section_definition *def, uint32_t parentSection); void recordPrevailingSymbolForMingw( COFFSymbolRef coffSym, llvm::DenseMap &prevailingSectionMap); void maybeAssociateSEHForMingw( COFFSymbolRef sym, const llvm::object::coff_aux_section_definition *def, const llvm::DenseMap &prevailingSectionMap); // Given a new symbol Sym with comdat selection Selection, if the new // symbol is not (yet) Prevailing and the existing comdat leader set to // Leader, emits a diagnostic if the new symbol and its selection doesn't // match the existing symbol and its selection. If either old or new // symbol have selection IMAGE_COMDAT_SELECT_LARGEST, Sym might replace // the existing leader. In that case, Prevailing is set to true. void handleComdatSelection(COFFSymbolRef sym, llvm::COFF::COMDATType &selection, bool &prevailing, DefinedRegular *leader); llvm::Optional createDefined(COFFSymbolRef sym, std::vector &comdatDefs, bool &prevailingComdat); Symbol *createRegular(COFFSymbolRef sym); Symbol *createUndefined(COFFSymbolRef sym); std::unique_ptr coffObj; // List of all chunks defined by this file. This includes both section // chunks and non-section chunks for common symbols. std::vector chunks; // CodeView debug info sections. std::vector debugChunks; // Chunks containing symbol table indices of exception handlers. Only used for // 32-bit x86. 
std::vector sXDataChunks; // Chunks containing symbol table indices of address taken symbols and longjmp // targets. These are not linked into the final binary when /guard:cf is set. std::vector guardFidChunks; std::vector guardLJmpChunks; // This vector contains the same chunks as Chunks, but they are // indexed such that you can get a SectionChunk by section index. // Nonexistent section indices are filled with null pointers. // (Because section number is 1-based, the first slot is always a // null pointer.) std::vector sparseChunks; // This vector contains a list of all symbols defined or referenced by this // file. They are indexed such that you can get a Symbol by symbol // index. Nonexistent indices (which are occupied by auxiliary // symbols in the real symbol table) are filled with null pointers. std::vector symbols; }; // This type represents import library members that contain DLL names // and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7 // for details about the format. class ImportFile : public InputFile { public: explicit ImportFile(MemoryBufferRef m) : InputFile(ImportKind, m) {} static bool classof(const InputFile *f) { return f->kind() == ImportKind; } static std::vector instances; Symbol *impSym = nullptr; Symbol *thunkSym = nullptr; std::string dllName; private: void parse() override; public: StringRef externalName; const coff_import_header *hdr; Chunk *location = nullptr; // We want to eliminate dllimported symbols if no one actually refers them. // These "Live" bits are used to keep track of which import library members // are actually in use. // // If the Live bit is turned off by MarkLive, Writer will ignore dllimported // symbols provided by this import library member. We also track whether the // imported symbol is used separately from whether the thunk is used in order // to avoid creating unnecessary thunks. bool live = !config->doGC; bool thunkLive = !config->doGC; }; // Used for LTO. 
class BitcodeFile : public InputFile { public: BitcodeFile(MemoryBufferRef mb, StringRef archiveName, uint64_t offsetInArchive); static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } ArrayRef getSymbols() { return symbols; } MachineTypes getMachineType() override; static std::vector instances; std::unique_ptr obj; private: void parse() override; std::vector symbols; }; std::string replaceThinLTOSuffix(StringRef path); } // namespace coff std::string toString(const coff::InputFile *file); } // namespace lld #endif Index: vendor/lld/dist-release_90/COFF/SymbolTable.cpp =================================================================== --- vendor/lld/dist-release_90/COFF/SymbolTable.cpp (revision 351717) +++ vendor/lld/dist-release_90/COFF/SymbolTable.cpp (revision 351718) @@ -1,603 +1,615 @@ //===- SymbolTable.cpp ----------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "SymbolTable.h" #include "Config.h" #include "Driver.h" #include "LTO.h" #include "PDB.h" #include "Symbols.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "lld/Common/Timer.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Object/WindowsMachineFlag.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include using namespace llvm; namespace lld { namespace coff { static Timer ltoTimer("LTO", Timer::root()); SymbolTable *symtab; void SymbolTable::addFile(InputFile *file) { log("Reading " + toString(file)); file->parse(); MachineTypes mt = file->getMachineType(); if (config->machine == IMAGE_FILE_MACHINE_UNKNOWN) { config->machine = mt; } else if (mt != IMAGE_FILE_MACHINE_UNKNOWN && config->machine != mt) { error(toString(file) + ": machine type " + machineToStr(mt) + " conflicts with " + machineToStr(config->machine)); return; } if (auto *f = dyn_cast(file)) { ObjFile::instances.push_back(f); } else if (auto *f = dyn_cast(file)) { BitcodeFile::instances.push_back(f); } else if (auto *f = dyn_cast(file)) { ImportFile::instances.push_back(f); } driver->parseDirectives(file); } static void errorOrWarn(const Twine &s) { if (config->forceUnresolved) warn(s); else error(s); } // Returns the symbol in SC whose value is <= Addr that is closest to Addr. // This is generally the global variable or function whose definition contains // Addr. 
static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) { DefinedRegular *candidate = nullptr; for (Symbol *s : sc->file->getSymbols()) { auto *d = dyn_cast_or_null(s); if (!d || !d->data || d->getChunk() != sc || d->getValue() > addr || (candidate && d->getValue() < candidate->getValue())) continue; candidate = d; } return candidate; } // Given a file and the index of a symbol in that file, returns a description // of all references to that symbol from that file. If no debug information is // available, returns just the name of the file, else one string per actual // reference as described in the debug info. std::vector getSymbolLocations(ObjFile *file, uint32_t symIndex) { struct Location { Symbol *sym; std::pair fileLine; }; std::vector locations; for (Chunk *c : file->getChunks()) { auto *sc = dyn_cast(c); if (!sc) continue; for (const coff_relocation &r : sc->getRelocs()) { if (r.SymbolTableIndex != symIndex) continue; std::pair fileLine = getFileLine(sc, r.VirtualAddress); Symbol *sym = getSymbol(sc, r.VirtualAddress); if (!fileLine.first.empty() || sym) locations.push_back({sym, fileLine}); } } if (locations.empty()) return std::vector({"\n>>> referenced by " + toString(file)}); std::vector symbolLocations(locations.size()); size_t i = 0; for (Location loc : locations) { llvm::raw_string_ostream os(symbolLocations[i++]); os << "\n>>> referenced by "; if (!loc.fileLine.first.empty()) os << loc.fileLine.first << ":" << loc.fileLine.second << "\n>>> "; os << toString(file); if (loc.sym) os << ":(" << toString(*loc.sym) << ')'; } return symbolLocations; } // For an undefined symbol, stores all files referencing it and the index of // the undefined symbol in each file. 
struct UndefinedDiag { Symbol *sym; struct File { ObjFile *oFile; uint64_t symIndex; }; std::vector files; }; static void reportUndefinedSymbol(const UndefinedDiag &undefDiag) { std::string out; llvm::raw_string_ostream os(out); os << "undefined symbol: " << toString(*undefDiag.sym); const size_t maxUndefReferences = 10; size_t i = 0, numRefs = 0; for (const UndefinedDiag::File &ref : undefDiag.files) { std::vector symbolLocations = getSymbolLocations(ref.oFile, ref.symIndex); numRefs += symbolLocations.size(); for (const std::string &s : symbolLocations) { if (i >= maxUndefReferences) break; os << s; i++; } } if (i < numRefs) os << "\n>>> referenced " << numRefs - i << " more times"; errorOrWarn(os.str()); } void SymbolTable::loadMinGWAutomaticImports() { for (auto &i : symMap) { Symbol *sym = i.second; auto *undef = dyn_cast(sym); if (!undef) continue; if (!sym->isUsedInRegularObj) continue; StringRef name = undef->getName(); if (name.startswith("__imp_")) continue; // If we have an undefined symbol, but we have a Lazy representing a // symbol we could load from file, make sure to load that. Lazy *l = dyn_cast_or_null(find(("__imp_" + name).str())); if (!l || l->pendingArchiveLoad) continue; log("Loading lazy " + l->getName() + " from " + l->file->getName() + " for automatic import"); l->pendingArchiveLoad = true; - l->file->addMember(&l->sym); + l->file->addMember(l->sym); } } bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) { if (name.startswith("__imp_")) return false; Defined *imp = dyn_cast_or_null(find(("__imp_" + name).str())); if (!imp) return false; // Replace the reference directly to a variable with a reference // to the import address table instead. This obviously isn't right, // but we mark the symbol as isRuntimePseudoReloc, and a later pass // will add runtime pseudo relocations for every relocation against // this Symbol. The runtime pseudo relocation framework expects the // reference itself to point at the IAT entry. 
size_t impSize = 0; if (isa(imp)) { log("Automatically importing " + name + " from " + cast(imp)->getDLLName()); impSize = sizeof(DefinedImportData); } else if (isa(imp)) { log("Automatically importing " + name + " from " + toString(cast(imp)->file)); impSize = sizeof(DefinedRegular); } else { warn("unable to automatically import " + name + " from " + imp->getName() + " from " + toString(cast(imp)->file) + "; unexpected symbol type"); return false; } sym->replaceKeepingName(imp, impSize); sym->isRuntimePseudoReloc = true; // There may exist symbols named .refptr. which only consist // of a single pointer to . If it turns out is // automatically imported, we don't need to keep the .refptr. // pointer at all, but redirect all accesses to it to the IAT entry // for __imp_ instead, and drop the whole .refptr. chunk. DefinedRegular *refptr = dyn_cast_or_null(find((".refptr." + name).str())); if (refptr && refptr->getChunk()->getSize() == config->wordsize) { SectionChunk *sc = dyn_cast_or_null(refptr->getChunk()); if (sc && sc->getRelocs().size() == 1 && *sc->symbols().begin() == sym) { log("Replacing .refptr." + name + " with " + imp->getName()); refptr->getChunk()->live = false; refptr->replaceKeepingName(imp, impSize); } } return true; } void SymbolTable::reportRemainingUndefines() { SmallPtrSet undefs; DenseMap localImports; for (auto &i : symMap) { Symbol *sym = i.second; auto *undef = dyn_cast(sym); if (!undef) continue; if (!sym->isUsedInRegularObj) continue; StringRef name = undef->getName(); // A weak alias may have been resolved, so check for that. if (Defined *d = undef->getWeakAlias()) { // We want to replace Sym with D. However, we can't just blindly // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an // internal symbol, and internal symbols are stored as "unparented" // Symbols. For that reason we need to check which type of symbol we // are dealing with and copy the correct number of bytes. 
if (isa(d)) memcpy(sym, d, sizeof(DefinedRegular)); else if (isa(d)) memcpy(sym, d, sizeof(DefinedAbsolute)); else memcpy(sym, d, sizeof(SymbolUnion)); continue; } // If we can resolve a symbol by removing __imp_ prefix, do that. // This odd rule is for compatibility with MSVC linker. if (name.startswith("__imp_")) { Symbol *imp = find(name.substr(strlen("__imp_"))); if (imp && isa(imp)) { auto *d = cast(imp); replaceSymbol(sym, name, d); localImportChunks.push_back(cast(sym)->getChunk()); localImports[sym] = d; continue; } } // We don't want to report missing Microsoft precompiled headers symbols. // A proper message will be emitted instead in PDBLinker::aquirePrecompObj if (name.contains("_PchSym_")) continue; if (config->mingw && handleMinGWAutomaticImport(sym, name)) continue; // Remaining undefined symbols are not fatal if /force is specified. // They are replaced with dummy defined symbols. if (config->forceUnresolved) replaceSymbol(sym, name, 0); undefs.insert(sym); } if (undefs.empty() && localImports.empty()) return; for (Symbol *b : config->gcroot) { if (undefs.count(b)) errorOrWarn(": undefined symbol: " + toString(*b)); if (config->warnLocallyDefinedImported) if (Symbol *imp = localImports.lookup(b)) warn(": locally defined symbol imported: " + toString(*imp) + " (defined in " + toString(imp->getFile()) + ") [LNK4217]"); } std::vector undefDiags; DenseMap firstDiag; for (ObjFile *file : ObjFile::instances) { size_t symIndex = (size_t)-1; for (Symbol *sym : file->getSymbols()) { ++symIndex; if (!sym) continue; if (undefs.count(sym)) { auto it = firstDiag.find(sym); if (it == firstDiag.end()) { firstDiag[sym] = undefDiags.size(); undefDiags.push_back({sym, {{file, symIndex}}}); } else { undefDiags[it->second].files.push_back({file, symIndex}); } } if (config->warnLocallyDefinedImported) if (Symbol *imp = localImports.lookup(sym)) warn(toString(file) + ": locally defined symbol imported: " + toString(*imp) + " (defined in " + toString(imp->getFile()) + ") 
[LNK4217]"); } } for (const UndefinedDiag& undefDiag : undefDiags) reportUndefinedSymbol(undefDiag); } std::pair SymbolTable::insert(StringRef name) { bool inserted = false; Symbol *&sym = symMap[CachedHashStringRef(name)]; if (!sym) { sym = reinterpret_cast(make()); sym->isUsedInRegularObj = false; sym->pendingArchiveLoad = false; inserted = true; } return {sym, inserted}; } std::pair SymbolTable::insert(StringRef name, InputFile *file) { std::pair result = insert(name); if (!file || !isa(file)) result.first->isUsedInRegularObj = true; return result; } Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f, bool isWeakAlias) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(name, f); if (wasInserted || (isa(s) && isWeakAlias)) { replaceSymbol(s, name); return s; } if (auto *l = dyn_cast(s)) { if (!s->pendingArchiveLoad) { s->pendingArchiveLoad = true; - l->file->addMember(&l->sym); + l->file->addMember(l->sym); } } return s; } -void SymbolTable::addLazy(ArchiveFile *f, const Archive::Symbol sym) { +void SymbolTable::addLazy(ArchiveFile *f, const Archive::Symbol &sym) { StringRef name = sym.getName(); Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(name); if (wasInserted) { replaceSymbol(s, f, sym); return; } auto *u = dyn_cast(s); if (!u || u->weakAlias || s->pendingArchiveLoad) return; s->pendingArchiveLoad = true; - f->addMember(&sym); + f->addMember(sym); } void SymbolTable::reportDuplicate(Symbol *existing, InputFile *newFile) { std::string msg = "duplicate symbol: " + toString(*existing) + " in " + toString(existing->getFile()) + " and in " + toString(newFile); if (config->forceMultiple) warn(msg); else error(msg); } Symbol *SymbolTable::addAbsolute(StringRef n, COFFSymbolRef sym) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(n, nullptr); s->isUsedInRegularObj = true; if (wasInserted || isa(s) || isa(s)) replaceSymbol(s, n, sym); else if (!isa(s)) reportDuplicate(s, nullptr); return s; } Symbol 
*SymbolTable::addAbsolute(StringRef n, uint64_t va) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(n, nullptr); s->isUsedInRegularObj = true; if (wasInserted || isa(s) || isa(s)) replaceSymbol(s, n, va); else if (!isa(s)) reportDuplicate(s, nullptr); return s; } Symbol *SymbolTable::addSynthetic(StringRef n, Chunk *c) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(n, nullptr); s->isUsedInRegularObj = true; if (wasInserted || isa(s) || isa(s)) replaceSymbol(s, n, c); else if (!isa(s)) reportDuplicate(s, nullptr); return s; } Symbol *SymbolTable::addRegular(InputFile *f, StringRef n, const coff_symbol_generic *sym, SectionChunk *c) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(n, f); if (wasInserted || !isa(s)) replaceSymbol(s, f, n, /*IsCOMDAT*/ false, /*IsExternal*/ true, sym, c); else reportDuplicate(s, f); return s; } std::pair SymbolTable::addComdat(InputFile *f, StringRef n, const coff_symbol_generic *sym) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(n, f); if (wasInserted || !isa(s)) { replaceSymbol(s, f, n, /*IsCOMDAT*/ true, /*IsExternal*/ true, sym, nullptr); return {cast(s), true}; } auto *existingSymbol = cast(s); if (!existingSymbol->isCOMDAT) reportDuplicate(s, f); return {existingSymbol, false}; } Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size, const coff_symbol_generic *sym, CommonChunk *c) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(n, f); if (wasInserted || !isa(s)) replaceSymbol(s, f, n, size, sym, c); else if (auto *dc = dyn_cast(s)) if (size > dc->getSize()) replaceSymbol(s, f, n, size, sym, c); return s; } Symbol *SymbolTable::addImportData(StringRef n, ImportFile *f) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(n, nullptr); s->isUsedInRegularObj = true; if (wasInserted || isa(s) || isa(s)) { replaceSymbol(s, n, f); return s; } reportDuplicate(s, f); return nullptr; } Symbol 
*SymbolTable::addImportThunk(StringRef name, DefinedImportData *id, uint16_t machine) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(name, nullptr); s->isUsedInRegularObj = true; if (wasInserted || isa(s) || isa(s)) { replaceSymbol(s, name, id, machine); return s; } reportDuplicate(s, id->file); return nullptr; +} + +void SymbolTable::addLibcall(StringRef name) { + Symbol *sym = findUnderscore(name); + if (!sym) + return; + + if (Lazy *l = dyn_cast(sym)) { + MemoryBufferRef mb = l->getMemberBuffer(); + if (identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode) + addUndefined(sym->getName()); + } } std::vector SymbolTable::getChunks() { std::vector res; for (ObjFile *file : ObjFile::instances) { ArrayRef v = file->getChunks(); res.insert(res.end(), v.begin(), v.end()); } return res; } Symbol *SymbolTable::find(StringRef name) { return symMap.lookup(CachedHashStringRef(name)); } Symbol *SymbolTable::findUnderscore(StringRef name) { if (config->machine == I386) return find(("_" + name).str()); return find(name); } // Return all symbols that start with Prefix, possibly ignoring the first // character of Prefix or the first character symbol. std::vector SymbolTable::getSymsWithPrefix(StringRef prefix) { std::vector syms; for (auto pair : symMap) { StringRef name = pair.first.val(); if (name.startswith(prefix) || name.startswith(prefix.drop_front()) || name.drop_front().startswith(prefix) || name.drop_front().startswith(prefix.drop_front())) { syms.push_back(pair.second); } } return syms; } Symbol *SymbolTable::findMangle(StringRef name) { if (Symbol *sym = find(name)) if (!isa(sym)) return sym; // Efficient fuzzy string lookup is impossible with a hash table, so iterate // the symbol table once and collect all possibly matching symbols into this // vector. Then compare each possibly matching symbol with each possible // mangling. 
std::vector syms = getSymsWithPrefix(name); auto findByPrefix = [&syms](const Twine &t) -> Symbol * { std::string prefix = t.str(); for (auto *s : syms) if (s->getName().startswith(prefix)) return s; return nullptr; }; // For non-x86, just look for C++ functions. if (config->machine != I386) return findByPrefix("?" + name + "@@Y"); if (!name.startswith("_")) return nullptr; // Search for x86 stdcall function. if (Symbol *s = findByPrefix(name + "@")) return s; // Search for x86 fastcall function. if (Symbol *s = findByPrefix("@" + name.substr(1) + "@")) return s; // Search for x86 vectorcall function. if (Symbol *s = findByPrefix(name.substr(1) + "@@")) return s; // Search for x86 C++ non-member function. return findByPrefix("?" + name.substr(1) + "@@Y"); } Symbol *SymbolTable::addUndefined(StringRef name) { return addUndefined(name, nullptr, false); } std::vector SymbolTable::compileBitcodeFiles() { lto.reset(new BitcodeCompiler); for (BitcodeFile *f : BitcodeFile::instances) lto->add(*f); return lto->compile(); } void SymbolTable::addCombinedLTOObjects() { if (BitcodeFile::instances.empty()) return; ScopedTimer t(ltoTimer); for (StringRef object : compileBitcodeFiles()) { auto *obj = make(MemoryBufferRef(object, "lto.tmp")); obj->parse(); ObjFile::instances.push_back(obj); } } } // namespace coff } // namespace lld Index: vendor/lld/dist-release_90/COFF/SymbolTable.h =================================================================== --- vendor/lld/dist-release_90/COFF/SymbolTable.h (revision 351717) +++ vendor/lld/dist-release_90/COFF/SymbolTable.h (revision 351718) @@ -1,131 +1,132 @@ //===- SymbolTable.h --------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLD_COFF_SYMBOL_TABLE_H #define LLD_COFF_SYMBOL_TABLE_H #include "InputFiles.h" #include "LTO.h" #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/Support/raw_ostream.h" namespace llvm { struct LTOCodeGenerator; } namespace lld { namespace coff { class Chunk; class CommonChunk; class Defined; class DefinedAbsolute; class DefinedRegular; class DefinedRelative; class Lazy; class SectionChunk; class Symbol; // SymbolTable is a bucket of all known symbols, including defined, // undefined, or lazy symbols (the last one is symbols in archive // files whose archive members are not yet loaded). // // We put all symbols of all files to a SymbolTable, and the // SymbolTable selects the "best" symbols if there are name // conflicts. For example, obviously, a defined symbol is better than // an undefined symbol. Or, if there's a conflict between a lazy and a // undefined, it'll read an archive member to read a real definition // to replace the lazy symbol. The logic is implemented in the // add*() functions, which are called by input files as they are parsed. // There is one add* function per symbol type. class SymbolTable { public: void addFile(InputFile *file); // Try to resolve any undefined symbols and update the symbol table // accordingly, then print an error message for any remaining undefined // symbols. void reportRemainingUndefines(); void loadMinGWAutomaticImports(); bool handleMinGWAutomaticImport(Symbol *sym, StringRef name); // Returns a list of chunks of selected symbols. std::vector getChunks(); // Returns a symbol for a given name. Returns a nullptr if not found. Symbol *find(StringRef name); Symbol *findUnderscore(StringRef name); // Occasionally we have to resolve an undefined symbol to its // mangled symbol. 
This function tries to find a mangled name // for U from the symbol table, and if found, set the symbol as // a weak alias for U. Symbol *findMangle(StringRef name); // Build a set of COFF objects representing the combined contents of // BitcodeFiles and add them to the symbol table. Called after all files are // added and before the writer writes results to a file. void addCombinedLTOObjects(); std::vector compileBitcodeFiles(); // Creates an Undefined symbol for a given name. Symbol *addUndefined(StringRef name); Symbol *addSynthetic(StringRef n, Chunk *c); Symbol *addAbsolute(StringRef n, uint64_t va); Symbol *addUndefined(StringRef name, InputFile *f, bool isWeakAlias); - void addLazy(ArchiveFile *f, const Archive::Symbol sym); + void addLazy(ArchiveFile *f, const Archive::Symbol &sym); Symbol *addAbsolute(StringRef n, COFFSymbolRef s); Symbol *addRegular(InputFile *f, StringRef n, const llvm::object::coff_symbol_generic *s = nullptr, SectionChunk *c = nullptr); std::pair addComdat(InputFile *f, StringRef n, const llvm::object::coff_symbol_generic *s = nullptr); Symbol *addCommon(InputFile *f, StringRef n, uint64_t size, const llvm::object::coff_symbol_generic *s = nullptr, CommonChunk *c = nullptr); Symbol *addImportData(StringRef n, ImportFile *f); Symbol *addImportThunk(StringRef name, DefinedImportData *s, uint16_t machine); + void addLibcall(StringRef name); void reportDuplicate(Symbol *existing, InputFile *newFile); // A list of chunks which to be added to .rdata. std::vector localImportChunks; // Iterates symbols in non-determinstic hash table order. template void forEachSymbol(T callback) { for (auto &pair : symMap) callback(pair.second); } private: /// Inserts symbol if not already present. std::pair insert(StringRef name); /// Same as insert(Name), but also sets isUsedInRegularObj. 
std::pair insert(StringRef name, InputFile *f); std::vector getSymsWithPrefix(StringRef prefix); llvm::DenseMap symMap; std::unique_ptr lto; }; extern SymbolTable *symtab; std::vector getSymbolLocations(ObjFile *file, uint32_t symIndex); } // namespace coff } // namespace lld #endif Index: vendor/lld/dist-release_90/COFF/Symbols.cpp =================================================================== --- vendor/lld/dist-release_90/COFF/Symbols.cpp (revision 351717) +++ vendor/lld/dist-release_90/COFF/Symbols.cpp (revision 351718) @@ -1,117 +1,131 @@ //===- Symbols.cpp --------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "Symbols.h" #include "InputFiles.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "lld/Common/Strings.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::object; using namespace lld::coff; +namespace lld { + static_assert(sizeof(SymbolUnion) <= 48, "symbols should be optimized for memory usage"); // Returns a symbol name for an error message. -std::string lld::toString(coff::Symbol &b) { +static std::string demangle(StringRef symName) { if (config->demangle) - if (Optional s = lld::demangleMSVC(b.getName())) + if (Optional s = demangleMSVC(symName)) return *s; - return b.getName(); + return symName; } +std::string toString(coff::Symbol &b) { return demangle(b.getName()); } +std::string toCOFFString(const Archive::Symbol &b) { + return demangle(b.getName()); +} -namespace lld { namespace coff { StringRef Symbol::getName() { // COFF symbol names are read lazily for a performance reason. 
// Non-external symbol names are never used by the linker except for logging // or debugging. Their internal references are resolved not by name but by // symbol index. And because they are not external, no one can refer them by // name. Object files contain lots of non-external symbols, and creating // StringRefs for them (which involves lots of strlen() on the string table) // is a waste of time. if (nameData == nullptr) { auto *d = cast(this); StringRef nameStr; cast(d->file)->getCOFFObj()->getSymbolName(d->sym, nameStr); nameData = nameStr.data(); nameSize = nameStr.size(); assert(nameSize == nameStr.size() && "name length truncated"); } return StringRef(nameData, nameSize); } InputFile *Symbol::getFile() { if (auto *sym = dyn_cast(this)) return sym->file; if (auto *sym = dyn_cast(this)) return sym->file; return nullptr; } bool Symbol::isLive() const { if (auto *r = dyn_cast(this)) return r->getChunk()->live; if (auto *imp = dyn_cast(this)) return imp->file->live; if (auto *imp = dyn_cast(this)) return imp->wrappedSym->file->thunkLive; // Assume any other kind of symbol is live. return true; } // MinGW specific. 
void Symbol::replaceKeepingName(Symbol *other, size_t size) { StringRef origName = getName(); memcpy(this, other, size); nameData = origName.data(); nameSize = origName.size(); } COFFSymbolRef DefinedCOFF::getCOFFSymbol() { size_t symSize = cast(file)->getCOFFObj()->getSymbolTableEntrySize(); if (symSize == sizeof(coff_symbol16)) return COFFSymbolRef(reinterpret_cast(sym)); assert(symSize == sizeof(coff_symbol32)); return COFFSymbolRef(reinterpret_cast(sym)); } uint16_t DefinedAbsolute::numOutputSections; static Chunk *makeImportThunk(DefinedImportData *s, uint16_t machine) { if (machine == AMD64) return make(s); if (machine == I386) return make(s); if (machine == ARM64) return make(s); assert(machine == ARMNT); return make(s); } DefinedImportThunk::DefinedImportThunk(StringRef name, DefinedImportData *s, uint16_t machine) : Defined(DefinedImportThunkKind, name), wrappedSym(s), data(makeImportThunk(s, machine)) {} Defined *Undefined::getWeakAlias() { // A weak alias may be a weak alias to another symbol, so check recursively. for (Symbol *a = weakAlias; a; a = cast(a)->weakAlias) if (auto *d = dyn_cast(a)) return d; return nullptr; +} + +MemoryBufferRef Lazy::getMemberBuffer() { + Archive::Child c = + CHECK(sym.getMember(), + "could not get the member for symbol " + toCOFFString(sym)); + return CHECK(c.getMemoryBufferRef(), + "could not get the buffer for the member defining symbol " + + toCOFFString(sym)); } } // namespace coff } // namespace lld Index: vendor/lld/dist-release_90/COFF/Symbols.h =================================================================== --- vendor/lld/dist-release_90/COFF/Symbols.h (revision 351717) +++ vendor/lld/dist-release_90/COFF/Symbols.h (revision 351718) @@ -1,435 +1,444 @@ //===- Symbols.h ------------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLD_COFF_SYMBOLS_H #define LLD_COFF_SYMBOLS_H #include "Chunks.h" #include "Config.h" #include "lld/Common/LLVM.h" #include "lld/Common/Memory.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" #include #include #include namespace lld { + +std::string toString(coff::Symbol &b); + +// There are two different ways to convert an Archive::Symbol to a string: +// One for Microsoft name mangling and one for Itanium name mangling. +// Call the functions toCOFFString and toELFString, not just toString. +std::string toCOFFString(const coff::Archive::Symbol &b); + namespace coff { using llvm::object::Archive; using llvm::object::COFFSymbolRef; using llvm::object::coff_import_header; using llvm::object::coff_symbol_generic; class ArchiveFile; class InputFile; class ObjFile; class SymbolTable; // The base class for real symbol classes. class Symbol { public: enum Kind { // The order of these is significant. We start with the regular defined // symbols as those are the most prevalent and the zero tag is the cheapest // to set. Among the defined kinds, the lower the kind is preferred over // the higher kind when testing whether one symbol should take precedence // over another. DefinedRegularKind = 0, DefinedCommonKind, DefinedLocalImportKind, DefinedImportThunkKind, DefinedImportDataKind, DefinedAbsoluteKind, DefinedSyntheticKind, UndefinedKind, LazyKind, LastDefinedCOFFKind = DefinedCommonKind, LastDefinedKind = DefinedSyntheticKind, }; Kind kind() const { return static_cast(symbolKind); } // Returns the symbol name. StringRef getName(); void replaceKeepingName(Symbol *other, size_t size); // Returns the file from which this symbol was created. InputFile *getFile(); // Indicates that this symbol will be included in the final image. Only valid // after calling markLive. 
bool isLive() const; protected: friend SymbolTable; explicit Symbol(Kind k, StringRef n = "") : symbolKind(k), isExternal(true), isCOMDAT(false), writtenToSymtab(false), pendingArchiveLoad(false), isGCRoot(false), isRuntimePseudoReloc(false), nameSize(n.size()), nameData(n.empty() ? nullptr : n.data()) {} const unsigned symbolKind : 8; unsigned isExternal : 1; public: // This bit is used by the \c DefinedRegular subclass. unsigned isCOMDAT : 1; // This bit is used by Writer::createSymbolAndStringTable() to prevent // symbols from being written to the symbol table more than once. unsigned writtenToSymtab : 1; // True if this symbol was referenced by a regular (non-bitcode) object. unsigned isUsedInRegularObj : 1; // True if we've seen both a lazy and an undefined symbol with this symbol // name, which means that we have enqueued an archive member load and should // not load any more archive members to resolve the same symbol. unsigned pendingArchiveLoad : 1; /// True if we've already added this symbol to the list of GC roots. unsigned isGCRoot : 1; unsigned isRuntimePseudoReloc : 1; protected: // Symbol name length. Assume symbol lengths fit in a 32-bit integer. uint32_t nameSize; const char *nameData; }; // The base class for any defined symbols, including absolute symbols, // etc. class Defined : public Symbol { public: Defined(Kind k, StringRef n) : Symbol(k, n) {} static bool classof(const Symbol *s) { return s->kind() <= LastDefinedKind; } // Returns the RVA (relative virtual address) of this symbol. The // writer sets and uses RVAs. uint64_t getRVA(); // Returns the chunk containing this symbol. Absolute symbols and __ImageBase // do not have chunks, so this may return null. Chunk *getChunk(); }; // Symbols defined via a COFF object file or bitcode file. For COFF files, this // stores a coff_symbol_generic*, and names of internal symbols are lazily // loaded through that. For bitcode files, Sym is nullptr and the name is stored // as a decomposed StringRef. 
class DefinedCOFF : public Defined { friend Symbol; public: DefinedCOFF(Kind k, InputFile *f, StringRef n, const coff_symbol_generic *s) : Defined(k, n), file(f), sym(s) {} static bool classof(const Symbol *s) { return s->kind() <= LastDefinedCOFFKind; } InputFile *getFile() { return file; } COFFSymbolRef getCOFFSymbol(); InputFile *file; protected: const coff_symbol_generic *sym; }; // Regular defined symbols read from object file symbol tables. class DefinedRegular : public DefinedCOFF { public: DefinedRegular(InputFile *f, StringRef n, bool isCOMDAT, bool isExternal = false, const coff_symbol_generic *s = nullptr, SectionChunk *c = nullptr) : DefinedCOFF(DefinedRegularKind, f, n, s), data(c ? &c->repl : nullptr) { this->isExternal = isExternal; this->isCOMDAT = isCOMDAT; } static bool classof(const Symbol *s) { return s->kind() == DefinedRegularKind; } uint64_t getRVA() const { return (*data)->getRVA() + sym->Value; } SectionChunk *getChunk() const { return *data; } uint32_t getValue() const { return sym->Value; } SectionChunk **data; }; class DefinedCommon : public DefinedCOFF { public: DefinedCommon(InputFile *f, StringRef n, uint64_t size, const coff_symbol_generic *s = nullptr, CommonChunk *c = nullptr) : DefinedCOFF(DefinedCommonKind, f, n, s), data(c), size(size) { this->isExternal = true; } static bool classof(const Symbol *s) { return s->kind() == DefinedCommonKind; } uint64_t getRVA() { return data->getRVA(); } CommonChunk *getChunk() { return data; } private: friend SymbolTable; uint64_t getSize() const { return size; } CommonChunk *data; uint64_t size; }; // Absolute symbols. 
class DefinedAbsolute : public Defined { public: DefinedAbsolute(StringRef n, COFFSymbolRef s) : Defined(DefinedAbsoluteKind, n), va(s.getValue()) { isExternal = s.isExternal(); } DefinedAbsolute(StringRef n, uint64_t v) : Defined(DefinedAbsoluteKind, n), va(v) {} static bool classof(const Symbol *s) { return s->kind() == DefinedAbsoluteKind; } uint64_t getRVA() { return va - config->imageBase; } void setVA(uint64_t v) { va = v; } // Section index relocations against absolute symbols resolve to // this 16 bit number, and it is the largest valid section index // plus one. This variable keeps it. static uint16_t numOutputSections; private: uint64_t va; }; // This symbol is used for linker-synthesized symbols like __ImageBase and // __safe_se_handler_table. class DefinedSynthetic : public Defined { public: explicit DefinedSynthetic(StringRef name, Chunk *c) : Defined(DefinedSyntheticKind, name), c(c) {} static bool classof(const Symbol *s) { return s->kind() == DefinedSyntheticKind; } // A null chunk indicates that this is __ImageBase. Otherwise, this is some // other synthesized chunk, like SEHTableChunk. uint32_t getRVA() { return c ? c->getRVA() : 0; } Chunk *getChunk() { return c; } private: Chunk *c; }; // This class represents a symbol defined in an archive file. It is // created from an archive file header, and it knows how to load an // object file from an archive to replace itself with a defined // symbol. If the resolver finds both Undefined and Lazy for // the same name, it will ask the Lazy to load a file. class Lazy : public Symbol { public: Lazy(ArchiveFile *f, const Archive::Symbol s) : Symbol(LazyKind, s.getName()), file(f), sym(s) {} static bool classof(const Symbol *s) { return s->kind() == LazyKind; } + MemoryBufferRef getMemberBuffer(); + ArchiveFile *file; private: friend SymbolTable; private: const Archive::Symbol sym; }; // Undefined symbols. 
class Undefined : public Symbol { public: explicit Undefined(StringRef n) : Symbol(UndefinedKind, n) {} static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } // An undefined symbol can have a fallback symbol which gives an // undefined symbol a second chance if it would remain undefined. // If it remains undefined, it'll be replaced with whatever the // Alias pointer points to. Symbol *weakAlias = nullptr; // If this symbol is external weak, try to resolve it to a defined // symbol by searching the chain of fallback symbols. Returns the symbol if // successful, otherwise returns null. Defined *getWeakAlias(); }; // Windows-specific classes. // This class represents a symbol imported from a DLL. This has two // names for internal use and external use. The former is used for // name resolution, and the latter is used for the import descriptor // table in an output. The former has "__imp_" prefix. class DefinedImportData : public Defined { public: DefinedImportData(StringRef n, ImportFile *f) : Defined(DefinedImportDataKind, n), file(f) { } static bool classof(const Symbol *s) { return s->kind() == DefinedImportDataKind; } uint64_t getRVA() { return file->location->getRVA(); } Chunk *getChunk() { return file->location; } void setLocation(Chunk *addressTable) { file->location = addressTable; } StringRef getDLLName() { return file->dllName; } StringRef getExternalName() { return file->externalName; } uint16_t getOrdinal() { return file->hdr->OrdinalHint; } ImportFile *file; }; // This class represents a symbol for a jump table entry which jumps // to a function in a DLL. Linker are supposed to create such symbols // without "__imp_" prefix for all function symbols exported from // DLLs, so that you can call DLL functions as regular functions with // a regular name. A function pointer is given as a DefinedImportData. 
class DefinedImportThunk : public Defined { public: DefinedImportThunk(StringRef name, DefinedImportData *s, uint16_t machine); static bool classof(const Symbol *s) { return s->kind() == DefinedImportThunkKind; } uint64_t getRVA() { return data->getRVA(); } Chunk *getChunk() { return data; } DefinedImportData *wrappedSym; private: Chunk *data; }; // If you have a symbol "foo" in your object file, a symbol name // "__imp_foo" becomes automatically available as a pointer to "foo". // This class is for such automatically-created symbols. // Yes, this is an odd feature. We didn't intend to implement that. // This is here just for compatibility with MSVC. class DefinedLocalImport : public Defined { public: DefinedLocalImport(StringRef n, Defined *s) : Defined(DefinedLocalImportKind, n), data(make(s)) {} static bool classof(const Symbol *s) { return s->kind() == DefinedLocalImportKind; } uint64_t getRVA() { return data->getRVA(); } Chunk *getChunk() { return data; } private: LocalImportChunk *data; }; inline uint64_t Defined::getRVA() { switch (kind()) { case DefinedAbsoluteKind: return cast(this)->getRVA(); case DefinedSyntheticKind: return cast(this)->getRVA(); case DefinedImportDataKind: return cast(this)->getRVA(); case DefinedImportThunkKind: return cast(this)->getRVA(); case DefinedLocalImportKind: return cast(this)->getRVA(); case DefinedCommonKind: return cast(this)->getRVA(); case DefinedRegularKind: return cast(this)->getRVA(); case LazyKind: case UndefinedKind: llvm_unreachable("Cannot get the address for an undefined symbol."); } llvm_unreachable("unknown symbol kind"); } inline Chunk *Defined::getChunk() { switch (kind()) { case DefinedRegularKind: return cast(this)->getChunk(); case DefinedAbsoluteKind: return nullptr; case DefinedSyntheticKind: return cast(this)->getChunk(); case DefinedImportDataKind: return cast(this)->getChunk(); case DefinedImportThunkKind: return cast(this)->getChunk(); case DefinedLocalImportKind: return cast(this)->getChunk(); case 
DefinedCommonKind: return cast(this)->getChunk(); case LazyKind: case UndefinedKind: llvm_unreachable("Cannot get the chunk of an undefined symbol."); } llvm_unreachable("unknown symbol kind"); } // A buffer class that is large enough to hold any Symbol-derived // object. We allocate memory using this class and instantiate a symbol // using the placement new. union SymbolUnion { alignas(DefinedRegular) char a[sizeof(DefinedRegular)]; alignas(DefinedCommon) char b[sizeof(DefinedCommon)]; alignas(DefinedAbsolute) char c[sizeof(DefinedAbsolute)]; alignas(DefinedSynthetic) char d[sizeof(DefinedSynthetic)]; alignas(Lazy) char e[sizeof(Lazy)]; alignas(Undefined) char f[sizeof(Undefined)]; alignas(DefinedImportData) char g[sizeof(DefinedImportData)]; alignas(DefinedImportThunk) char h[sizeof(DefinedImportThunk)]; alignas(DefinedLocalImport) char i[sizeof(DefinedLocalImport)]; }; template void replaceSymbol(Symbol *s, ArgT &&... arg) { static_assert(std::is_trivially_destructible(), "Symbol types must be trivially destructible"); static_assert(sizeof(T) <= sizeof(SymbolUnion), "Symbol too small"); static_assert(alignof(T) <= alignof(SymbolUnion), "SymbolUnion not aligned enough"); assert(static_cast(static_cast(nullptr)) == nullptr && "Not a Symbol"); new (s) T(std::forward(arg)...); } } // namespace coff -std::string toString(coff::Symbol &b); } // namespace lld #endif Index: vendor/lld/dist-release_90/COFF/Writer.cpp =================================================================== --- vendor/lld/dist-release_90/COFF/Writer.cpp (revision 351717) +++ vendor/lld/dist-release_90/COFF/Writer.cpp (revision 351718) @@ -1,1927 +1,1932 @@ //===- Writer.cpp ---------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "Writer.h" #include "Config.h" #include "DLL.h" #include "InputFiles.h" #include "MapFile.h" #include "PDB.h" #include "SymbolTable.h" #include "Symbols.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "lld/Common/Threads.h" #include "lld/Common/Timer.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" #include "llvm/Support/RandomNumberGenerator.h" #include "llvm/Support/xxhash.h" #include #include #include #include #include using namespace llvm; using namespace llvm::COFF; using namespace llvm::object; using namespace llvm::support; using namespace llvm::support::endian; using namespace lld; using namespace lld::coff; /* To re-generate DOSProgram: $ cat > /tmp/DOSProgram.asm org 0 ; Copy cs to ds. push cs pop ds ; Point ds:dx at the $-terminated string. mov dx, str ; Int 21/AH=09h: Write string to standard output. mov ah, 0x9 int 0x21 ; Int 21/AH=4Ch: Exit with return code (in AL). 
mov ax, 0x4C01 int 0x21 str: db 'This program cannot be run in DOS mode.$' align 8, db 0 $ nasm -fbin /tmp/DOSProgram.asm -o /tmp/DOSProgram.bin $ xxd -i /tmp/DOSProgram.bin */ static unsigned char dosProgram[] = { 0x0e, 0x1f, 0xba, 0x0e, 0x00, 0xb4, 0x09, 0xcd, 0x21, 0xb8, 0x01, 0x4c, 0xcd, 0x21, 0x54, 0x68, 0x69, 0x73, 0x20, 0x70, 0x72, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x20, 0x63, 0x61, 0x6e, 0x6e, 0x6f, 0x74, 0x20, 0x62, 0x65, 0x20, 0x72, 0x75, 0x6e, 0x20, 0x69, 0x6e, 0x20, 0x44, 0x4f, 0x53, 0x20, 0x6d, 0x6f, 0x64, 0x65, 0x2e, 0x24, 0x00, 0x00 }; static_assert(sizeof(dosProgram) % 8 == 0, "DOSProgram size must be multiple of 8"); static const int dosStubSize = sizeof(dos_header) + sizeof(dosProgram); static_assert(dosStubSize % 8 == 0, "DOSStub size must be multiple of 8"); static const int numberOfDataDirectory = 16; // Global vector of all output sections. After output sections are finalized, // this can be indexed by Chunk::getOutputSection. static std::vector outputSections; OutputSection *Chunk::getOutputSection() const { return osidx == 0 ? nullptr : outputSections[osidx - 1]; } namespace { class DebugDirectoryChunk : public NonSectionChunk { public: DebugDirectoryChunk(const std::vector &r, bool writeRepro) : records(r), writeRepro(writeRepro) {} size_t getSize() const override { return (records.size() + int(writeRepro)) * sizeof(debug_directory); } void writeTo(uint8_t *b) const override { auto *d = reinterpret_cast(b); for (const Chunk *record : records) { OutputSection *os = record->getOutputSection(); uint64_t offs = os->getFileOff() + (record->getRVA() - os->getRVA()); fillEntry(d, COFF::IMAGE_DEBUG_TYPE_CODEVIEW, record->getSize(), record->getRVA(), offs); ++d; } if (writeRepro) { // FIXME: The COFF spec allows either a 0-sized entry to just say // "the timestamp field is really a hash", or a 4-byte size field // followed by that many bytes containing a longer hash (with the // lowest 4 bytes usually being the timestamp in little-endian order). 
// Consider storing the full 8 bytes computed by xxHash64 here. fillEntry(d, COFF::IMAGE_DEBUG_TYPE_REPRO, 0, 0, 0); } } void setTimeDateStamp(uint32_t timeDateStamp) { for (support::ulittle32_t *tds : timeDateStamps) *tds = timeDateStamp; } private: void fillEntry(debug_directory *d, COFF::DebugType debugType, size_t size, uint64_t rva, uint64_t offs) const { d->Characteristics = 0; d->TimeDateStamp = 0; d->MajorVersion = 0; d->MinorVersion = 0; d->Type = debugType; d->SizeOfData = size; d->AddressOfRawData = rva; d->PointerToRawData = offs; timeDateStamps.push_back(&d->TimeDateStamp); } mutable std::vector timeDateStamps; const std::vector &records; bool writeRepro; }; class CVDebugRecordChunk : public NonSectionChunk { public: size_t getSize() const override { return sizeof(codeview::DebugInfo) + config->pdbAltPath.size() + 1; } void writeTo(uint8_t *b) const override { // Save off the DebugInfo entry to backfill the file signature (build id) // in Writer::writeBuildId buildId = reinterpret_cast(b); // variable sized field (PDB Path) char *p = reinterpret_cast(b + sizeof(*buildId)); if (!config->pdbAltPath.empty()) memcpy(p, config->pdbAltPath.data(), config->pdbAltPath.size()); p[config->pdbAltPath.size()] = '\0'; } mutable codeview::DebugInfo *buildId = nullptr; }; // PartialSection represents a group of chunks that contribute to an // OutputSection. Collating a collection of PartialSections of same name and // characteristics constitutes the OutputSection. class PartialSectionKey { public: StringRef name; unsigned characteristics; bool operator<(const PartialSectionKey &other) const { int c = name.compare(other.name); if (c == 1) return false; if (c == 0) return characteristics < other.characteristics; return true; } }; // The writer writes a SymbolTable result to a file. 
class Writer { public: Writer() : buffer(errorHandler().outputBuffer) {} void run(); private: void createSections(); void createMiscChunks(); void createImportTables(); void appendImportThunks(); void locateImportTables(); void createExportTable(); void mergeSections(); void removeUnusedSections(); void assignAddresses(); void finalizeAddresses(); void removeEmptySections(); void assignOutputSectionIndices(); void createSymbolAndStringTable(); void openFile(StringRef outputPath); template void writeHeader(); void createSEHTable(); void createRuntimePseudoRelocs(); void insertCtorDtorSymbols(); void createGuardCFTables(); void markSymbolsForRVATable(ObjFile *file, ArrayRef symIdxChunks, SymbolRVASet &tableSymbols); void maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym, StringRef countSym); void setSectionPermissions(); void writeSections(); void writeBuildId(); void sortExceptionTable(); void sortCRTSectionChunks(std::vector &chunks); void addSyntheticIdata(); void fixPartialSectionChars(StringRef name, uint32_t chars); bool fixGnuImportChunks(); PartialSection *createPartialSection(StringRef name, uint32_t outChars); PartialSection *findPartialSection(StringRef name, uint32_t outChars); llvm::Optional createSymbol(Defined *d); size_t addEntryToStringTable(StringRef str); OutputSection *findSection(StringRef name); void addBaserels(); void addBaserelBlocks(std::vector &v); uint32_t getSizeOfInitializedData(); std::unique_ptr &buffer; std::map partialSections; std::vector strtab; std::vector outputSymtab; IdataContents idata; Chunk *importTableStart = nullptr; uint64_t importTableSize = 0; Chunk *iatStart = nullptr; uint64_t iatSize = 0; DelayLoadContents delayIdata; EdataContents edata; bool setNoSEHCharacteristic = false; DebugDirectoryChunk *debugDirectory = nullptr; std::vector debugRecords; CVDebugRecordChunk *buildId = nullptr; ArrayRef sectionTable; uint64_t fileSize; uint32_t pointerToSymbolTable = 0; uint64_t sizeOfImage; uint64_t 
sizeOfHeaders; OutputSection *textSec; OutputSection *rdataSec; OutputSection *buildidSec; OutputSection *dataSec; OutputSection *pdataSec; OutputSection *idataSec; OutputSection *edataSec; OutputSection *didatSec; OutputSection *rsrcSec; OutputSection *relocSec; OutputSection *ctorsSec; OutputSection *dtorsSec; // The first and last .pdata sections in the output file. // // We need to keep track of the location of .pdata in whichever section it // gets merged into so that we can sort its contents and emit a correct data // directory entry for the exception table. This is also the case for some // other sections (such as .edata) but because the contents of those sections // are entirely linker-generated we can keep track of their locations using // the chunks that the linker creates. All .pdata chunks come from input // files, so we need to keep track of them separately. Chunk *firstPdata = nullptr; Chunk *lastPdata; }; } // anonymous namespace namespace lld { namespace coff { static Timer codeLayoutTimer("Code Layout", Timer::root()); static Timer diskCommitTimer("Commit Output File", Timer::root()); void writeResult() { Writer().run(); } void OutputSection::addChunk(Chunk *c) { chunks.push_back(c); } void OutputSection::insertChunkAtStart(Chunk *c) { chunks.insert(chunks.begin(), c); } void OutputSection::setPermissions(uint32_t c) { header.Characteristics &= ~permMask; header.Characteristics |= c; } void OutputSection::merge(OutputSection *other) { chunks.insert(chunks.end(), other->chunks.begin(), other->chunks.end()); other->chunks.clear(); contribSections.insert(contribSections.end(), other->contribSections.begin(), other->contribSections.end()); other->contribSections.clear(); } // Write the section header to a given buffer. void OutputSection::writeHeaderTo(uint8_t *buf) { auto *hdr = reinterpret_cast(buf); *hdr = header; if (stringTableOff) { // If name is too long, write offset into the string table as a name. 
sprintf(hdr->Name, "/%d", stringTableOff); } else { assert(!config->debug || name.size() <= COFF::NameSize || (hdr->Characteristics & IMAGE_SCN_MEM_DISCARDABLE) == 0); strncpy(hdr->Name, name.data(), std::min(name.size(), (size_t)COFF::NameSize)); } } void OutputSection::addContributingPartialSection(PartialSection *sec) { contribSections.push_back(sec); } } // namespace coff } // namespace lld // Check whether the target address S is in range from a relocation // of type relType at address P. static bool isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) { if (config->machine == ARMNT) { int64_t diff = AbsoluteDifference(s, p + 4) + margin; switch (relType) { case IMAGE_REL_ARM_BRANCH20T: return isInt<21>(diff); case IMAGE_REL_ARM_BRANCH24T: case IMAGE_REL_ARM_BLX23T: return isInt<25>(diff); default: return true; } } else if (config->machine == ARM64) { int64_t diff = AbsoluteDifference(s, p) + margin; switch (relType) { case IMAGE_REL_ARM64_BRANCH26: return isInt<28>(diff); case IMAGE_REL_ARM64_BRANCH19: return isInt<21>(diff); case IMAGE_REL_ARM64_BRANCH14: return isInt<16>(diff); default: return true; } } else { llvm_unreachable("Unexpected architecture"); } } // Return the last thunk for the given target if it is in range, // or create a new one. static std::pair getThunk(DenseMap &lastThunks, Defined *target, uint64_t p, uint16_t type, int margin) { Defined *&lastThunk = lastThunks[target->getRVA()]; if (lastThunk && isInRange(type, lastThunk->getRVA(), p, margin)) return {lastThunk, false}; Chunk *c; switch (config->machine) { case ARMNT: c = make(target); break; case ARM64: c = make(target); break; default: llvm_unreachable("Unexpected architecture"); } Defined *d = make("", c); lastThunk = d; return {d, true}; } // This checks all relocations, and for any relocation which isn't in range // it adds a thunk after the section chunk that contains the relocation. 
// If the latest thunk for the specific target is in range, that is used // instead of creating a new thunk. All range checks are done with the // specified margin, to make sure that relocations that originally are in // range, but only barely, also get thunks - in case other added thunks makes // the target go out of range. // // After adding thunks, we verify that all relocations are in range (with // no extra margin requirements). If this failed, we restart (throwing away // the previously created thunks) and retry with a wider margin. static bool createThunks(OutputSection *os, int margin) { bool addressesChanged = false; DenseMap lastThunks; DenseMap, uint32_t> thunkSymtabIndices; size_t thunksSize = 0; // Recheck Chunks.size() each iteration, since we can insert more // elements into it. for (size_t i = 0; i != os->chunks.size(); ++i) { SectionChunk *sc = dyn_cast_or_null(os->chunks[i]); if (!sc) continue; size_t thunkInsertionSpot = i + 1; // Try to get a good enough estimate of where new thunks will be placed. // Offset this by the size of the new thunks added so far, to make the // estimate slightly better. size_t thunkInsertionRVA = sc->getRVA() + sc->getSize() + thunksSize; ObjFile *file = sc->file; std::vector> relocReplacements; ArrayRef originalRelocs = file->getCOFFObj()->getRelocations(sc->header); for (size_t j = 0, e = originalRelocs.size(); j < e; ++j) { const coff_relocation &rel = originalRelocs[j]; Symbol *relocTarget = file->getSymbol(rel.SymbolTableIndex); // The estimate of the source address P should be pretty accurate, // but we don't know whether the target Symbol address should be // offset by thunksSize or not (or by some of thunksSize but not all of // it), giving us some uncertainty once we have added one thunk. 
uint64_t p = sc->getRVA() + rel.VirtualAddress + thunksSize; Defined *sym = dyn_cast_or_null(relocTarget); if (!sym) continue; uint64_t s = sym->getRVA(); if (isInRange(rel.Type, s, p, margin)) continue; // If the target isn't in range, hook it up to an existing or new // thunk. Defined *thunk; bool wasNew; std::tie(thunk, wasNew) = getThunk(lastThunks, sym, p, rel.Type, margin); if (wasNew) { Chunk *thunkChunk = thunk->getChunk(); thunkChunk->setRVA( thunkInsertionRVA); // Estimate of where it will be located. os->chunks.insert(os->chunks.begin() + thunkInsertionSpot, thunkChunk); thunkInsertionSpot++; thunksSize += thunkChunk->getSize(); thunkInsertionRVA += thunkChunk->getSize(); addressesChanged = true; } // To redirect the relocation, add a symbol to the parent object file's // symbol table, and replace the relocation symbol table index with the // new index. auto insertion = thunkSymtabIndices.insert({{file, thunk}, ~0U}); uint32_t &thunkSymbolIndex = insertion.first->second; if (insertion.second) thunkSymbolIndex = file->addRangeThunkSymbol(thunk); relocReplacements.push_back({j, thunkSymbolIndex}); } // Get a writable copy of this section's relocations so they can be // modified. If the relocations point into the object file, allocate new // memory. Otherwise, this must be previously allocated memory that can be // modified in place. ArrayRef curRelocs = sc->getRelocs(); MutableArrayRef newRelocs; if (originalRelocs.data() == curRelocs.data()) { newRelocs = makeMutableArrayRef( bAlloc.Allocate(originalRelocs.size()), originalRelocs.size()); } else { newRelocs = makeMutableArrayRef( const_cast(curRelocs.data()), curRelocs.size()); } // Copy each relocation, but replace the symbol table indices which need // thunks. 
auto nextReplacement = relocReplacements.begin(); auto endReplacement = relocReplacements.end(); for (size_t i = 0, e = originalRelocs.size(); i != e; ++i) { newRelocs[i] = originalRelocs[i]; if (nextReplacement != endReplacement && nextReplacement->first == i) { newRelocs[i].SymbolTableIndex = nextReplacement->second; ++nextReplacement; } } sc->setRelocs(newRelocs); } return addressesChanged; } // Verify that all relocations are in range, with no extra margin requirements. static bool verifyRanges(const std::vector chunks) { for (Chunk *c : chunks) { SectionChunk *sc = dyn_cast_or_null(c); if (!sc) continue; ArrayRef relocs = sc->getRelocs(); for (size_t j = 0, e = relocs.size(); j < e; ++j) { const coff_relocation &rel = relocs[j]; Symbol *relocTarget = sc->file->getSymbol(rel.SymbolTableIndex); Defined *sym = dyn_cast_or_null(relocTarget); if (!sym) continue; uint64_t p = sc->getRVA() + rel.VirtualAddress; uint64_t s = sym->getRVA(); if (!isInRange(rel.Type, s, p, 0)) return false; } } return true; } // Assign addresses and add thunks if necessary. void Writer::finalizeAddresses() { assignAddresses(); if (config->machine != ARMNT && config->machine != ARM64) return; size_t origNumChunks = 0; for (OutputSection *sec : outputSections) { sec->origChunks = sec->chunks; origNumChunks += sec->chunks.size(); } int pass = 0; int margin = 1024 * 100; while (true) { // First check whether we need thunks at all, or if the previous pass of // adding them turned out ok. 
bool rangesOk = true; size_t numChunks = 0; for (OutputSection *sec : outputSections) { if (!verifyRanges(sec->chunks)) { rangesOk = false; break; } numChunks += sec->chunks.size(); } if (rangesOk) { if (pass > 0) log("Added " + Twine(numChunks - origNumChunks) + " thunks with " + "margin " + Twine(margin) + " in " + Twine(pass) + " passes"); return; } if (pass >= 10) fatal("adding thunks hasn't converged after " + Twine(pass) + " passes"); if (pass > 0) { // If the previous pass didn't work out, reset everything back to the // original conditions before retrying with a wider margin. This should // ideally never happen under real circumstances. for (OutputSection *sec : outputSections) sec->chunks = sec->origChunks; margin *= 2; } // Try adding thunks everywhere where it is needed, with a margin // to avoid things going out of range due to the added thunks. bool addressesChanged = false; for (OutputSection *sec : outputSections) addressesChanged |= createThunks(sec, margin); // If the verification above thought we needed thunks, we should have // added some. assert(addressesChanged); // Recalculate the layout for the whole image (and verify the ranges at // the start of the next round). assignAddresses(); pass++; } } // The main function of the writer. 
void Writer::run() { ScopedTimer t1(codeLayoutTimer); createImportTables(); createSections(); createMiscChunks(); appendImportThunks(); createExportTable(); mergeSections(); removeUnusedSections(); finalizeAddresses(); removeEmptySections(); assignOutputSectionIndices(); setSectionPermissions(); createSymbolAndStringTable(); if (fileSize > UINT32_MAX) fatal("image size (" + Twine(fileSize) + ") " + "exceeds maximum allowable size (" + Twine(UINT32_MAX) + ")"); openFile(config->outputFile); if (config->is64()) { writeHeader(); } else { writeHeader(); } writeSections(); sortExceptionTable(); t1.stop(); if (!config->pdbPath.empty() && config->debug) { assert(buildId); createPDB(symtab, outputSections, sectionTable, buildId->buildId); } writeBuildId(); writeMapFile(outputSections); + if (errorCount()) + return; + ScopedTimer t2(diskCommitTimer); if (auto e = buffer->commit()) fatal("failed to write the output file: " + toString(std::move(e))); } static StringRef getOutputSectionName(StringRef name) { StringRef s = name.split('$').first; // Treat a later period as a separator for MinGW, for sections like // ".ctors.01234". return s.substr(0, s.find('.', 1)); } // For /order. static void sortBySectionOrder(std::vector &chunks) { auto getPriority = [](const Chunk *c) { if (auto *sec = dyn_cast(c)) if (sec->sym) return config->order.lookup(sec->sym->getName()); return 0; }; llvm::stable_sort(chunks, [=](const Chunk *a, const Chunk *b) { return getPriority(a) < getPriority(b); }); } // Change the characteristics of existing PartialSections that belong to the // section Name to Chars. 
void Writer::fixPartialSectionChars(StringRef name, uint32_t chars) { for (auto it : partialSections) { PartialSection *pSec = it.second; StringRef curName = pSec->name; if (!curName.consume_front(name) || (!curName.empty() && !curName.startswith("$"))) continue; if (pSec->characteristics == chars) continue; PartialSection *destSec = createPartialSection(pSec->name, chars); destSec->chunks.insert(destSec->chunks.end(), pSec->chunks.begin(), pSec->chunks.end()); pSec->chunks.clear(); } } // Sort concrete section chunks from GNU import libraries. // // GNU binutils doesn't use short import files, but instead produces import // libraries that consist of object files, with section chunks for the .idata$* // sections. These are linked just as regular static libraries. Each import // library consists of one header object, one object file for every imported // symbol, and one trailer object. In order for the .idata tables/lists to // be formed correctly, the section chunks within each .idata$* section need // to be grouped by library, and sorted alphabetically within each library // (which makes sure the header comes first and the trailer last). bool Writer::fixGnuImportChunks() { uint32_t rdata = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ; // Make sure all .idata$* section chunks are mapped as RDATA in order to // be sorted into the same sections as our own synthesized .idata chunks. fixPartialSectionChars(".idata", rdata); bool hasIdata = false; // Sort all .idata$* chunks, grouping chunks from the same library, // with alphabetical ordering of the object fils within a library. for (auto it : partialSections) { PartialSection *pSec = it.second; if (!pSec->name.startswith(".idata")) continue; if (!pSec->chunks.empty()) hasIdata = true; llvm::stable_sort(pSec->chunks, [&](Chunk *s, Chunk *t) { SectionChunk *sc1 = dyn_cast_or_null(s); SectionChunk *sc2 = dyn_cast_or_null(t); if (!sc1 || !sc2) { // if SC1, order them ascending. 
If SC2 or both null, // S is not less than T. return sc1 != nullptr; } // Make a string with "libraryname/objectfile" for sorting, achieving // both grouping by library and sorting of objects within a library, // at once. std::string key1 = (sc1->file->parentName + "/" + sc1->file->getName()).str(); std::string key2 = (sc2->file->parentName + "/" + sc2->file->getName()).str(); return key1 < key2; }); } return hasIdata; } // Add generated idata chunks, for imported symbols and DLLs, and a // terminator in .idata$2. void Writer::addSyntheticIdata() { uint32_t rdata = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ; idata.create(); // Add the .idata content in the right section groups, to allow // chunks from other linked in object files to be grouped together. // See Microsoft PE/COFF spec 5.4 for details. auto add = [&](StringRef n, std::vector &v) { PartialSection *pSec = createPartialSection(n, rdata); pSec->chunks.insert(pSec->chunks.end(), v.begin(), v.end()); }; // The loader assumes a specific order of data. // Add each type in the correct order. add(".idata$2", idata.dirs); add(".idata$4", idata.lookups); add(".idata$5", idata.addresses); add(".idata$6", idata.hints); add(".idata$7", idata.dllNames); } // Locate the first Chunk and size of the import directory list and the // IAT. 
void Writer::locateImportTables() { uint32_t rdata = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ; if (PartialSection *importDirs = findPartialSection(".idata$2", rdata)) { if (!importDirs->chunks.empty()) importTableStart = importDirs->chunks.front(); for (Chunk *c : importDirs->chunks) importTableSize += c->getSize(); } if (PartialSection *importAddresses = findPartialSection(".idata$5", rdata)) { if (!importAddresses->chunks.empty()) iatStart = importAddresses->chunks.front(); for (Chunk *c : importAddresses->chunks) iatSize += c->getSize(); } } // Return whether a SectionChunk's suffix (the dollar and any trailing // suffix) should be removed and sorted into the main suffixless // PartialSection. static bool shouldStripSectionSuffix(SectionChunk *sc, StringRef name) { // On MinGW, comdat groups are formed by putting the comdat group name // after the '$' in the section name. For .eh_frame$, that must // still be sorted before the .eh_frame trailer from crtend.o, thus just // strip the section name trailer. For other sections, such as // .tls$$ (where non-comdat .tls symbols are otherwise stored in // ".tls$"), they must be strictly sorted after .tls. And for the // hypothetical case of comdat .CRT$XCU, we definitely need to keep the // suffix for sorting. Thus, to play it safe, only strip the suffix for // the standard sections. if (!config->mingw) return false; if (!sc || !sc->isCOMDAT()) return false; return name.startswith(".text$") || name.startswith(".data$") || name.startswith(".rdata$") || name.startswith(".pdata$") || name.startswith(".xdata$") || name.startswith(".eh_frame$"); } // Create output section objects and add them to OutputSections. void Writer::createSections() { // First, create the builtin sections. 
const uint32_t data = IMAGE_SCN_CNT_INITIALIZED_DATA; const uint32_t bss = IMAGE_SCN_CNT_UNINITIALIZED_DATA; const uint32_t code = IMAGE_SCN_CNT_CODE; const uint32_t discardable = IMAGE_SCN_MEM_DISCARDABLE; const uint32_t r = IMAGE_SCN_MEM_READ; const uint32_t w = IMAGE_SCN_MEM_WRITE; const uint32_t x = IMAGE_SCN_MEM_EXECUTE; SmallDenseMap, OutputSection *> sections; auto createSection = [&](StringRef name, uint32_t outChars) { OutputSection *&sec = sections[{name, outChars}]; if (!sec) { sec = make(name, outChars); outputSections.push_back(sec); } return sec; }; // Try to match the section order used by link.exe. textSec = createSection(".text", code | r | x); createSection(".bss", bss | r | w); rdataSec = createSection(".rdata", data | r); buildidSec = createSection(".buildid", data | r); dataSec = createSection(".data", data | r | w); pdataSec = createSection(".pdata", data | r); idataSec = createSection(".idata", data | r); edataSec = createSection(".edata", data | r); didatSec = createSection(".didat", data | r); rsrcSec = createSection(".rsrc", data | r); relocSec = createSection(".reloc", data | discardable | r); ctorsSec = createSection(".ctors", data | r | w); dtorsSec = createSection(".dtors", data | r | w); // Then bin chunks by name and output characteristics. for (Chunk *c : symtab->getChunks()) { auto *sc = dyn_cast(c); if (sc && !sc->live) { if (config->verbose) sc->printDiscardedMessage(); continue; } StringRef name = c->getSectionName(); if (shouldStripSectionSuffix(sc, name)) name = name.split('$').first; PartialSection *pSec = createPartialSection(name, c->getOutputCharacteristics()); pSec->chunks.push_back(c); } fixPartialSectionChars(".rsrc", data | r); // Even in non MinGW cases, we might need to link against GNU import // libraries. bool hasIdata = fixGnuImportChunks(); if (!idata.empty()) hasIdata = true; if (hasIdata) addSyntheticIdata(); // Process an /order option. 
if (!config->order.empty()) for (auto it : partialSections) sortBySectionOrder(it.second->chunks); if (hasIdata) locateImportTables(); // Then create an OutputSection for each section. // '$' and all following characters in input section names are // discarded when determining output section. So, .text$foo // contributes to .text, for example. See PE/COFF spec 3.2. for (auto it : partialSections) { PartialSection *pSec = it.second; StringRef name = getOutputSectionName(pSec->name); uint32_t outChars = pSec->characteristics; if (name == ".CRT") { // In link.exe, there is a special case for the I386 target where .CRT // sections are treated as if they have output characteristics DATA | R if // their characteristics are DATA | R | W. This implements the same // special case for all architectures. outChars = data | r; log("Processing section " + pSec->name + " -> " + name); sortCRTSectionChunks(pSec->chunks); } OutputSection *sec = createSection(name, outChars); for (Chunk *c : pSec->chunks) sec->addChunk(c); sec->addContributingPartialSection(pSec); } // Finally, move some output sections to the end. auto sectionOrder = [&](const OutputSection *s) { // Move DISCARDABLE (or non-memory-mapped) sections to the end of file // because the loader cannot handle holes. Stripping can remove other // discardable ones than .reloc, which is first of them (created early). if (s->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) return 2; // .rsrc should come at the end of the non-discardable sections because its // size may change by the Win32 UpdateResources() function, causing // subsequent sections to move (see https://crbug.com/827082). 
if (s == rsrcSec) return 1; return 0; }; llvm::stable_sort(outputSections, [&](const OutputSection *s, const OutputSection *t) { return sectionOrder(s) < sectionOrder(t); }); } void Writer::createMiscChunks() { for (MergeChunk *p : MergeChunk::instances) { if (p) { p->finalizeContents(); rdataSec->addChunk(p); } } // Create thunks for locally-dllimported symbols. if (!symtab->localImportChunks.empty()) { for (Chunk *c : symtab->localImportChunks) rdataSec->addChunk(c); } // Create Debug Information Chunks OutputSection *debugInfoSec = config->mingw ? buildidSec : rdataSec; if (config->debug || config->repro) { debugDirectory = make(debugRecords, config->repro); debugInfoSec->addChunk(debugDirectory); } if (config->debug) { // Make a CVDebugRecordChunk even when /DEBUG:CV is not specified. We // output a PDB no matter what, and this chunk provides the only means of // allowing a debugger to match a PDB and an executable. So we need it even // if we're ultimately not going to write CodeView data to the PDB. buildId = make(); debugRecords.push_back(buildId); for (Chunk *c : debugRecords) debugInfoSec->addChunk(c); } // Create SEH table. x86-only. if (config->safeSEH) createSEHTable(); // Create /guard:cf tables if requested. if (config->guardCF != GuardCFLevel::Off) createGuardCFTables(); if (config->mingw) { createRuntimePseudoRelocs(); insertCtorDtorSymbols(); } } // Create .idata section for the DLL-imported symbol table. // The format of this section is inherently Windows-specific. // IdataContents class abstracted away the details for us, // so we just let it create chunks and add them to the section. void Writer::createImportTables() { // Initialize DLLOrder so that import entries are ordered in // the same order as in the command line. (That affects DLL // initialization order, and this ordering is MSVC-compatible.) 
for (ImportFile *file : ImportFile::instances) { if (!file->live) continue; std::string dll = StringRef(file->dllName).lower(); if (config->dllOrder.count(dll) == 0) config->dllOrder[dll] = config->dllOrder.size(); if (file->impSym && !isa(file->impSym)) fatal(toString(*file->impSym) + " was replaced"); DefinedImportData *impSym = cast_or_null(file->impSym); if (config->delayLoads.count(StringRef(file->dllName).lower())) { if (!file->thunkSym) fatal("cannot delay-load " + toString(file) + " due to import of data: " + toString(*impSym)); delayIdata.add(impSym); } else { idata.add(impSym); } } } void Writer::appendImportThunks() { if (ImportFile::instances.empty()) return; for (ImportFile *file : ImportFile::instances) { if (!file->live) continue; if (!file->thunkSym) continue; if (!isa(file->thunkSym)) fatal(toString(*file->thunkSym) + " was replaced"); DefinedImportThunk *thunk = cast(file->thunkSym); if (file->thunkLive) textSec->addChunk(thunk->getChunk()); } if (!delayIdata.empty()) { Defined *helper = cast(config->delayLoadHelper); delayIdata.create(helper); for (Chunk *c : delayIdata.getChunks()) didatSec->addChunk(c); for (Chunk *c : delayIdata.getDataChunks()) dataSec->addChunk(c); for (Chunk *c : delayIdata.getCodeChunks()) textSec->addChunk(c); } } void Writer::createExportTable() { if (config->exports.empty()) return; for (Chunk *c : edata.chunks) edataSec->addChunk(c); } void Writer::removeUnusedSections() { // Remove sections that we can be sure won't get content, to avoid // allocating space for their section headers. auto isUnused = [this](OutputSection *s) { if (s == relocSec) return false; // This section is populated later. // MergeChunks have zero size at this point, as their size is finalized // later. Only remove sections that have no Chunks at all. 
return s->chunks.empty(); }; outputSections.erase( std::remove_if(outputSections.begin(), outputSections.end(), isUnused), outputSections.end()); } // The Windows loader doesn't seem to like empty sections, // so we remove them if any. void Writer::removeEmptySections() { auto isEmpty = [](OutputSection *s) { return s->getVirtualSize() == 0; }; outputSections.erase( std::remove_if(outputSections.begin(), outputSections.end(), isEmpty), outputSections.end()); } void Writer::assignOutputSectionIndices() { // Assign final output section indices, and assign each chunk to its output // section. uint32_t idx = 1; for (OutputSection *os : outputSections) { os->sectionIndex = idx; for (Chunk *c : os->chunks) c->setOutputSectionIdx(idx); ++idx; } // Merge chunks are containers of chunks, so assign those an output section // too. for (MergeChunk *mc : MergeChunk::instances) if (mc) for (SectionChunk *sc : mc->sections) if (sc && sc->live) sc->setOutputSectionIdx(mc->getOutputSectionIdx()); } size_t Writer::addEntryToStringTable(StringRef str) { assert(str.size() > COFF::NameSize); size_t offsetOfEntry = strtab.size() + 4; // +4 for the size field strtab.insert(strtab.end(), str.begin(), str.end()); strtab.push_back('\0'); return offsetOfEntry; } Optional Writer::createSymbol(Defined *def) { coff_symbol16 sym; switch (def->kind()) { case Symbol::DefinedAbsoluteKind: sym.Value = def->getRVA(); sym.SectionNumber = IMAGE_SYM_ABSOLUTE; break; case Symbol::DefinedSyntheticKind: // Relative symbols are unrepresentable in a COFF symbol table. return None; default: { // Don't write symbols that won't be written to the output to the symbol // table. 
Chunk *c = def->getChunk(); if (!c) return None; OutputSection *os = c->getOutputSection(); if (!os) return None; sym.Value = def->getRVA() - os->getRVA(); sym.SectionNumber = os->sectionIndex; break; } } // Symbols that are runtime pseudo relocations don't point to the actual // symbol data itself (as they are imported), but points to the IAT entry // instead. Avoid emitting them to the symbol table, as they can confuse // debuggers. if (def->isRuntimePseudoReloc) return None; StringRef name = def->getName(); if (name.size() > COFF::NameSize) { sym.Name.Offset.Zeroes = 0; sym.Name.Offset.Offset = addEntryToStringTable(name); } else { memset(sym.Name.ShortName, 0, COFF::NameSize); memcpy(sym.Name.ShortName, name.data(), name.size()); } if (auto *d = dyn_cast(def)) { COFFSymbolRef ref = d->getCOFFSymbol(); sym.Type = ref.getType(); sym.StorageClass = ref.getStorageClass(); } else { sym.Type = IMAGE_SYM_TYPE_NULL; sym.StorageClass = IMAGE_SYM_CLASS_EXTERNAL; } sym.NumberOfAuxSymbols = 0; return sym; } void Writer::createSymbolAndStringTable() { // PE/COFF images are limited to 8 byte section names. Longer names can be // supported by writing a non-standard string table, but this string table is // not mapped at runtime and the long names will therefore be inaccessible. // link.exe always truncates section names to 8 bytes, whereas binutils always // preserves long section names via the string table. LLD adopts a hybrid // solution where discardable sections have long names preserved and // non-discardable sections have their names truncated, to ensure that any // section which is mapped at runtime also has its name mapped at runtime. 
for (OutputSection *sec : outputSections) { if (sec->name.size() <= COFF::NameSize) continue; if ((sec->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) == 0) continue; sec->setStringTableOff(addEntryToStringTable(sec->name)); } if (config->debugDwarf || config->debugSymtab) { for (ObjFile *file : ObjFile::instances) { for (Symbol *b : file->getSymbols()) { auto *d = dyn_cast_or_null(b); if (!d || d->writtenToSymtab) continue; d->writtenToSymtab = true; if (Optional sym = createSymbol(d)) outputSymtab.push_back(*sym); } } } if (outputSymtab.empty() && strtab.empty()) return; // We position the symbol table to be adjacent to the end of the last section. uint64_t fileOff = fileSize; pointerToSymbolTable = fileOff; fileOff += outputSymtab.size() * sizeof(coff_symbol16); fileOff += 4 + strtab.size(); fileSize = alignTo(fileOff, config->fileAlign); } void Writer::mergeSections() { if (!pdataSec->chunks.empty()) { firstPdata = pdataSec->chunks.front(); lastPdata = pdataSec->chunks.back(); } for (auto &p : config->merge) { StringRef toName = p.second; if (p.first == toName) continue; StringSet<> names; while (1) { if (!names.insert(toName).second) fatal("/merge: cycle found for section '" + p.first + "'"); auto i = config->merge.find(toName); if (i == config->merge.end()) break; toName = i->second; } OutputSection *from = findSection(p.first); OutputSection *to = findSection(toName); if (!from) continue; if (!to) { from->name = toName; continue; } to->merge(from); } } // Visits all sections to assign incremental, non-overlapping RVAs and // file offsets. void Writer::assignAddresses() { sizeOfHeaders = dosStubSize + sizeof(PEMagic) + sizeof(coff_file_header) + sizeof(data_directory) * numberOfDataDirectory + sizeof(coff_section) * outputSections.size(); sizeOfHeaders += config->is64() ? sizeof(pe32plus_header) : sizeof(pe32_header); sizeOfHeaders = alignTo(sizeOfHeaders, config->fileAlign); - uint64_t rva = pageSize; // The first page is kept unmapped. 
fileSize = sizeOfHeaders; + // The first page is kept unmapped. + uint64_t rva = alignTo(sizeOfHeaders, config->align); + for (OutputSection *sec : outputSections) { if (sec == relocSec) addBaserels(); uint64_t rawSize = 0, virtualSize = 0; sec->header.VirtualAddress = rva; // If /FUNCTIONPADMIN is used, functions are padded in order to create a // hotpatchable image. const bool isCodeSection = (sec->header.Characteristics & IMAGE_SCN_CNT_CODE) && (sec->header.Characteristics & IMAGE_SCN_MEM_READ) && (sec->header.Characteristics & IMAGE_SCN_MEM_EXECUTE); uint32_t padding = isCodeSection ? config->functionPadMin : 0; for (Chunk *c : sec->chunks) { if (padding && c->isHotPatchable()) virtualSize += padding; virtualSize = alignTo(virtualSize, c->getAlignment()); c->setRVA(rva + virtualSize); virtualSize += c->getSize(); if (c->hasData) rawSize = alignTo(virtualSize, config->fileAlign); } if (virtualSize > UINT32_MAX) error("section larger than 4 GiB: " + sec->name); sec->header.VirtualSize = virtualSize; sec->header.SizeOfRawData = rawSize; if (rawSize != 0) sec->header.PointerToRawData = fileSize; - rva += alignTo(virtualSize, pageSize); + rva += alignTo(virtualSize, config->align); fileSize += alignTo(rawSize, config->fileAlign); } - sizeOfImage = alignTo(rva, pageSize); + sizeOfImage = alignTo(rva, config->align); // Assign addresses to sections in MergeChunks. for (MergeChunk *mc : MergeChunk::instances) if (mc) mc->assignSubsectionRVAs(); } template void Writer::writeHeader() { // Write DOS header. For backwards compatibility, the first part of a PE/COFF // executable consists of an MS-DOS MZ executable. If the executable is run // under DOS, that program gets run (usually to just print an error message). // When run under Windows, the loader looks at AddressOfNewExeHeader and uses // the PE header instead. 
uint8_t *buf = buffer->getBufferStart(); auto *dos = reinterpret_cast(buf); buf += sizeof(dos_header); dos->Magic[0] = 'M'; dos->Magic[1] = 'Z'; dos->UsedBytesInTheLastPage = dosStubSize % 512; dos->FileSizeInPages = divideCeil(dosStubSize, 512); dos->HeaderSizeInParagraphs = sizeof(dos_header) / 16; dos->AddressOfRelocationTable = sizeof(dos_header); dos->AddressOfNewExeHeader = dosStubSize; // Write DOS program. memcpy(buf, dosProgram, sizeof(dosProgram)); buf += sizeof(dosProgram); // Write PE magic memcpy(buf, PEMagic, sizeof(PEMagic)); buf += sizeof(PEMagic); // Write COFF header auto *coff = reinterpret_cast(buf); buf += sizeof(*coff); coff->Machine = config->machine; coff->NumberOfSections = outputSections.size(); coff->Characteristics = IMAGE_FILE_EXECUTABLE_IMAGE; if (config->largeAddressAware) coff->Characteristics |= IMAGE_FILE_LARGE_ADDRESS_AWARE; if (!config->is64()) coff->Characteristics |= IMAGE_FILE_32BIT_MACHINE; if (config->dll) coff->Characteristics |= IMAGE_FILE_DLL; if (!config->relocatable) coff->Characteristics |= IMAGE_FILE_RELOCS_STRIPPED; if (config->swaprunCD) coff->Characteristics |= IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP; if (config->swaprunNet) coff->Characteristics |= IMAGE_FILE_NET_RUN_FROM_SWAP; coff->SizeOfOptionalHeader = sizeof(PEHeaderTy) + sizeof(data_directory) * numberOfDataDirectory; // Write PE header auto *pe = reinterpret_cast(buf); buf += sizeof(*pe); pe->Magic = config->is64() ? PE32Header::PE32_PLUS : PE32Header::PE32; // If {Major,Minor}LinkerVersion is left at 0.0, then for some // reason signing the resulting PE file with Authenticode produces a // signature that fails to validate on Windows 7 (but is OK on 10). // Set it to 14.0, which is what VS2015 outputs, and which avoids // that problem. 
pe->MajorLinkerVersion = 14; pe->MinorLinkerVersion = 0; pe->ImageBase = config->imageBase; - pe->SectionAlignment = pageSize; + pe->SectionAlignment = config->align; pe->FileAlignment = config->fileAlign; pe->MajorImageVersion = config->majorImageVersion; pe->MinorImageVersion = config->minorImageVersion; pe->MajorOperatingSystemVersion = config->majorOSVersion; pe->MinorOperatingSystemVersion = config->minorOSVersion; pe->MajorSubsystemVersion = config->majorOSVersion; pe->MinorSubsystemVersion = config->minorOSVersion; pe->Subsystem = config->subsystem; pe->SizeOfImage = sizeOfImage; pe->SizeOfHeaders = sizeOfHeaders; if (!config->noEntry) { Defined *entry = cast(config->entry); pe->AddressOfEntryPoint = entry->getRVA(); // Pointer to thumb code must have the LSB set, so adjust it. if (config->machine == ARMNT) pe->AddressOfEntryPoint |= 1; } pe->SizeOfStackReserve = config->stackReserve; pe->SizeOfStackCommit = config->stackCommit; pe->SizeOfHeapReserve = config->heapReserve; pe->SizeOfHeapCommit = config->heapCommit; if (config->appContainer) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_APPCONTAINER; if (config->dynamicBase) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE; if (config->highEntropyVA) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA; if (!config->allowBind) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_BIND; if (config->nxCompat) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NX_COMPAT; if (!config->allowIsolation) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION; if (config->guardCF != GuardCFLevel::Off) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_GUARD_CF; if (config->integrityCheck) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY; if (setNoSEHCharacteristic) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_SEH; if (config->terminalServerAware) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE; 
pe->NumberOfRvaAndSize = numberOfDataDirectory; if (textSec->getVirtualSize()) { pe->BaseOfCode = textSec->getRVA(); pe->SizeOfCode = textSec->getRawSize(); } pe->SizeOfInitializedData = getSizeOfInitializedData(); // Write data directory auto *dir = reinterpret_cast(buf); buf += sizeof(*dir) * numberOfDataDirectory; if (!config->exports.empty()) { dir[EXPORT_TABLE].RelativeVirtualAddress = edata.getRVA(); dir[EXPORT_TABLE].Size = edata.getSize(); } if (importTableStart) { dir[IMPORT_TABLE].RelativeVirtualAddress = importTableStart->getRVA(); dir[IMPORT_TABLE].Size = importTableSize; } if (iatStart) { dir[IAT].RelativeVirtualAddress = iatStart->getRVA(); dir[IAT].Size = iatSize; } if (rsrcSec->getVirtualSize()) { dir[RESOURCE_TABLE].RelativeVirtualAddress = rsrcSec->getRVA(); dir[RESOURCE_TABLE].Size = rsrcSec->getVirtualSize(); } if (firstPdata) { dir[EXCEPTION_TABLE].RelativeVirtualAddress = firstPdata->getRVA(); dir[EXCEPTION_TABLE].Size = lastPdata->getRVA() + lastPdata->getSize() - firstPdata->getRVA(); } if (relocSec->getVirtualSize()) { dir[BASE_RELOCATION_TABLE].RelativeVirtualAddress = relocSec->getRVA(); dir[BASE_RELOCATION_TABLE].Size = relocSec->getVirtualSize(); } if (Symbol *sym = symtab->findUnderscore("_tls_used")) { if (Defined *b = dyn_cast(sym)) { dir[TLS_TABLE].RelativeVirtualAddress = b->getRVA(); dir[TLS_TABLE].Size = config->is64() ? 
sizeof(object::coff_tls_directory64) : sizeof(object::coff_tls_directory32); } } if (debugDirectory) { dir[DEBUG_DIRECTORY].RelativeVirtualAddress = debugDirectory->getRVA(); dir[DEBUG_DIRECTORY].Size = debugDirectory->getSize(); } if (Symbol *sym = symtab->findUnderscore("_load_config_used")) { if (auto *b = dyn_cast(sym)) { SectionChunk *sc = b->getChunk(); assert(b->getRVA() >= sc->getRVA()); uint64_t offsetInChunk = b->getRVA() - sc->getRVA(); if (!sc->hasData || offsetInChunk + 4 > sc->getSize()) fatal("_load_config_used is malformed"); ArrayRef secContents = sc->getContents(); uint32_t loadConfigSize = *reinterpret_cast(&secContents[offsetInChunk]); if (offsetInChunk + loadConfigSize > sc->getSize()) fatal("_load_config_used is too large"); dir[LOAD_CONFIG_TABLE].RelativeVirtualAddress = b->getRVA(); dir[LOAD_CONFIG_TABLE].Size = loadConfigSize; } } if (!delayIdata.empty()) { dir[DELAY_IMPORT_DESCRIPTOR].RelativeVirtualAddress = delayIdata.getDirRVA(); dir[DELAY_IMPORT_DESCRIPTOR].Size = delayIdata.getDirSize(); } // Write section table for (OutputSection *sec : outputSections) { sec->writeHeaderTo(buf); buf += sizeof(coff_section); } sectionTable = ArrayRef( buf - outputSections.size() * sizeof(coff_section), buf); if (outputSymtab.empty() && strtab.empty()) return; coff->PointerToSymbolTable = pointerToSymbolTable; uint32_t numberOfSymbols = outputSymtab.size(); coff->NumberOfSymbols = numberOfSymbols; auto *symbolTable = reinterpret_cast( buffer->getBufferStart() + coff->PointerToSymbolTable); for (size_t i = 0; i != numberOfSymbols; ++i) symbolTable[i] = outputSymtab[i]; // Create the string table, it follows immediately after the symbol table. // The first 4 bytes is length including itself. 
buf = reinterpret_cast(&symbolTable[numberOfSymbols]); write32le(buf, strtab.size() + 4); if (!strtab.empty()) memcpy(buf + 4, strtab.data(), strtab.size()); } void Writer::openFile(StringRef path) { buffer = CHECK( FileOutputBuffer::create(path, fileSize, FileOutputBuffer::F_executable), "failed to open " + path); } void Writer::createSEHTable() { SymbolRVASet handlers; for (ObjFile *file : ObjFile::instances) { if (!file->hasSafeSEH()) error("/safeseh: " + file->getName() + " is not compatible with SEH"); markSymbolsForRVATable(file, file->getSXDataChunks(), handlers); } // Set the "no SEH" characteristic if there really were no handlers, or if // there is no load config object to point to the table of handlers. setNoSEHCharacteristic = handlers.empty() || !symtab->findUnderscore("_load_config_used"); maybeAddRVATable(std::move(handlers), "__safe_se_handler_table", "__safe_se_handler_count"); } // Add a symbol to an RVA set. Two symbols may have the same RVA, but an RVA set // cannot contain duplicates. Therefore, the set is uniqued by Chunk and the // symbol's offset into that Chunk. static void addSymbolToRVASet(SymbolRVASet &rvaSet, Defined *s) { Chunk *c = s->getChunk(); if (auto *sc = dyn_cast(c)) c = sc->repl; // Look through ICF replacement. uint32_t off = s->getRVA() - (c ? c->getRVA() : 0); rvaSet.insert({c, off}); } // Given a symbol, add it to the GFIDs table if it is a live, defined, function // symbol in an executable section. static void maybeAddAddressTakenFunction(SymbolRVASet &addressTakenSyms, Symbol *s) { if (!s) return; switch (s->kind()) { case Symbol::DefinedLocalImportKind: case Symbol::DefinedImportDataKind: // Defines an __imp_ pointer, so it is data, so it is ignored. break; case Symbol::DefinedCommonKind: // Common is always data, so it is ignored. break; case Symbol::DefinedAbsoluteKind: case Symbol::DefinedSyntheticKind: // Absolute is never code, synthetic generally isn't and usually isn't // determinable. 
break; case Symbol::LazyKind: case Symbol::UndefinedKind: // Undefined symbols resolve to zero, so they don't have an RVA. Lazy // symbols shouldn't have relocations. break; case Symbol::DefinedImportThunkKind: // Thunks are always code, include them. addSymbolToRVASet(addressTakenSyms, cast(s)); break; case Symbol::DefinedRegularKind: { // This is a regular, defined, symbol from a COFF file. Mark the symbol as // address taken if the symbol type is function and it's in an executable // section. auto *d = cast(s); if (d->getCOFFSymbol().getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION) { SectionChunk *sc = dyn_cast(d->getChunk()); if (sc && sc->live && sc->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) addSymbolToRVASet(addressTakenSyms, d); } break; } } } // Visit all relocations from all section contributions of this object file and // mark the relocation target as address-taken. static void markSymbolsWithRelocations(ObjFile *file, SymbolRVASet &usedSymbols) { for (Chunk *c : file->getChunks()) { // We only care about live section chunks. Common chunks and other chunks // don't generally contain relocations. SectionChunk *sc = dyn_cast(c); if (!sc || !sc->live) continue; for (const coff_relocation &reloc : sc->getRelocs()) { if (config->machine == I386 && reloc.Type == COFF::IMAGE_REL_I386_REL32) // Ignore relative relocations on x86. On x86_64 they can't be ignored // since they're also used to compute absolute addresses. continue; Symbol *ref = sc->file->getSymbol(reloc.SymbolTableIndex); maybeAddAddressTakenFunction(usedSymbols, ref); } } } // Create the guard function id table. This is a table of RVAs of all // address-taken functions. It is sorted and uniqued, just like the safe SEH // table. 
void Writer::createGuardCFTables() { SymbolRVASet addressTakenSyms; SymbolRVASet longJmpTargets; for (ObjFile *file : ObjFile::instances) { // If the object was compiled with /guard:cf, the address taken symbols // are in .gfids$y sections, and the longjmp targets are in .gljmp$y // sections. If the object was not compiled with /guard:cf, we assume there // were no setjmp targets, and that all code symbols with relocations are // possibly address-taken. if (file->hasGuardCF()) { markSymbolsForRVATable(file, file->getGuardFidChunks(), addressTakenSyms); markSymbolsForRVATable(file, file->getGuardLJmpChunks(), longJmpTargets); } else { markSymbolsWithRelocations(file, addressTakenSyms); } } // Mark the image entry as address-taken. if (config->entry) maybeAddAddressTakenFunction(addressTakenSyms, config->entry); // Mark exported symbols in executable sections as address-taken. for (Export &e : config->exports) maybeAddAddressTakenFunction(addressTakenSyms, e.sym); // Ensure sections referenced in the gfid table are 16-byte aligned. for (const ChunkAndOffset &c : addressTakenSyms) if (c.inputChunk->getAlignment() < 16) c.inputChunk->setAlignment(16); maybeAddRVATable(std::move(addressTakenSyms), "__guard_fids_table", "__guard_fids_count"); // Add the longjmp target table unless the user told us not to. if (config->guardCF == GuardCFLevel::Full) maybeAddRVATable(std::move(longJmpTargets), "__guard_longjmp_table", "__guard_longjmp_count"); // Set __guard_flags, which will be used in the load config to indicate that // /guard:cf was enabled. uint32_t guardFlags = uint32_t(coff_guard_flags::CFInstrumented) | uint32_t(coff_guard_flags::HasFidTable); if (config->guardCF == GuardCFLevel::Full) guardFlags |= uint32_t(coff_guard_flags::HasLongJmpTable); Symbol *flagSym = symtab->findUnderscore("__guard_flags"); cast(flagSym)->setVA(guardFlags); } // Take a list of input sections containing symbol table indices and add those // symbols to an RVA table. 
The challenge is that symbol RVAs are not known and // depend on the table size, so we can't directly build a set of integers. void Writer::markSymbolsForRVATable(ObjFile *file, ArrayRef symIdxChunks, SymbolRVASet &tableSymbols) { for (SectionChunk *c : symIdxChunks) { // Skip sections discarded by linker GC. This comes up when a .gfids section // is associated with something like a vtable and the vtable is discarded. // In this case, the associated gfids section is discarded, and we don't // mark the virtual member functions as address-taken by the vtable. if (!c->live) continue; // Validate that the contents look like symbol table indices. ArrayRef data = c->getContents(); if (data.size() % 4 != 0) { warn("ignoring " + c->getSectionName() + " symbol table index section in object " + toString(file)); continue; } // Read each symbol table index and check if that symbol was included in the // final link. If so, add it to the table symbol set. ArrayRef symIndices( reinterpret_cast(data.data()), data.size() / 4); ArrayRef objSymbols = file->getSymbols(); for (uint32_t symIndex : symIndices) { if (symIndex >= objSymbols.size()) { warn("ignoring invalid symbol table index in section " + c->getSectionName() + " in object " + toString(file)); continue; } if (Symbol *s = objSymbols[symIndex]) { if (s->isLive()) addSymbolToRVASet(tableSymbols, cast(s)); } } } } // Replace the absolute table symbol with a synthetic symbol pointing to // tableChunk so that we can emit base relocations for it and resolve section // relative relocations. void Writer::maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym, StringRef countSym) { if (tableSymbols.empty()) return; RVATableChunk *tableChunk = make(std::move(tableSymbols)); rdataSec->addChunk(tableChunk); Symbol *t = symtab->findUnderscore(tableSym); Symbol *c = symtab->findUnderscore(countSym); replaceSymbol(t, t->getName(), tableChunk); cast(c)->setVA(tableChunk->getSize() / 4); } // MinGW specific. 
Gather all relocations that are imported from a DLL even // though the code didn't expect it to, produce the table that the runtime // uses for fixing them up, and provide the synthetic symbols that the // runtime uses for finding the table. void Writer::createRuntimePseudoRelocs() { std::vector rels; for (Chunk *c : symtab->getChunks()) { auto *sc = dyn_cast(c); if (!sc || !sc->live) continue; sc->getRuntimePseudoRelocs(rels); } if (!rels.empty()) log("Writing " + Twine(rels.size()) + " runtime pseudo relocations"); PseudoRelocTableChunk *table = make(rels); rdataSec->addChunk(table); EmptyChunk *endOfList = make(); rdataSec->addChunk(endOfList); Symbol *headSym = symtab->findUnderscore("__RUNTIME_PSEUDO_RELOC_LIST__"); Symbol *endSym = symtab->findUnderscore("__RUNTIME_PSEUDO_RELOC_LIST_END__"); replaceSymbol(headSym, headSym->getName(), table); replaceSymbol(endSym, endSym->getName(), endOfList); } // MinGW specific. // The MinGW .ctors and .dtors lists have sentinels at each end; // a (uintptr_t)-1 at the start and a (uintptr_t)0 at the end. // There's a symbol pointing to the start sentinel pointer, __CTOR_LIST__ // and __DTOR_LIST__ respectively. void Writer::insertCtorDtorSymbols() { AbsolutePointerChunk *ctorListHead = make(-1); AbsolutePointerChunk *ctorListEnd = make(0); AbsolutePointerChunk *dtorListHead = make(-1); AbsolutePointerChunk *dtorListEnd = make(0); ctorsSec->insertChunkAtStart(ctorListHead); ctorsSec->addChunk(ctorListEnd); dtorsSec->insertChunkAtStart(dtorListHead); dtorsSec->addChunk(dtorListEnd); Symbol *ctorListSym = symtab->findUnderscore("__CTOR_LIST__"); Symbol *dtorListSym = symtab->findUnderscore("__DTOR_LIST__"); replaceSymbol(ctorListSym, ctorListSym->getName(), ctorListHead); replaceSymbol(dtorListSym, dtorListSym->getName(), dtorListHead); } // Handles /section options to allow users to overwrite // section attributes. 
void Writer::setSectionPermissions() { for (auto &p : config->section) { StringRef name = p.first; uint32_t perm = p.second; for (OutputSection *sec : outputSections) if (sec->name == name) sec->setPermissions(perm); } } // Write section contents to a mmap'ed file. void Writer::writeSections() { // Record the number of sections to apply section index relocations // against absolute symbols. See applySecIdx in Chunks.cpp.. DefinedAbsolute::numOutputSections = outputSections.size(); uint8_t *buf = buffer->getBufferStart(); for (OutputSection *sec : outputSections) { uint8_t *secBuf = buf + sec->getFileOff(); // Fill gaps between functions in .text with INT3 instructions // instead of leaving as NUL bytes (which can be interpreted as // ADD instructions). if (sec->header.Characteristics & IMAGE_SCN_CNT_CODE) memset(secBuf, 0xCC, sec->getRawSize()); parallelForEach(sec->chunks, [&](Chunk *c) { c->writeTo(secBuf + c->getRVA() - sec->getRVA()); }); } } void Writer::writeBuildId() { // There are two important parts to the build ID. // 1) If building with debug info, the COFF debug directory contains a // timestamp as well as a Guid and Age of the PDB. // 2) In all cases, the PE COFF file header also contains a timestamp. // For reproducibility, instead of a timestamp we want to use a hash of the // PE contents. if (config->debug) { assert(buildId && "BuildId is not set!"); // BuildId->BuildId was filled in when the PDB was written. } // At this point the only fields in the COFF file which remain unset are the // "timestamp" in the COFF file header, and the ones in the coff debug // directory. Now we can hash the file and write that hash to the various // timestamp fields in the file. 
StringRef outputFileData( reinterpret_cast(buffer->getBufferStart()), buffer->getBufferSize()); uint32_t timestamp = config->timestamp; uint64_t hash = 0; bool generateSyntheticBuildId = config->mingw && config->debug && config->pdbPath.empty(); if (config->repro || generateSyntheticBuildId) hash = xxHash64(outputFileData); if (config->repro) timestamp = static_cast(hash); if (generateSyntheticBuildId) { // For MinGW builds without a PDB file, we still generate a build id // to allow associating a crash dump to the executable. buildId->buildId->PDB70.CVSignature = OMF::Signature::PDB70; buildId->buildId->PDB70.Age = 1; memcpy(buildId->buildId->PDB70.Signature, &hash, 8); // xxhash only gives us 8 bytes, so put some fixed data in the other half. memcpy(&buildId->buildId->PDB70.Signature[8], "LLD PDB.", 8); } if (debugDirectory) debugDirectory->setTimeDateStamp(timestamp); uint8_t *buf = buffer->getBufferStart(); buf += dosStubSize + sizeof(PEMagic); object::coff_file_header *coffHeader = reinterpret_cast(buf); coffHeader->TimeDateStamp = timestamp; } // Sort .pdata section contents according to PE/COFF spec 5.5. void Writer::sortExceptionTable() { if (!firstPdata) return; // We assume .pdata contains function table entries only. 
auto bufAddr = [&](Chunk *c) { OutputSection *os = c->getOutputSection(); return buffer->getBufferStart() + os->getFileOff() + c->getRVA() - os->getRVA(); }; uint8_t *begin = bufAddr(firstPdata); uint8_t *end = bufAddr(lastPdata) + lastPdata->getSize(); if (config->machine == AMD64) { struct Entry { ulittle32_t begin, end, unwind; }; parallelSort( MutableArrayRef((Entry *)begin, (Entry *)end), [](const Entry &a, const Entry &b) { return a.begin < b.begin; }); return; } if (config->machine == ARMNT || config->machine == ARM64) { struct Entry { ulittle32_t begin, unwind; }; parallelSort( MutableArrayRef((Entry *)begin, (Entry *)end), [](const Entry &a, const Entry &b) { return a.begin < b.begin; }); return; } errs() << "warning: don't know how to handle .pdata.\n"; } // The CRT section contains, among other things, the array of function // pointers that initialize every global variable that is not trivially // constructed. The CRT calls them one after the other prior to invoking // main(). // // As per C++ spec, 3.6.2/2.3, // "Variables with ordered initialization defined within a single // translation unit shall be initialized in the order of their definitions // in the translation unit" // // It is therefore critical to sort the chunks containing the function // pointers in the order that they are listed in the object file (top to // bottom), otherwise global objects might not be initialized in the // correct order. 
void Writer::sortCRTSectionChunks(std::vector &chunks) { auto sectionChunkOrder = [](const Chunk *a, const Chunk *b) { auto sa = dyn_cast(a); auto sb = dyn_cast(b); assert(sa && sb && "Non-section chunks in CRT section!"); StringRef sAObj = sa->file->mb.getBufferIdentifier(); StringRef sBObj = sb->file->mb.getBufferIdentifier(); return sAObj == sBObj && sa->getSectionNumber() < sb->getSectionNumber(); }; llvm::stable_sort(chunks, sectionChunkOrder); if (config->verbose) { for (auto &c : chunks) { auto sc = dyn_cast(c); log(" " + sc->file->mb.getBufferIdentifier().str() + ", SectionID: " + Twine(sc->getSectionNumber())); } } } OutputSection *Writer::findSection(StringRef name) { for (OutputSection *sec : outputSections) if (sec->name == name) return sec; return nullptr; } uint32_t Writer::getSizeOfInitializedData() { uint32_t res = 0; for (OutputSection *s : outputSections) if (s->header.Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA) res += s->getRawSize(); return res; } // Add base relocations to .reloc section. void Writer::addBaserels() { if (!config->relocatable) return; relocSec->chunks.clear(); std::vector v; for (OutputSection *sec : outputSections) { if (sec->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) continue; // Collect all locations for base relocations. for (Chunk *c : sec->chunks) c->getBaserels(&v); // Add the addresses to .reloc section. if (!v.empty()) addBaserelBlocks(v); v.clear(); } } // Add addresses to .reloc section. Note that addresses are grouped by page. 
void Writer::addBaserelBlocks(std::vector &v) { const uint32_t mask = ~uint32_t(pageSize - 1); uint32_t page = v[0].rva & mask; size_t i = 0, j = 1; for (size_t e = v.size(); j < e; ++j) { uint32_t p = v[j].rva & mask; if (p == page) continue; relocSec->addChunk(make(page, &v[i], &v[0] + j)); i = j; page = p; } if (i == j) return; relocSec->addChunk(make(page, &v[i], &v[0] + j)); } PartialSection *Writer::createPartialSection(StringRef name, uint32_t outChars) { PartialSection *&pSec = partialSections[{name, outChars}]; if (pSec) return pSec; pSec = make(name, outChars); return pSec; } PartialSection *Writer::findPartialSection(StringRef name, uint32_t outChars) { auto it = partialSections.find({name, outChars}); if (it != partialSections.end()) return it->second; return nullptr; } Index: vendor/lld/dist-release_90/ELF/Arch/PPC.cpp =================================================================== --- vendor/lld/dist-release_90/ELF/Arch/PPC.cpp (revision 351717) +++ vendor/lld/dist-release_90/ELF/Arch/PPC.cpp (revision 351718) @@ -1,432 +1,441 @@ //===- PPC.cpp ------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "OutputSections.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" #include "lld/Common/ErrorHandler.h" #include "llvm/Support/Endian.h" using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; using namespace lld; using namespace lld::elf; namespace { class PPC final : public TargetInfo { public: PPC(); RelExpr getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const override; RelType getDynRel(RelType type) const override; void writeGotHeader(uint8_t *buf) const override; void writePltHeader(uint8_t *buf) const override { llvm_unreachable("should call writePPC32GlinkSection() instead"); } void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, int32_t index, unsigned relOff) const override { llvm_unreachable("should call writePPC32GlinkSection() instead"); } void writeGotPlt(uint8_t *buf, const Symbol &s) const override; bool needsThunk(RelExpr expr, RelType relocType, const InputFile *file, uint64_t branchAddr, const Symbol &s) const override; uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const override; int getTlsGdRelaxSkip(RelType type) const override; void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override; void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override; void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override; void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override; }; } // namespace static uint16_t lo(uint32_t v) { return v; } static uint16_t ha(uint32_t v) { return (v + 0x8000) >> 16; } static uint32_t 
readFromHalf16(const uint8_t *loc) { return read32(config->isLE ? loc : loc - 2); } static void writeFromHalf16(uint8_t *loc, uint32_t insn) { write32(config->isLE ? loc : loc - 2, insn); } void elf::writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { // On PPC Secure PLT ABI, bl foo@plt jumps to a call stub, which loads an // absolute address from a specific .plt slot (usually called .got.plt on // other targets) and jumps there. // // a) With immediate binding (BIND_NOW), the .plt entry is resolved at load // time. The .glink section is not used. // b) With lazy binding, the .plt entry points to a `b PLTresolve` // instruction in .glink, filled in by PPC::writeGotPlt(). // Write N `b PLTresolve` first. for (size_t i = 0; i != numEntries; ++i) write32(buf + 4 * i, 0x48000000 | 4 * (numEntries - i)); buf += 4 * numEntries; // Then write PLTresolve(), which has two forms: PIC and non-PIC. PLTresolve() // computes the PLT index (by computing the distance from the landing b to // itself) and calls _dl_runtime_resolve() (in glibc). 
uint32_t got = in.got->getVA(); uint32_t glink = in.plt->getVA(); // VA of .glink const uint8_t *end = buf + 64; if (config->isPic) { uint32_t afterBcl = in.plt->getSize() - target->pltHeaderSize + 12; uint32_t gotBcl = got + 4 - (glink + afterBcl); write32(buf + 0, 0x3d6b0000 | ha(afterBcl)); // addis r11,r11,1f-glink@ha write32(buf + 4, 0x7c0802a6); // mflr r0 write32(buf + 8, 0x429f0005); // bcl 20,30,.+4 write32(buf + 12, 0x396b0000 | lo(afterBcl)); // 1: addi r11,r11,1b-.glink@l write32(buf + 16, 0x7d8802a6); // mflr r12 write32(buf + 20, 0x7c0803a6); // mtlr r0 write32(buf + 24, 0x7d6c5850); // sub r11,r11,r12 write32(buf + 28, 0x3d8c0000 | ha(gotBcl)); // addis 12,12,GOT+4-1b@ha if (ha(gotBcl) == ha(gotBcl + 4)) { write32(buf + 32, 0x800c0000 | lo(gotBcl)); // lwz r0,r12,GOT+4-1b@l(r12) write32(buf + 36, 0x818c0000 | lo(gotBcl + 4)); // lwz r12,r12,GOT+8-1b@l(r12) } else { write32(buf + 32, 0x840c0000 | lo(gotBcl)); // lwzu r0,r12,GOT+4-1b@l(r12) write32(buf + 36, 0x818c0000 | 4); // lwz r12,r12,4(r12) } write32(buf + 40, 0x7c0903a6); // mtctr 0 write32(buf + 44, 0x7c0b5a14); // add r0,11,11 write32(buf + 48, 0x7d605a14); // add r11,0,11 write32(buf + 52, 0x4e800420); // bctr buf += 56; } else { write32(buf + 0, 0x3d800000 | ha(got + 4)); // lis r12,GOT+4@ha write32(buf + 4, 0x3d6b0000 | ha(-glink)); // addis r11,r11,-Glink@ha if (ha(got + 4) == ha(got + 8)) write32(buf + 8, 0x800c0000 | lo(got + 4)); // lwz r0,GOT+4@l(r12) else write32(buf + 8, 0x840c0000 | lo(got + 4)); // lwzu r0,GOT+4@l(r12) write32(buf + 12, 0x396b0000 | lo(-glink)); // addi r11,r11,-Glink@l write32(buf + 16, 0x7c0903a6); // mtctr r0 write32(buf + 20, 0x7c0b5a14); // add r0,r11,r11 if (ha(got + 4) == ha(got + 8)) write32(buf + 24, 0x818c0000 | lo(got + 8)); // lwz r12,GOT+8@ha(r12) else write32(buf + 24, 0x818c0000 | 4); // lwz r12,4(r12) write32(buf + 28, 0x7d605a14); // add r11,r0,r11 write32(buf + 32, 0x4e800420); // bctr buf += 36; } // Pad with nop. They should not be executed. 
for (; buf < end; buf += 4) write32(buf, 0x60000000); } PPC::PPC() { gotRel = R_PPC_GLOB_DAT; noneRel = R_PPC_NONE; pltRel = R_PPC_JMP_SLOT; relativeRel = R_PPC_RELATIVE; iRelativeRel = R_PPC_IRELATIVE; symbolicRel = R_PPC_ADDR32; gotBaseSymInGotPlt = false; gotHeaderEntriesNum = 3; gotPltHeaderEntriesNum = 0; pltHeaderSize = 64; // size of PLTresolve in .glink pltEntrySize = 4; needsThunks = true; tlsModuleIndexRel = R_PPC_DTPMOD32; tlsOffsetRel = R_PPC_DTPREL32; tlsGotRel = R_PPC_TPREL32; defaultMaxPageSize = 65536; defaultImageBase = 0x10000000; write32(trapInstr.data(), 0x7fe00008); } void PPC::writeGotHeader(uint8_t *buf) const { // _GLOBAL_OFFSET_TABLE_[0] = _DYNAMIC // glibc stores _dl_runtime_resolve in _GLOBAL_OFFSET_TABLE_[1], // link_map in _GLOBAL_OFFSET_TABLE_[2]. write32(buf, mainPart->dynamic->getVA()); } void PPC::writeGotPlt(uint8_t *buf, const Symbol &s) const { // Address of the symbol resolver stub in .glink . write32(buf, in.plt->getVA() + 4 * s.pltIndex); } bool PPC::needsThunk(RelExpr expr, RelType type, const InputFile *file, uint64_t branchAddr, const Symbol &s) const { if (type != R_PPC_REL24 && type != R_PPC_PLTREL24) return false; if (s.isInPlt()) return true; if (s.isUndefWeak()) return false; return !(expr == R_PC && PPC::inBranchRange(type, branchAddr, s.getVA())); } uint32_t PPC::getThunkSectionSpacing() const { return 0x2000000; } bool PPC::inBranchRange(RelType type, uint64_t src, uint64_t dst) const { uint64_t offset = dst - src; if (type == R_PPC_REL24 || type == R_PPC_PLTREL24) return isInt<26>(offset); llvm_unreachable("unsupported relocation type used in branch"); } RelExpr PPC::getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const { switch (type) { + case R_PPC_NONE: + return R_NONE; + case R_PPC_ADDR16_HA: + case R_PPC_ADDR16_HI: + case R_PPC_ADDR16_LO: + case R_PPC_ADDR32: + return R_ABS; case R_PPC_DTPREL16: case R_PPC_DTPREL16_HA: case R_PPC_DTPREL16_HI: case R_PPC_DTPREL16_LO: case R_PPC_DTPREL32: return 
R_DTPREL; case R_PPC_REL14: case R_PPC_REL32: case R_PPC_LOCAL24PC: case R_PPC_REL16_LO: case R_PPC_REL16_HI: case R_PPC_REL16_HA: return R_PC; case R_PPC_GOT16: return R_GOT_OFF; case R_PPC_REL24: return R_PLT_PC; case R_PPC_PLTREL24: return R_PPC32_PLTREL; case R_PPC_GOT_TLSGD16: return R_TLSGD_GOT; case R_PPC_GOT_TLSLD16: return R_TLSLD_GOT; case R_PPC_GOT_TPREL16: return R_GOT_OFF; case R_PPC_TLS: return R_TLSIE_HINT; case R_PPC_TLSGD: return R_TLSDESC_CALL; case R_PPC_TLSLD: return R_TLSLD_HINT; case R_PPC_TPREL16: case R_PPC_TPREL16_HA: case R_PPC_TPREL16_LO: case R_PPC_TPREL16_HI: return R_TLS; default: - return R_ABS; + error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + + ") against symbol " + toString(s)); + return R_NONE; } } RelType PPC::getDynRel(RelType type) const { if (type == R_PPC_ADDR32) return type; return R_PPC_NONE; } static std::pair fromDTPREL(RelType type, uint64_t val) { uint64_t dtpBiasedVal = val - 0x8000; switch (type) { case R_PPC_DTPREL16: return {R_PPC64_ADDR16, dtpBiasedVal}; case R_PPC_DTPREL16_HA: return {R_PPC_ADDR16_HA, dtpBiasedVal}; case R_PPC_DTPREL16_HI: return {R_PPC_ADDR16_HI, dtpBiasedVal}; case R_PPC_DTPREL16_LO: return {R_PPC_ADDR16_LO, dtpBiasedVal}; case R_PPC_DTPREL32: return {R_PPC_ADDR32, dtpBiasedVal}; default: return {type, val}; } } void PPC::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { RelType newType; std::tie(newType, val) = fromDTPREL(type, val); switch (newType) { case R_PPC_ADDR16: checkIntUInt(loc, val, 16, type); write16(loc, val); break; case R_PPC_GOT16: case R_PPC_GOT_TLSGD16: case R_PPC_GOT_TLSLD16: case R_PPC_GOT_TPREL16: case R_PPC_TPREL16: checkInt(loc, val, 16, type); write16(loc, val); break; case R_PPC_ADDR16_HA: case R_PPC_DTPREL16_HA: case R_PPC_GOT_TLSGD16_HA: case R_PPC_GOT_TLSLD16_HA: case R_PPC_GOT_TPREL16_HA: case R_PPC_REL16_HA: case R_PPC_TPREL16_HA: write16(loc, ha(val)); break; case R_PPC_ADDR16_HI: case R_PPC_DTPREL16_HI: case R_PPC_GOT_TLSGD16_HI: 
case R_PPC_GOT_TLSLD16_HI: case R_PPC_GOT_TPREL16_HI: case R_PPC_REL16_HI: case R_PPC_TPREL16_HI: write16(loc, val >> 16); break; case R_PPC_ADDR16_LO: case R_PPC_DTPREL16_LO: case R_PPC_GOT_TLSGD16_LO: case R_PPC_GOT_TLSLD16_LO: case R_PPC_GOT_TPREL16_LO: case R_PPC_REL16_LO: case R_PPC_TPREL16_LO: write16(loc, val); break; case R_PPC_ADDR32: case R_PPC_REL32: write32(loc, val); break; case R_PPC_REL14: { uint32_t mask = 0x0000FFFC; checkInt(loc, val, 16, type); checkAlignment(loc, val, 4, type); write32(loc, (read32(loc) & ~mask) | (val & mask)); break; } case R_PPC_REL24: case R_PPC_LOCAL24PC: case R_PPC_PLTREL24: { uint32_t mask = 0x03FFFFFC; checkInt(loc, val, 26, type); checkAlignment(loc, val, 4, type); write32(loc, (read32(loc) & ~mask) | (val & mask)); break; } default: - error(getErrorLocation(loc) + "unrecognized relocation " + toString(type)); + llvm_unreachable("unknown relocation"); } } RelExpr PPC::adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const { if (expr == R_RELAX_TLS_GD_TO_IE) return R_RELAX_TLS_GD_TO_IE_GOT_OFF; if (expr == R_RELAX_TLS_LD_TO_LE) return R_RELAX_TLS_LD_TO_LE_ABS; return expr; } int PPC::getTlsGdRelaxSkip(RelType type) const { // A __tls_get_addr call instruction is marked with 2 relocations: // // R_PPC_TLSGD / R_PPC_TLSLD: marker relocation // R_PPC_REL24: __tls_get_addr // // After the relaxation we no longer call __tls_get_addr and should skip both // relocations to not create a false dependence on __tls_get_addr being // defined. 
if (type == R_PPC_TLSGD || type == R_PPC_TLSLD) return 2; return 1; } void PPC::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { switch (type) { case R_PPC_GOT_TLSGD16: { // addi rT, rA, x@got@tlsgd --> lwz rT, x@got@tprel(rA) uint32_t insn = readFromHalf16(loc); writeFromHalf16(loc, 0x80000000 | (insn & 0x03ff0000)); relocateOne(loc, R_PPC_GOT_TPREL16, val); break; } case R_PPC_TLSGD: // bl __tls_get_addr(x@tldgd) --> add r3, r3, r2 write32(loc, 0x7c631214); break; default: llvm_unreachable("unsupported relocation for TLS GD to IE relaxation"); } } void PPC::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { switch (type) { case R_PPC_GOT_TLSGD16: // addi r3, r31, x@got@tlsgd --> addis r3, r2, x@tprel@ha writeFromHalf16(loc, 0x3c620000 | ha(val)); break; case R_PPC_TLSGD: // bl __tls_get_addr(x@tldgd) --> add r3, r3, x@tprel@l write32(loc, 0x38630000 | lo(val)); break; default: llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); } } void PPC::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { switch (type) { case R_PPC_GOT_TLSLD16: // addi r3, rA, x@got@tlsgd --> addis r3, r2, 0 writeFromHalf16(loc, 0x3c620000); break; case R_PPC_TLSLD: // r3+x@dtprel computes r3+x-0x8000, while we want it to compute r3+x@tprel // = r3+x-0x7000, so add 4096 to r3. 
// bl __tls_get_addr(x@tlsld) --> addi r3, r3, 4096 write32(loc, 0x38631000); break; case R_PPC_DTPREL16: case R_PPC_DTPREL16_HA: case R_PPC_DTPREL16_HI: case R_PPC_DTPREL16_LO: relocateOne(loc, type, val); break; default: llvm_unreachable("unsupported relocation for TLS LD to LE relaxation"); } } void PPC::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { switch (type) { case R_PPC_GOT_TPREL16: { // lwz rT, x@got@tprel(rA) --> addis rT, r2, x@tprel@ha uint32_t rt = readFromHalf16(loc) & 0x03e00000; writeFromHalf16(loc, 0x3c020000 | rt | ha(val)); break; } case R_PPC_TLS: { uint32_t insn = read32(loc); if (insn >> 26 != 31) error("unrecognized instruction for IE to LE R_PPC_TLS"); // addi rT, rT, x@tls --> addi rT, rT, x@tprel@l uint32_t dFormOp = getPPCDFormOp((read32(loc) & 0x000007fe) >> 1); if (dFormOp == 0) error("unrecognized instruction for IE to LE R_PPC_TLS"); write32(loc, (dFormOp << 26) | (insn & 0x03ff0000) | lo(val)); break; } default: llvm_unreachable("unsupported relocation for TLS IE to LE relaxation"); } } TargetInfo *elf::getPPCTargetInfo() { static PPC target; return ⌖ } Index: vendor/lld/dist-release_90/ELF/Arch/PPC64.cpp =================================================================== --- vendor/lld/dist-release_90/ELF/Arch/PPC64.cpp (revision 351717) +++ vendor/lld/dist-release_90/ELF/Arch/PPC64.cpp (revision 351718) @@ -1,1077 +1,1095 @@ //===- PPC64.cpp ----------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" #include "lld/Common/ErrorHandler.h" #include "llvm/Support/Endian.h" using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; using namespace lld; using namespace lld::elf; static uint64_t ppc64TocOffset = 0x8000; static uint64_t dynamicThreadPointerOffset = 0x8000; // The instruction encoding of bits 21-30 from the ISA for the Xform and Dform // instructions that can be used as part of the initial exec TLS sequence. enum XFormOpcd { LBZX = 87, LHZX = 279, LWZX = 23, LDX = 21, STBX = 215, STHX = 407, STWX = 151, STDX = 149, ADD = 266, }; enum DFormOpcd { LBZ = 34, LBZU = 35, LHZ = 40, LHZU = 41, LHAU = 43, LWZ = 32, LWZU = 33, LFSU = 49, LD = 58, LFDU = 51, STB = 38, STBU = 39, STH = 44, STHU = 45, STW = 36, STWU = 37, STFSU = 53, STFDU = 55, STD = 62, ADDI = 14 }; uint64_t elf::getPPC64TocBase() { // The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The // TOC starts where the first of these sections starts. We always create a // .got when we see a relocation that uses it, so for us the start is always // the .got. uint64_t tocVA = in.got->getVA(); // Per the ppc64-elf-linux ABI, The TOC base is TOC value plus 0x8000 // thus permitting a full 64 Kbytes segment. Note that the glibc startup // code (crt1.o) assumes that you can get from the TOC base to the // start of the .toc section with only a single (signed) 16-bit relocation. 
return tocVA + ppc64TocOffset; } unsigned elf::getPPC64GlobalEntryToLocalEntryOffset(uint8_t stOther) { // The offset is encoded into the 3 most significant bits of the st_other // field, with some special values described in section 3.4.1 of the ABI: // 0 --> Zero offset between the GEP and LEP, and the function does NOT use // the TOC pointer (r2). r2 will hold the same value on returning from // the function as it did on entering the function. // 1 --> Zero offset between the GEP and LEP, and r2 should be treated as a // caller-saved register for all callers. // 2-6 --> The binary logarithm of the offset eg: // 2 --> 2^2 = 4 bytes --> 1 instruction. // 6 --> 2^6 = 64 bytes --> 16 instructions. // 7 --> Reserved. uint8_t gepToLep = (stOther >> 5) & 7; if (gepToLep < 2) return 0; // The value encoded in the st_other bits is the // log-base-2(offset). if (gepToLep < 7) return 1 << gepToLep; error("reserved value of 7 in the 3 most-significant-bits of st_other"); return 0; } bool elf::isPPC64SmallCodeModelTocReloc(RelType type) { // The only small code model relocations that access the .toc section. return type == R_PPC64_TOC16 || type == R_PPC64_TOC16_DS; } // Find the R_PPC64_ADDR64 in .rela.toc with matching offset. template static std::pair getRelaTocSymAndAddend(InputSectionBase *tocSec, uint64_t offset) { if (tocSec->numRelocations == 0) return {}; // .rela.toc contains exclusively R_PPC64_ADDR64 relocations sorted by // r_offset: 0, 8, 16, etc. For a given Offset, Offset / 8 gives us the // relocation index in most cases. // // In rare cases a TOC entry may store a constant that doesn't need an // R_PPC64_ADDR64, the corresponding r_offset is therefore missing. Offset / 8 // points to a relocation with larger r_offset. Do a linear probe then. // Constants are extremely uncommon in .toc and the extra number of array // accesses can be seen as a small constant. 
ArrayRef relas = tocSec->template relas(); uint64_t index = std::min(offset / 8, relas.size() - 1); for (;;) { if (relas[index].r_offset == offset) { Symbol &sym = tocSec->getFile()->getRelocTargetSym(relas[index]); return {dyn_cast(&sym), getAddend(relas[index])}; } if (relas[index].r_offset < offset || index == 0) break; --index; } return {}; } // When accessing a symbol defined in another translation unit, compilers // reserve a .toc entry, allocate a local label and generate toc-indirect // instuctions: // // addis 3, 2, .LC0@toc@ha # R_PPC64_TOC16_HA // ld 3, .LC0@toc@l(3) # R_PPC64_TOC16_LO_DS, load the address from a .toc entry // ld/lwa 3, 0(3) # load the value from the address // // .section .toc,"aw",@progbits // .LC0: .tc var[TC],var // // If var is defined, non-preemptable and addressable with a 32-bit signed // offset from the toc base, the address of var can be computed by adding an // offset to the toc base, saving a load. // // addis 3,2,var@toc@ha # this may be relaxed to a nop, // addi 3,3,var@toc@l # then this becomes addi 3,2,var@toc // ld/lwa 3, 0(3) # load the value from the address // // Returns true if the relaxation is performed. bool elf::tryRelaxPPC64TocIndirection(RelType type, const Relocation &rel, uint8_t *bufLoc) { assert(config->tocOptimize); if (rel.addend < 0) return false; // If the symbol is not the .toc section, this isn't a toc-indirection. Defined *defSym = dyn_cast(rel.sym); if (!defSym || !defSym->isSection() || defSym->section->name != ".toc") return false; Defined *d; int64_t addend; auto *tocISB = cast(defSym->section); std::tie(d, addend) = config->isLE ? getRelaTocSymAndAddend(tocISB, rel.addend) : getRelaTocSymAndAddend(tocISB, rel.addend); // Only non-preemptable defined symbols can be relaxed. if (!d || d->isPreemptible) return false; // Two instructions can materialize a 32-bit signed offset from the toc base. 
uint64_t tocRelative = d->getVA(addend) - getPPC64TocBase(); if (!isInt<32>(tocRelative)) return false; // Add PPC64TocOffset that will be subtracted by relocateOne(). target->relaxGot(bufLoc, type, tocRelative + ppc64TocOffset); return true; } namespace { class PPC64 final : public TargetInfo { public: PPC64(); int getTlsGdRelaxSkip(RelType type) const override; uint32_t calcEFlags() const override; RelExpr getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const override; RelType getDynRel(RelType type) const override; void writePltHeader(uint8_t *buf) const override; void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, int32_t index, unsigned relOff) const override; void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; void writeGotHeader(uint8_t *buf) const override; bool needsThunk(RelExpr expr, RelType type, const InputFile *file, uint64_t branchAddr, const Symbol &s) const override; uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const override; void relaxGot(uint8_t *loc, RelType type, uint64_t val) const override; void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override; void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override; void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override; void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override; bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end, uint8_t stOther) const override; }; } // namespace // Relocation masks following the #lo(value), #hi(value), #ha(value), // #higher(value), #highera(value), #highest(value), and #highesta(value) // macros defined in section 4.5.1. Relocation Types of the PPC-elf64abi // document. 
static uint16_t lo(uint64_t v) { return v; } static uint16_t hi(uint64_t v) { return v >> 16; } static uint16_t ha(uint64_t v) { return (v + 0x8000) >> 16; } static uint16_t higher(uint64_t v) { return v >> 32; } static uint16_t highera(uint64_t v) { return (v + 0x8000) >> 32; } static uint16_t highest(uint64_t v) { return v >> 48; } static uint16_t highesta(uint64_t v) { return (v + 0x8000) >> 48; } // Extracts the 'PO' field of an instruction encoding. static uint8_t getPrimaryOpCode(uint32_t encoding) { return (encoding >> 26); } static bool isDQFormInstruction(uint32_t encoding) { switch (getPrimaryOpCode(encoding)) { default: return false; case 56: // The only instruction with a primary opcode of 56 is `lq`. return true; case 61: // There are both DS and DQ instruction forms with this primary opcode. // Namely `lxv` and `stxv` are the DQ-forms that use it. // The DS 'XO' bits being set to 01 is restricted to DQ form. return (encoding & 3) == 0x1; } } static bool isInstructionUpdateForm(uint32_t encoding) { switch (getPrimaryOpCode(encoding)) { default: return false; case LBZU: case LHAU: case LHZU: case LWZU: case LFSU: case LFDU: case STBU: case STHU: case STWU: case STFSU: case STFDU: return true; // LWA has the same opcode as LD, and the DS bits is what differentiates // between LD/LDU/LWA case LD: case STD: return (encoding & 3) == 1; } } // There are a number of places when we either want to read or write an // instruction when handling a half16 relocation type. On big-endian the buffer // pointer is pointing into the middle of the word we want to extract, and on // little-endian it is pointing to the start of the word. These 2 helpers are to // simplify reading and writing in that context. static void writeFromHalf16(uint8_t *loc, uint32_t insn) { write32(config->isLE ? loc : loc - 2, insn); } static uint32_t readFromHalf16(const uint8_t *loc) { return read32(config->isLE ? 
loc : loc - 2); } PPC64::PPC64() { gotRel = R_PPC64_GLOB_DAT; noneRel = R_PPC64_NONE; pltRel = R_PPC64_JMP_SLOT; relativeRel = R_PPC64_RELATIVE; iRelativeRel = R_PPC64_IRELATIVE; symbolicRel = R_PPC64_ADDR64; pltEntrySize = 4; gotBaseSymInGotPlt = false; gotHeaderEntriesNum = 1; gotPltHeaderEntriesNum = 2; pltHeaderSize = 60; needsThunks = true; tlsModuleIndexRel = R_PPC64_DTPMOD64; tlsOffsetRel = R_PPC64_DTPREL64; tlsGotRel = R_PPC64_TPREL64; needsMoreStackNonSplit = false; // We need 64K pages (at least under glibc/Linux, the loader won't // set different permissions on a finer granularity than that). defaultMaxPageSize = 65536; // The PPC64 ELF ABI v1 spec, says: // // It is normally desirable to put segments with different characteristics // in separate 256 Mbyte portions of the address space, to give the // operating system full paging flexibility in the 64-bit address space. // // And because the lowest non-zero 256M boundary is 0x10000000, PPC64 linkers // use 0x10000000 as the starting address. defaultImageBase = 0x10000000; write32(trapInstr.data(), 0x7fe00008); } int PPC64::getTlsGdRelaxSkip(RelType type) const { // A __tls_get_addr call instruction is marked with 2 relocations: // // R_PPC64_TLSGD / R_PPC64_TLSLD: marker relocation // R_PPC64_REL24: __tls_get_addr // // After the relaxation we no longer call __tls_get_addr and should skip both // relocations to not create a false dependence on __tls_get_addr being // defined. if (type == R_PPC64_TLSGD || type == R_PPC64_TLSLD) return 2; return 1; } static uint32_t getEFlags(InputFile *file) { if (config->ekind == ELF64BEKind) return cast>(file)->getObj().getHeader()->e_flags; return cast>(file)->getObj().getHeader()->e_flags; } // This file implements v2 ABI. This function makes sure that all // object files have v2 or an unspecified version as an ABI version. 
uint32_t PPC64::calcEFlags() const { for (InputFile *f : objectFiles) { uint32_t flag = getEFlags(f); if (flag == 1) error(toString(f) + ": ABI version 1 is not supported"); else if (flag > 2) error(toString(f) + ": unrecognized e_flags: " + Twine(flag)); } return 2; } void PPC64::relaxGot(uint8_t *loc, RelType type, uint64_t val) const { switch (type) { case R_PPC64_TOC16_HA: // Convert "addis reg, 2, .LC0@toc@h" to "addis reg, 2, var@toc@h" or "nop". relocateOne(loc, type, val); break; case R_PPC64_TOC16_LO_DS: { // Convert "ld reg, .LC0@toc@l(reg)" to "addi reg, reg, var@toc@l" or // "addi reg, 2, var@toc". uint32_t insn = readFromHalf16(loc); if (getPrimaryOpCode(insn) != LD) error("expected a 'ld' for got-indirect to toc-relative relaxing"); writeFromHalf16(loc, (insn & 0x03ffffff) | 0x38000000); relocateOne(loc, R_PPC64_TOC16_LO, val); break; } default: llvm_unreachable("unexpected relocation type"); } } void PPC64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { // Reference: 3.7.4.2 of the 64-bit ELF V2 abi supplement. 
// The general dynamic code sequence for a global `x` will look like: // Instruction Relocation Symbol // addis r3, r2, x@got@tlsgd@ha R_PPC64_GOT_TLSGD16_HA x // addi r3, r3, x@got@tlsgd@l R_PPC64_GOT_TLSGD16_LO x // bl __tls_get_addr(x@tlsgd) R_PPC64_TLSGD x // R_PPC64_REL24 __tls_get_addr // nop None None // Relaxing to local exec entails converting: // addis r3, r2, x@got@tlsgd@ha into nop // addi r3, r3, x@got@tlsgd@l into addis r3, r13, x@tprel@ha // bl __tls_get_addr(x@tlsgd) into nop // nop into addi r3, r3, x@tprel@l switch (type) { case R_PPC64_GOT_TLSGD16_HA: writeFromHalf16(loc, 0x60000000); // nop break; case R_PPC64_GOT_TLSGD16: case R_PPC64_GOT_TLSGD16_LO: writeFromHalf16(loc, 0x3c6d0000); // addis r3, r13 relocateOne(loc, R_PPC64_TPREL16_HA, val); break; case R_PPC64_TLSGD: write32(loc, 0x60000000); // nop write32(loc + 4, 0x38630000); // addi r3, r3 // Since we are relocating a half16 type relocation and Loc + 4 points to // the start of an instruction we need to advance the buffer by an extra // 2 bytes on BE. relocateOne(loc + 4 + (config->ekind == ELF64BEKind ? 2 : 0), R_PPC64_TPREL16_LO, val); break; default: llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); } } void PPC64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { // Reference: 3.7.4.3 of the 64-bit ELF V2 abi supplement. 
// The local dynamic code sequence for a global `x` will look like: // Instruction Relocation Symbol // addis r3, r2, x@got@tlsld@ha R_PPC64_GOT_TLSLD16_HA x // addi r3, r3, x@got@tlsld@l R_PPC64_GOT_TLSLD16_LO x // bl __tls_get_addr(x@tlsgd) R_PPC64_TLSLD x // R_PPC64_REL24 __tls_get_addr // nop None None // Relaxing to local exec entails converting: // addis r3, r2, x@got@tlsld@ha into nop // addi r3, r3, x@got@tlsld@l into addis r3, r13, 0 // bl __tls_get_addr(x@tlsgd) into nop // nop into addi r3, r3, 4096 switch (type) { case R_PPC64_GOT_TLSLD16_HA: writeFromHalf16(loc, 0x60000000); // nop break; case R_PPC64_GOT_TLSLD16_LO: writeFromHalf16(loc, 0x3c6d0000); // addis r3, r13, 0 break; case R_PPC64_TLSLD: write32(loc, 0x60000000); // nop write32(loc + 4, 0x38631000); // addi r3, r3, 4096 break; case R_PPC64_DTPREL16: case R_PPC64_DTPREL16_HA: case R_PPC64_DTPREL16_HI: case R_PPC64_DTPREL16_DS: case R_PPC64_DTPREL16_LO: case R_PPC64_DTPREL16_LO_DS: relocateOne(loc, type, val); break; default: llvm_unreachable("unsupported relocation for TLS LD to LE relaxation"); } } unsigned elf::getPPCDFormOp(unsigned secondaryOp) { switch (secondaryOp) { case LBZX: return LBZ; case LHZX: return LHZ; case LWZX: return LWZ; case LDX: return LD; case STBX: return STB; case STHX: return STH; case STWX: return STW; case STDX: return STD; case ADD: return ADDI; default: return 0; } } void PPC64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { // The initial exec code sequence for a global `x` will look like: // Instruction Relocation Symbol // addis r9, r2, x@got@tprel@ha R_PPC64_GOT_TPREL16_HA x // ld r9, x@got@tprel@l(r9) R_PPC64_GOT_TPREL16_LO_DS x // add r9, r9, x@tls R_PPC64_TLS x // Relaxing to local exec entails converting: // addis r9, r2, x@got@tprel@ha into nop // ld r9, x@got@tprel@l(r9) into addis r9, r13, x@tprel@ha // add r9, r9, x@tls into addi r9, r9, x@tprel@l // x@tls R_PPC64_TLS is a relocation which does not compute anything, // it is replaced 
with r13 (thread pointer). // The add instruction in the initial exec sequence has multiple variations // that need to be handled. If we are building an address it will use an add // instruction, if we are accessing memory it will use any of the X-form // indexed load or store instructions. unsigned offset = (config->ekind == ELF64BEKind) ? 2 : 0; switch (type) { case R_PPC64_GOT_TPREL16_HA: write32(loc - offset, 0x60000000); // nop break; case R_PPC64_GOT_TPREL16_LO_DS: case R_PPC64_GOT_TPREL16_DS: { uint32_t regNo = read32(loc - offset) & 0x03E00000; // bits 6-10 write32(loc - offset, 0x3C0D0000 | regNo); // addis RegNo, r13 relocateOne(loc, R_PPC64_TPREL16_HA, val); break; } case R_PPC64_TLS: { uint32_t primaryOp = getPrimaryOpCode(read32(loc)); if (primaryOp != 31) error("unrecognized instruction for IE to LE R_PPC64_TLS"); uint32_t secondaryOp = (read32(loc) & 0x000007FE) >> 1; // bits 21-30 uint32_t dFormOp = getPPCDFormOp(secondaryOp); if (dFormOp == 0) error("unrecognized instruction for IE to LE R_PPC64_TLS"); write32(loc, ((dFormOp << 26) | (read32(loc) & 0x03FFFFFF))); relocateOne(loc + offset, R_PPC64_TPREL16_LO, val); break; } default: llvm_unreachable("unknown relocation for IE to LE"); break; } } RelExpr PPC64::getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const { switch (type) { + case R_PPC64_NONE: + return R_NONE; + case R_PPC64_ADDR16: + case R_PPC64_ADDR16_DS: + case R_PPC64_ADDR16_HA: + case R_PPC64_ADDR16_HI: + case R_PPC64_ADDR16_HIGHER: + case R_PPC64_ADDR16_HIGHERA: + case R_PPC64_ADDR16_HIGHEST: + case R_PPC64_ADDR16_HIGHESTA: + case R_PPC64_ADDR16_LO: + case R_PPC64_ADDR16_LO_DS: + case R_PPC64_ADDR32: + case R_PPC64_ADDR64: + return R_ABS; case R_PPC64_GOT16: case R_PPC64_GOT16_DS: case R_PPC64_GOT16_HA: case R_PPC64_GOT16_HI: case R_PPC64_GOT16_LO: case R_PPC64_GOT16_LO_DS: return R_GOT_OFF; case R_PPC64_TOC16: case R_PPC64_TOC16_DS: case R_PPC64_TOC16_HI: case R_PPC64_TOC16_LO: return R_GOTREL; case R_PPC64_TOC16_HA: 
case R_PPC64_TOC16_LO_DS: return config->tocOptimize ? R_PPC64_RELAX_TOC : R_GOTREL; case R_PPC64_TOC: return R_PPC64_TOCBASE; case R_PPC64_REL14: case R_PPC64_REL24: return R_PPC64_CALL_PLT; case R_PPC64_REL16_LO: case R_PPC64_REL16_HA: + case R_PPC64_REL16_HI: case R_PPC64_REL32: case R_PPC64_REL64: return R_PC; case R_PPC64_GOT_TLSGD16: case R_PPC64_GOT_TLSGD16_HA: case R_PPC64_GOT_TLSGD16_HI: case R_PPC64_GOT_TLSGD16_LO: return R_TLSGD_GOT; case R_PPC64_GOT_TLSLD16: case R_PPC64_GOT_TLSLD16_HA: case R_PPC64_GOT_TLSLD16_HI: case R_PPC64_GOT_TLSLD16_LO: return R_TLSLD_GOT; case R_PPC64_GOT_TPREL16_HA: case R_PPC64_GOT_TPREL16_LO_DS: case R_PPC64_GOT_TPREL16_DS: case R_PPC64_GOT_TPREL16_HI: return R_GOT_OFF; case R_PPC64_GOT_DTPREL16_HA: case R_PPC64_GOT_DTPREL16_LO_DS: case R_PPC64_GOT_DTPREL16_DS: case R_PPC64_GOT_DTPREL16_HI: return R_TLSLD_GOT_OFF; case R_PPC64_TPREL16: case R_PPC64_TPREL16_HA: case R_PPC64_TPREL16_LO: case R_PPC64_TPREL16_HI: case R_PPC64_TPREL16_DS: case R_PPC64_TPREL16_LO_DS: case R_PPC64_TPREL16_HIGHER: case R_PPC64_TPREL16_HIGHERA: case R_PPC64_TPREL16_HIGHEST: case R_PPC64_TPREL16_HIGHESTA: return R_TLS; case R_PPC64_DTPREL16: case R_PPC64_DTPREL16_DS: case R_PPC64_DTPREL16_HA: case R_PPC64_DTPREL16_HI: case R_PPC64_DTPREL16_HIGHER: case R_PPC64_DTPREL16_HIGHERA: case R_PPC64_DTPREL16_HIGHEST: case R_PPC64_DTPREL16_HIGHESTA: case R_PPC64_DTPREL16_LO: case R_PPC64_DTPREL16_LO_DS: case R_PPC64_DTPREL64: return R_DTPREL; case R_PPC64_TLSGD: return R_TLSDESC_CALL; case R_PPC64_TLSLD: return R_TLSLD_HINT; case R_PPC64_TLS: return R_TLSIE_HINT; default: - return R_ABS; + error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + + ") against symbol " + toString(s)); + return R_NONE; } } RelType PPC64::getDynRel(RelType type) const { if (type == R_PPC64_ADDR64 || type == R_PPC64_TOC) return R_PPC64_ADDR64; return R_PPC64_NONE; } void PPC64::writeGotHeader(uint8_t *buf) const { write64(buf, getPPC64TocBase()); } void 
PPC64::writePltHeader(uint8_t *buf) const { // The generic resolver stub goes first. write32(buf + 0, 0x7c0802a6); // mflr r0 write32(buf + 4, 0x429f0005); // bcl 20,4*cr7+so,8 <_glink+0x8> write32(buf + 8, 0x7d6802a6); // mflr r11 write32(buf + 12, 0x7c0803a6); // mtlr r0 write32(buf + 16, 0x7d8b6050); // subf r12, r11, r12 write32(buf + 20, 0x380cffcc); // subi r0,r12,52 write32(buf + 24, 0x7800f082); // srdi r0,r0,62,2 write32(buf + 28, 0xe98b002c); // ld r12,44(r11) write32(buf + 32, 0x7d6c5a14); // add r11,r12,r11 write32(buf + 36, 0xe98b0000); // ld r12,0(r11) write32(buf + 40, 0xe96b0008); // ld r11,8(r11) write32(buf + 44, 0x7d8903a6); // mtctr r12 write32(buf + 48, 0x4e800420); // bctr // The 'bcl' instruction will set the link register to the address of the // following instruction ('mflr r11'). Here we store the offset from that // instruction to the first entry in the GotPlt section. int64_t gotPltOffset = in.gotPlt->getVA() - (in.plt->getVA() + 8); write64(buf + 52, gotPltOffset); } void PPC64::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, int32_t index, unsigned relOff) const { int32_t offset = pltHeaderSize + index * pltEntrySize; // bl __glink_PLTresolve write32(buf, 0x48000000 | ((-offset) & 0x03FFFFFc)); } static std::pair toAddr16Rel(RelType type, uint64_t val) { // Relocations relative to the toc-base need to be adjusted by the Toc offset. uint64_t tocBiasedVal = val - ppc64TocOffset; // Relocations relative to dtv[dtpmod] need to be adjusted by the DTP offset. uint64_t dtpBiasedVal = val - dynamicThreadPointerOffset; switch (type) { // TOC biased relocation. 
case R_PPC64_GOT16: case R_PPC64_GOT_TLSGD16: case R_PPC64_GOT_TLSLD16: case R_PPC64_TOC16: return {R_PPC64_ADDR16, tocBiasedVal}; case R_PPC64_GOT16_DS: case R_PPC64_TOC16_DS: case R_PPC64_GOT_TPREL16_DS: case R_PPC64_GOT_DTPREL16_DS: return {R_PPC64_ADDR16_DS, tocBiasedVal}; case R_PPC64_GOT16_HA: case R_PPC64_GOT_TLSGD16_HA: case R_PPC64_GOT_TLSLD16_HA: case R_PPC64_GOT_TPREL16_HA: case R_PPC64_GOT_DTPREL16_HA: case R_PPC64_TOC16_HA: return {R_PPC64_ADDR16_HA, tocBiasedVal}; case R_PPC64_GOT16_HI: case R_PPC64_GOT_TLSGD16_HI: case R_PPC64_GOT_TLSLD16_HI: case R_PPC64_GOT_TPREL16_HI: case R_PPC64_GOT_DTPREL16_HI: case R_PPC64_TOC16_HI: return {R_PPC64_ADDR16_HI, tocBiasedVal}; case R_PPC64_GOT16_LO: case R_PPC64_GOT_TLSGD16_LO: case R_PPC64_GOT_TLSLD16_LO: case R_PPC64_TOC16_LO: return {R_PPC64_ADDR16_LO, tocBiasedVal}; case R_PPC64_GOT16_LO_DS: case R_PPC64_TOC16_LO_DS: case R_PPC64_GOT_TPREL16_LO_DS: case R_PPC64_GOT_DTPREL16_LO_DS: return {R_PPC64_ADDR16_LO_DS, tocBiasedVal}; // Dynamic Thread pointer biased relocation types. 
case R_PPC64_DTPREL16: return {R_PPC64_ADDR16, dtpBiasedVal}; case R_PPC64_DTPREL16_DS: return {R_PPC64_ADDR16_DS, dtpBiasedVal}; case R_PPC64_DTPREL16_HA: return {R_PPC64_ADDR16_HA, dtpBiasedVal}; case R_PPC64_DTPREL16_HI: return {R_PPC64_ADDR16_HI, dtpBiasedVal}; case R_PPC64_DTPREL16_HIGHER: return {R_PPC64_ADDR16_HIGHER, dtpBiasedVal}; case R_PPC64_DTPREL16_HIGHERA: return {R_PPC64_ADDR16_HIGHERA, dtpBiasedVal}; case R_PPC64_DTPREL16_HIGHEST: return {R_PPC64_ADDR16_HIGHEST, dtpBiasedVal}; case R_PPC64_DTPREL16_HIGHESTA: return {R_PPC64_ADDR16_HIGHESTA, dtpBiasedVal}; case R_PPC64_DTPREL16_LO: return {R_PPC64_ADDR16_LO, dtpBiasedVal}; case R_PPC64_DTPREL16_LO_DS: return {R_PPC64_ADDR16_LO_DS, dtpBiasedVal}; case R_PPC64_DTPREL64: return {R_PPC64_ADDR64, dtpBiasedVal}; default: return {type, val}; } } static bool isTocOptType(RelType type) { switch (type) { case R_PPC64_GOT16_HA: case R_PPC64_GOT16_LO_DS: case R_PPC64_TOC16_HA: case R_PPC64_TOC16_LO_DS: case R_PPC64_TOC16_LO: return true; default: return false; } } void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { // We need to save the original relocation type to use in diagnostics, and // use the original type to determine if we should toc-optimize the // instructions being relocated. RelType originalType = type; bool shouldTocOptimize = isTocOptType(type); // For dynamic thread pointer relative, toc-relative, and got-indirect // relocations, proceed in terms of the corresponding ADDR16 relocation type. 
std::tie(type, val) = toAddr16Rel(type, val); switch (type) { case R_PPC64_ADDR14: { checkAlignment(loc, val, 4, type); // Preserve the AA/LK bits in the branch instruction uint8_t aalk = loc[3]; write16(loc + 2, (aalk & 3) | (val & 0xfffc)); break; } case R_PPC64_ADDR16: checkIntUInt(loc, val, 16, originalType); write16(loc, val); break; case R_PPC64_ADDR32: checkIntUInt(loc, val, 32, originalType); write32(loc, val); break; case R_PPC64_ADDR16_DS: case R_PPC64_TPREL16_DS: { checkInt(loc, val, 16, originalType); // DQ-form instructions use bits 28-31 as part of the instruction encoding // DS-form instructions only use bits 30-31. uint16_t mask = isDQFormInstruction(readFromHalf16(loc)) ? 0xf : 0x3; checkAlignment(loc, lo(val), mask + 1, originalType); write16(loc, (read16(loc) & mask) | lo(val)); } break; case R_PPC64_ADDR16_HA: case R_PPC64_REL16_HA: case R_PPC64_TPREL16_HA: if (config->tocOptimize && shouldTocOptimize && ha(val) == 0) writeFromHalf16(loc, 0x60000000); else write16(loc, ha(val)); break; case R_PPC64_ADDR16_HI: case R_PPC64_REL16_HI: case R_PPC64_TPREL16_HI: write16(loc, hi(val)); break; case R_PPC64_ADDR16_HIGHER: case R_PPC64_TPREL16_HIGHER: write16(loc, higher(val)); break; case R_PPC64_ADDR16_HIGHERA: case R_PPC64_TPREL16_HIGHERA: write16(loc, highera(val)); break; case R_PPC64_ADDR16_HIGHEST: case R_PPC64_TPREL16_HIGHEST: write16(loc, highest(val)); break; case R_PPC64_ADDR16_HIGHESTA: case R_PPC64_TPREL16_HIGHESTA: write16(loc, highesta(val)); break; case R_PPC64_ADDR16_LO: case R_PPC64_REL16_LO: case R_PPC64_TPREL16_LO: // When the high-adjusted part of a toc relocation evalutes to 0, it is // changed into a nop. The lo part then needs to be updated to use the // toc-pointer register r2, as the base register. 
if (config->tocOptimize && shouldTocOptimize && ha(val) == 0) { uint32_t insn = readFromHalf16(loc); if (isInstructionUpdateForm(insn)) error(getErrorLocation(loc) + "can't toc-optimize an update instruction: 0x" + utohexstr(insn)); writeFromHalf16(loc, (insn & 0xffe00000) | 0x00020000 | lo(val)); } else { write16(loc, lo(val)); } break; case R_PPC64_ADDR16_LO_DS: case R_PPC64_TPREL16_LO_DS: { // DQ-form instructions use bits 28-31 as part of the instruction encoding // DS-form instructions only use bits 30-31. uint32_t insn = readFromHalf16(loc); uint16_t mask = isDQFormInstruction(insn) ? 0xf : 0x3; checkAlignment(loc, lo(val), mask + 1, originalType); if (config->tocOptimize && shouldTocOptimize && ha(val) == 0) { // When the high-adjusted part of a toc relocation evalutes to 0, it is // changed into a nop. The lo part then needs to be updated to use the toc // pointer register r2, as the base register. if (isInstructionUpdateForm(insn)) error(getErrorLocation(loc) + "Can't toc-optimize an update instruction: 0x" + Twine::utohexstr(insn)); insn &= 0xffe00000 | mask; writeFromHalf16(loc, insn | 0x00020000 | lo(val)); } else { write16(loc, (read16(loc) & mask) | lo(val)); } } break; case R_PPC64_TPREL16: checkInt(loc, val, 16, originalType); write16(loc, val); break; case R_PPC64_REL32: checkInt(loc, val, 32, type); write32(loc, val); break; case R_PPC64_ADDR64: case R_PPC64_REL64: case R_PPC64_TOC: write64(loc, val); break; case R_PPC64_REL14: { uint32_t mask = 0x0000FFFC; checkInt(loc, val, 16, type); checkAlignment(loc, val, 4, type); write32(loc, (read32(loc) & ~mask) | (val & mask)); break; } case R_PPC64_REL24: { uint32_t mask = 0x03FFFFFC; checkInt(loc, val, 26, type); checkAlignment(loc, val, 4, type); write32(loc, (read32(loc) & ~mask) | (val & mask)); break; } case R_PPC64_DTPREL64: write64(loc, val - dynamicThreadPointerOffset); break; default: - error(getErrorLocation(loc) + "unrecognized relocation " + toString(type)); + llvm_unreachable("unknown 
relocation"); } } bool PPC64::needsThunk(RelExpr expr, RelType type, const InputFile *file, uint64_t branchAddr, const Symbol &s) const { if (type != R_PPC64_REL14 && type != R_PPC64_REL24) return false; // If a function is in the Plt it needs to be called with a call-stub. if (s.isInPlt()) return true; // If a symbol is a weak undefined and we are compiling an executable // it doesn't need a range-extending thunk since it can't be called. if (s.isUndefWeak() && !config->shared) return false; // If the offset exceeds the range of the branch type then it will need // a range-extending thunk. // See the comment in getRelocTargetVA() about R_PPC64_CALL. return !inBranchRange(type, branchAddr, s.getVA() + getPPC64GlobalEntryToLocalEntryOffset(s.stOther)); } uint32_t PPC64::getThunkSectionSpacing() const { // See comment in Arch/ARM.cpp for a more detailed explanation of // getThunkSectionSpacing(). For PPC64 we pick the constant here based on // R_PPC64_REL24, which is used by unconditional branch instructions. // 0x2000000 = (1 << 24-1) * 4 return 0x2000000; } bool PPC64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const { int64_t offset = dst - src; if (type == R_PPC64_REL14) return isInt<16>(offset); if (type == R_PPC64_REL24) return isInt<26>(offset); llvm_unreachable("unsupported relocation type used in branch"); } RelExpr PPC64::adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const { if (expr == R_RELAX_TLS_GD_TO_IE) return R_RELAX_TLS_GD_TO_IE_GOT_OFF; if (expr == R_RELAX_TLS_LD_TO_LE) return R_RELAX_TLS_LD_TO_LE_ABS; return expr; } // Reference: 3.7.4.1 of the 64-bit ELF V2 abi supplement. // The general dynamic code sequence for a global `x` uses 4 instructions. 
// Instruction Relocation Symbol // addis r3, r2, x@got@tlsgd@ha R_PPC64_GOT_TLSGD16_HA x // addi r3, r3, x@got@tlsgd@l R_PPC64_GOT_TLSGD16_LO x // bl __tls_get_addr(x@tlsgd) R_PPC64_TLSGD x // R_PPC64_REL24 __tls_get_addr // nop None None // // Relaxing to initial-exec entails: // 1) Convert the addis/addi pair that builds the address of the tls_index // struct for 'x' to an addis/ld pair that loads an offset from a got-entry. // 2) Convert the call to __tls_get_addr to a nop. // 3) Convert the nop following the call to an add of the loaded offset to the // thread pointer. // Since the nop must directly follow the call, the R_PPC64_TLSGD relocation is // used as the relaxation hint for both steps 2 and 3. void PPC64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { switch (type) { case R_PPC64_GOT_TLSGD16_HA: // This is relaxed from addis rT, r2, sym@got@tlsgd@ha to // addis rT, r2, sym@got@tprel@ha. relocateOne(loc, R_PPC64_GOT_TPREL16_HA, val); return; case R_PPC64_GOT_TLSGD16: case R_PPC64_GOT_TLSGD16_LO: { // Relax from addi r3, rA, sym@got@tlsgd@l to // ld r3, sym@got@tprel@l(rA) uint32_t ra = (readFromHalf16(loc) & (0x1f << 16)); writeFromHalf16(loc, 0xe8600000 | ra); relocateOne(loc, R_PPC64_GOT_TPREL16_LO_DS, val); return; } case R_PPC64_TLSGD: write32(loc, 0x60000000); // bl __tls_get_addr(sym@tlsgd) --> nop write32(loc + 4, 0x7c636A14); // nop --> add r3, r3, r13 return; default: llvm_unreachable("unsupported relocation for TLS GD to IE relaxation"); } } // The prologue for a split-stack function is expected to look roughly // like this: // .Lglobal_entry_point: // # TOC pointer initalization. // ... // .Llocal_entry_point: // # load the __private_ss member of the threads tcbhead. // ld r0,-0x7000-64(r13) // # subtract the functions stack size from the stack pointer. 
// addis r12, r1, ha(-stack-frame size) // addi r12, r12, l(-stack-frame size) // # compare needed to actual and branch to allocate_more_stack if more // # space is needed, otherwise fallthrough to 'normal' function body. // cmpld cr7,r12,r0 // blt- cr7, .Lallocate_more_stack // // -) The allocate_more_stack block might be placed after the split-stack // prologue and the `blt-` replaced with a `bge+ .Lnormal_func_body` // instead. // -) If either the addis or addi is not needed due to the stack size being // smaller then 32K or a multiple of 64K they will be replaced with a nop, // but there will always be 2 instructions the linker can overwrite for the // adjusted stack size. // // The linkers job here is to increase the stack size used in the addis/addi // pair by split-stack-size-adjust. // addis r12, r1, ha(-stack-frame size - split-stack-adjust-size) // addi r12, r12, l(-stack-frame size - split-stack-adjust-size) bool PPC64::adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end, uint8_t stOther) const { // If the caller has a global entry point adjust the buffer past it. The start // of the split-stack prologue will be at the local entry point. loc += getPPC64GlobalEntryToLocalEntryOffset(stOther); // At the very least we expect to see a load of some split-stack data from the // tcb, and 2 instructions that calculate the ending stack address this // function will require. If there is not enough room for at least 3 // instructions it can't be a split-stack prologue. if (loc + 12 >= end) return false; // First instruction must be `ld r0, -0x7000-64(r13)` if (read32(loc) != 0xe80d8fc0) return false; int16_t hiImm = 0; int16_t loImm = 0; // First instruction can be either an addis if the frame size is larger then // 32K, or an addi if the size is less then 32K. 
int32_t firstInstr = read32(loc + 4); if (getPrimaryOpCode(firstInstr) == 15) { hiImm = firstInstr & 0xFFFF; } else if (getPrimaryOpCode(firstInstr) == 14) { loImm = firstInstr & 0xFFFF; } else { return false; } // Second instruction is either an addi or a nop. If the first instruction was // an addi then LoImm is set and the second instruction must be a nop. uint32_t secondInstr = read32(loc + 8); if (!loImm && getPrimaryOpCode(secondInstr) == 14) { loImm = secondInstr & 0xFFFF; } else if (secondInstr != 0x60000000) { return false; } // The register operands of the first instruction should be the stack-pointer // (r1) as the input (RA) and r12 as the output (RT). If the second // instruction is not a nop, then it should use r12 as both input and output. auto checkRegOperands = [](uint32_t instr, uint8_t expectedRT, uint8_t expectedRA) { return ((instr & 0x3E00000) >> 21 == expectedRT) && ((instr & 0x1F0000) >> 16 == expectedRA); }; if (!checkRegOperands(firstInstr, 12, 1)) return false; if (secondInstr != 0x60000000 && !checkRegOperands(secondInstr, 12, 12)) return false; int32_t stackFrameSize = (hiImm * 65536) + loImm; // Check that the adjusted size doesn't overflow what we can represent with 2 // instructions. if (stackFrameSize < config->splitStackAdjustSize + INT32_MIN) { error(getErrorLocation(loc) + "split-stack prologue adjustment overflows"); return false; } int32_t adjustedStackFrameSize = stackFrameSize - config->splitStackAdjustSize; loImm = adjustedStackFrameSize & 0xFFFF; hiImm = (adjustedStackFrameSize + 0x8000) >> 16; if (hiImm) { write32(loc + 4, 0x3D810000 | (uint16_t)hiImm); // If the low immediate is zero the second instruction will be a nop. secondInstr = loImm ? 
0x398C0000 | (uint16_t)loImm : 0x60000000; write32(loc + 8, secondInstr); } else { // addi r12, r1, imm write32(loc + 4, (0x39810000) | (uint16_t)loImm); write32(loc + 8, 0x60000000); } return true; } TargetInfo *elf::getPPC64TargetInfo() { static PPC64 target; return ⌖ } Index: vendor/lld/dist-release_90/ELF/InputFiles.cpp =================================================================== --- vendor/lld/dist-release_90/ELF/InputFiles.cpp (revision 351717) +++ vendor/lld/dist-release_90/ELF/InputFiles.cpp (revision 351718) @@ -1,1645 +1,1645 @@ //===- InputFiles.cpp -----------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "InputFiles.h" #include "Driver.h" #include "InputSection.h" #include "LinkerScript.h" #include "SymbolTable.h" #include "Symbols.h" #include "SyntheticSections.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/LTO/LTO.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/ARMAttributeParser.h" #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Path.h" #include "llvm/Support/TarWriter.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::sys; using namespace llvm::sys::fs; using namespace llvm::support::endian; using namespace lld; using namespace lld::elf; bool InputFile::isInGroup; uint32_t InputFile::nextGroupId; std::vector elf::binaryFiles; std::vector 
elf::bitcodeFiles; std::vector elf::lazyObjFiles; std::vector elf::objectFiles; std::vector elf::sharedFiles; std::unique_ptr elf::tar; static ELFKind getELFKind(MemoryBufferRef mb, StringRef archiveName) { unsigned char size; unsigned char endian; std::tie(size, endian) = getElfArchType(mb.getBuffer()); auto report = [&](StringRef msg) { StringRef filename = mb.getBufferIdentifier(); if (archiveName.empty()) fatal(filename + ": " + msg); else fatal(archiveName + "(" + filename + "): " + msg); }; if (!mb.getBuffer().startswith(ElfMagic)) report("not an ELF file"); if (endian != ELFDATA2LSB && endian != ELFDATA2MSB) report("corrupted ELF file: invalid data encoding"); if (size != ELFCLASS32 && size != ELFCLASS64) report("corrupted ELF file: invalid file class"); size_t bufSize = mb.getBuffer().size(); if ((size == ELFCLASS32 && bufSize < sizeof(Elf32_Ehdr)) || (size == ELFCLASS64 && bufSize < sizeof(Elf64_Ehdr))) report("corrupted ELF file: file is too short"); if (size == ELFCLASS32) return (endian == ELFDATA2LSB) ? ELF32LEKind : ELF32BEKind; return (endian == ELFDATA2LSB) ? ELF64LEKind : ELF64BEKind; } InputFile::InputFile(Kind k, MemoryBufferRef m) : mb(m), groupId(nextGroupId), fileKind(k) { // All files within the same --{start,end}-group get the same group ID. // Otherwise, a new file will get a new group ID. if (!isInGroup) ++nextGroupId; } Optional elf::readFile(StringRef path) { // The --chroot option changes our virtual root directory. // This is useful when you are dealing with files created by --reproduce. 
if (!config->chroot.empty() && path.startswith("/")) path = saver.save(config->chroot + path); log(path); auto mbOrErr = MemoryBuffer::getFile(path, -1, false); if (auto ec = mbOrErr.getError()) { error("cannot open " + path + ": " + ec.message()); return None; } std::unique_ptr &mb = *mbOrErr; MemoryBufferRef mbref = mb->getMemBufferRef(); make>(std::move(mb)); // take MB ownership if (tar) tar->append(relativeToRoot(path), mbref.getBuffer()); return mbref; } // All input object files must be for the same architecture // (e.g. it does not make sense to link x86 object files with // MIPS object files.) This function checks for that error. static bool isCompatible(InputFile *file) { if (!file->isElf() && !isa(file)) return true; if (file->ekind == config->ekind && file->emachine == config->emachine) { if (config->emachine != EM_MIPS) return true; if (isMipsN32Abi(file) == config->mipsN32Abi) return true; } if (!config->emulation.empty()) { error(toString(file) + " is incompatible with " + config->emulation); } else { InputFile *existing; if (!objectFiles.empty()) existing = objectFiles[0]; else if (!sharedFiles.empty()) existing = sharedFiles[0]; else existing = bitcodeFiles[0]; error(toString(file) + " is incompatible with " + toString(existing)); } return false; } template static void doParseFile(InputFile *file) { if (!isCompatible(file)) return; // Binary file if (auto *f = dyn_cast(file)) { binaryFiles.push_back(f); f->parse(); return; } // .a file if (auto *f = dyn_cast(file)) { f->parse(); return; } // Lazy object file if (auto *f = dyn_cast(file)) { lazyObjFiles.push_back(f); f->parse(); return; } if (config->trace) message(toString(file)); // .so file if (auto *f = dyn_cast(file)) { f->parse(); return; } // LLVM bitcode file if (auto *f = dyn_cast(file)) { bitcodeFiles.push_back(f); f->parse(); return; } // Regular object file objectFiles.push_back(file); cast>(file)->parse(); } // Add symbols in File to the symbol table. 
void elf::parseFile(InputFile *file) { switch (config->ekind) { case ELF32LEKind: doParseFile(file); return; case ELF32BEKind: doParseFile(file); return; case ELF64LEKind: doParseFile(file); return; case ELF64BEKind: doParseFile(file); return; default: llvm_unreachable("unknown ELFT"); } } // Concatenates arguments to construct a string representing an error location. static std::string createFileLineMsg(StringRef path, unsigned line) { std::string filename = path::filename(path); std::string lineno = ":" + std::to_string(line); if (filename == path) return filename + lineno; return filename + lineno + " (" + path.str() + lineno + ")"; } template static std::string getSrcMsgAux(ObjFile &file, const Symbol &sym, InputSectionBase &sec, uint64_t offset) { // In DWARF, functions and variables are stored to different places. // First, lookup a function for a given offset. if (Optional info = file.getDILineInfo(&sec, offset)) return createFileLineMsg(info->FileName, info->Line); // If it failed, lookup again as a variable. if (Optional> fileLine = file.getVariableLoc(sym.getName())) return createFileLineMsg(fileLine->first, fileLine->second); // File.sourceFile contains STT_FILE symbol, and that is a last resort. 
return file.sourceFile; } std::string InputFile::getSrcMsg(const Symbol &sym, InputSectionBase &sec, uint64_t offset) { if (kind() != ObjKind) return ""; switch (config->ekind) { default: llvm_unreachable("Invalid kind"); case ELF32LEKind: return getSrcMsgAux(cast>(*this), sym, sec, offset); case ELF32BEKind: return getSrcMsgAux(cast>(*this), sym, sec, offset); case ELF64LEKind: return getSrcMsgAux(cast>(*this), sym, sec, offset); case ELF64BEKind: return getSrcMsgAux(cast>(*this), sym, sec, offset); } } template void ObjFile::initializeDwarf() { dwarf = llvm::make_unique(make_unique>(this)); for (std::unique_ptr &cu : dwarf->compile_units()) { auto report = [](Error err) { handleAllErrors(std::move(err), [](ErrorInfoBase &info) { warn(info.message()); }); }; Expected expectedLT = dwarf->getLineTableForUnit(cu.get(), report); const DWARFDebugLine::LineTable *lt = nullptr; if (expectedLT) lt = *expectedLT; else report(expectedLT.takeError()); if (!lt) continue; lineTables.push_back(lt); // Loop over variable records and insert them to variableLoc. for (const auto &entry : cu->dies()) { DWARFDie die(cu.get(), &entry); // Skip all tags that are not variables. if (die.getTag() != dwarf::DW_TAG_variable) continue; // Skip if a local variable because we don't need them for generating // error messages. In general, only non-local symbols can fail to be // linked. if (!dwarf::toUnsigned(die.find(dwarf::DW_AT_external), 0)) continue; // Get the source filename index for the variable. unsigned file = dwarf::toUnsigned(die.find(dwarf::DW_AT_decl_file), 0); if (!lt->hasFileAtIndex(file)) continue; // Get the line number on which the variable is declared. unsigned line = dwarf::toUnsigned(die.find(dwarf::DW_AT_decl_line), 0); // Here we want to take the variable name to add it into variableLoc. // Variable can have regular and linkage name associated. 
At first, we try // to get linkage name as it can be different, for example when we have // two variables in different namespaces of the same object. Use common // name otherwise, but handle the case when it also absent in case if the // input object file lacks some debug info. StringRef name = dwarf::toString(die.find(dwarf::DW_AT_linkage_name), dwarf::toString(die.find(dwarf::DW_AT_name), "")); if (!name.empty()) variableLoc.insert({name, {lt, file, line}}); } } } // Returns the pair of file name and line number describing location of data // object (variable, array, etc) definition. template Optional> ObjFile::getVariableLoc(StringRef name) { llvm::call_once(initDwarfLine, [this]() { initializeDwarf(); }); // Return if we have no debug information about data object. auto it = variableLoc.find(name); if (it == variableLoc.end()) return None; // Take file name string from line table. std::string fileName; if (!it->second.lt->getFileNameByIndex( it->second.file, {}, DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, fileName)) return None; return std::make_pair(fileName, it->second.line); } // Returns source line information for a given offset // using DWARF debug info. template Optional ObjFile::getDILineInfo(InputSectionBase *s, uint64_t offset) { llvm::call_once(initDwarfLine, [this]() { initializeDwarf(); }); // Detect SectionIndex for specified section. uint64_t sectionIndex = object::SectionedAddress::UndefSection; ArrayRef sections = s->file->getSections(); for (uint64_t curIndex = 0; curIndex < sections.size(); ++curIndex) { if (s == sections[curIndex]) { sectionIndex = curIndex; break; } } // Use fake address calcuated by adding section file offset and offset in // section. See comments for ObjectInfo class. 
DILineInfo info; for (const llvm::DWARFDebugLine::LineTable *lt : lineTables) { if (lt->getFileLineInfoForAddress( {s->getOffsetInFile() + offset, sectionIndex}, nullptr, DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, info)) return info; } return None; } // Returns "", "foo.a(bar.o)" or "baz.o". std::string lld::toString(const InputFile *f) { if (!f) return ""; if (f->toStringCache.empty()) { if (f->archiveName.empty()) f->toStringCache = f->getName(); else f->toStringCache = (f->archiveName + "(" + f->getName() + ")").str(); } return f->toStringCache; } ELFFileBase::ELFFileBase(Kind k, MemoryBufferRef mb) : InputFile(k, mb) { ekind = getELFKind(mb, ""); switch (ekind) { case ELF32LEKind: init(); break; case ELF32BEKind: init(); break; case ELF64LEKind: init(); break; case ELF64BEKind: init(); break; default: llvm_unreachable("getELFKind"); } } template static const Elf_Shdr *findSection(ArrayRef sections, uint32_t type) { for (const Elf_Shdr &sec : sections) if (sec.sh_type == type) return &sec; return nullptr; } template void ELFFileBase::init() { using Elf_Shdr = typename ELFT::Shdr; using Elf_Sym = typename ELFT::Sym; // Initialize trivial attributes. const ELFFile &obj = getObj(); emachine = obj.getHeader()->e_machine; osabi = obj.getHeader()->e_ident[llvm::ELF::EI_OSABI]; abiVersion = obj.getHeader()->e_ident[llvm::ELF::EI_ABIVERSION]; ArrayRef sections = CHECK(obj.sections(), this); // Find a symbol table. bool isDSO = (identify_magic(mb.getBuffer()) == file_magic::elf_shared_object); const Elf_Shdr *symtabSec = findSection(sections, isDSO ? SHT_DYNSYM : SHT_SYMTAB); if (!symtabSec) return; // Initialize members corresponding to a symbol table. 
firstGlobal = symtabSec->sh_info; ArrayRef eSyms = CHECK(obj.symbols(symtabSec), this); if (firstGlobal == 0 || firstGlobal > eSyms.size()) fatal(toString(this) + ": invalid sh_info in symbol table"); elfSyms = reinterpret_cast(eSyms.data()); numELFSyms = eSyms.size(); stringTable = CHECK(obj.getStringTableForSymtab(*symtabSec, sections), this); } template uint32_t ObjFile::getSectionIndex(const Elf_Sym &sym) const { return CHECK( this->getObj().getSectionIndex(&sym, getELFSyms(), shndxTable), this); } template ArrayRef ObjFile::getLocalSymbols() { if (this->symbols.empty()) return {}; return makeArrayRef(this->symbols).slice(1, this->firstGlobal - 1); } template ArrayRef ObjFile::getGlobalSymbols() { return makeArrayRef(this->symbols).slice(this->firstGlobal); } template void ObjFile::parse(bool ignoreComdats) { // Read a section table. justSymbols is usually false. if (this->justSymbols) initializeJustSymbols(); else initializeSections(ignoreComdats); // Read a symbol table. initializeSymbols(); } // Sections with SHT_GROUP and comdat bits define comdat section groups. // They are identified and deduplicated by group name. This function // returns a group name. template StringRef ObjFile::getShtGroupSignature(ArrayRef sections, const Elf_Shdr &sec) { typename ELFT::SymRange symbols = this->getELFSyms(); if (sec.sh_info >= symbols.size()) fatal(toString(this) + ": invalid symbol index"); const typename ELFT::Sym &sym = symbols[sec.sh_info]; StringRef signature = CHECK(sym.getName(this->stringTable), this); // As a special case, if a symbol is a section symbol and has no name, // we use a section name as a signature. // // Such SHT_GROUP sections are invalid from the perspective of the ELF // standard, but GNU gold 1.14 (the newest version as of July 2017) or // older produce such sections as outputs for the -r option, so we need // a bug-compatibility. 
if (signature.empty() && sym.getType() == STT_SECTION) return getSectionName(sec); return signature; } template bool ObjFile::shouldMerge(const Elf_Shdr &sec) { // On a regular link we don't merge sections if -O0 (default is -O1). This // sometimes makes the linker significantly faster, although the output will // be bigger. // // Doing the same for -r would create a problem as it would combine sections // with different sh_entsize. One option would be to just copy every SHF_MERGE // section as is to the output. While this would produce a valid ELF file with // usable SHF_MERGE sections, tools like (llvm-)?dwarfdump get confused when // they see two .debug_str. We could have separate logic for combining // SHF_MERGE sections based both on their name and sh_entsize, but that seems // to be more trouble than it is worth. Instead, we just use the regular (-O1) // logic for -r. if (config->optimize == 0 && !config->relocatable) return false; // A mergeable section with size 0 is useless because they don't have // any data to merge. A mergeable string section with size 0 can be // argued as invalid because it doesn't end with a null character. // We'll avoid a mess by handling them as if they were non-mergeable. if (sec.sh_size == 0) return false; // Check for sh_entsize. The ELF spec is not clear about the zero // sh_entsize. It says that "the member [sh_entsize] contains 0 if // the section does not hold a table of fixed-size entries". We know // that Rust 1.13 produces a string mergeable section with a zero // sh_entsize. Here we just accept it rather than being picky about it. uint64_t entSize = sec.sh_entsize; if (entSize == 0) return false; if (sec.sh_size % entSize) fatal(toString(this) + ": SHF_MERGE section size must be a multiple of sh_entsize"); uint64_t flags = sec.sh_flags; if (!(flags & SHF_MERGE)) return false; if (flags & SHF_WRITE) fatal(toString(this) + ": writable SHF_MERGE section is not supported"); return true; } // This is for --just-symbols. 
// // --just-symbols is a very minor feature that allows you to link your // output against other existing program, so that if you load both your // program and the other program into memory, your output can refer the // other program's symbols. // // When the option is given, we link "just symbols". The section table is // initialized with null pointers. template void ObjFile::initializeJustSymbols() { ArrayRef sections = CHECK(this->getObj().sections(), this); this->sections.resize(sections.size()); } // An ELF object file may contain a `.deplibs` section. If it exists, the // section contains a list of library specifiers such as `m` for libm. This // function resolves a given name by finding the first matching library checking // the various ways that a library can be specified to LLD. This ELF extension // is a form of autolinking and is called `dependent libraries`. It is currently // unique to LLVM and lld. static void addDependentLibrary(StringRef specifier, const InputFile *f) { if (!config->dependentLibraries) return; if (fs::exists(specifier)) driver->addFile(specifier, /*withLOption=*/false); else if (Optional s = findFromSearchPaths(specifier)) driver->addFile(*s, /*withLOption=*/true); else if (Optional s = searchLibraryBaseName(specifier)) driver->addFile(*s, /*withLOption=*/true); else error(toString(f) + ": unable to find library from dependent library specifier: " + specifier); } template void ObjFile::initializeSections(bool ignoreComdats) { const ELFFile &obj = this->getObj(); ArrayRef objSections = CHECK(obj.sections(), this); uint64_t size = objSections.size(); this->sections.resize(size); this->sectionStringTable = CHECK(obj.getSectionStringTable(objSections), this); for (size_t i = 0, e = objSections.size(); i < e; i++) { if (this->sections[i] == &InputSection::discarded) continue; const Elf_Shdr &sec = objSections[i]; if (sec.sh_type == ELF::SHT_LLVM_CALL_GRAPH_PROFILE) cgProfile = check(obj.template getSectionContentsAsArray(&sec)); // 
SHF_EXCLUDE'ed sections are discarded by the linker. However, // if -r is given, we'll let the final link discard such sections. // This is compatible with GNU. if ((sec.sh_flags & SHF_EXCLUDE) && !config->relocatable) { if (sec.sh_type == SHT_LLVM_ADDRSIG) { // We ignore the address-significance table if we know that the object // file was created by objcopy or ld -r. This is because these tools // will reorder the symbols in the symbol table, invalidating the data // in the address-significance table, which refers to symbols by index. if (sec.sh_link != 0) this->addrsigSec = &sec; else if (config->icf == ICFLevel::Safe) warn(toString(this) + ": --icf=safe is incompatible with object " "files created using objcopy or ld -r"); } this->sections[i] = &InputSection::discarded; continue; } switch (sec.sh_type) { case SHT_GROUP: { // De-duplicate section groups by their signatures. StringRef signature = getShtGroupSignature(objSections, sec); this->sections[i] = &InputSection::discarded; ArrayRef entries = CHECK(obj.template getSectionContentsAsArray(&sec), this); if (entries.empty()) fatal(toString(this) + ": empty SHT_GROUP"); // The first word of a SHT_GROUP section contains flags. Currently, // the standard defines only "GRP_COMDAT" flag for the COMDAT group. // An group with the empty flag doesn't define anything; such sections // are just skipped. if (entries[0] == 0) continue; if (entries[0] != GRP_COMDAT) fatal(toString(this) + ": unsupported SHT_GROUP format"); bool isNew = ignoreComdats || symtab->comdatGroups.try_emplace(CachedHashStringRef(signature), this) .second; if (isNew) { if (config->relocatable) this->sections[i] = createInputSection(sec); continue; } // Otherwise, discard group members. 
for (uint32_t secIndex : entries.slice(1)) { if (secIndex >= size) fatal(toString(this) + ": invalid section index in group: " + Twine(secIndex)); this->sections[secIndex] = &InputSection::discarded; } break; } case SHT_SYMTAB_SHNDX: shndxTable = CHECK(obj.getSHNDXTable(sec, objSections), this); break; case SHT_SYMTAB: case SHT_STRTAB: case SHT_NULL: break; default: this->sections[i] = createInputSection(sec); } // .ARM.exidx sections have a reverse dependency on the InputSection they // have a SHF_LINK_ORDER dependency, this is identified by the sh_link. if (sec.sh_flags & SHF_LINK_ORDER) { InputSectionBase *linkSec = nullptr; if (sec.sh_link < this->sections.size()) linkSec = this->sections[sec.sh_link]; if (!linkSec) fatal(toString(this) + ": invalid sh_link index: " + Twine(sec.sh_link)); InputSection *isec = cast(this->sections[i]); linkSec->dependentSections.push_back(isec); if (!isa(linkSec)) error("a section " + isec->name + " with SHF_LINK_ORDER should not refer a non-regular " "section: " + toString(linkSec)); } } } // For ARM only, to set the EF_ARM_ABI_FLOAT_SOFT or EF_ARM_ABI_FLOAT_HARD // flag in the ELF Header we need to look at Tag_ABI_VFP_args to find out how // the input objects have been compiled. static void updateARMVFPArgs(const ARMAttributeParser &attributes, const InputFile *f) { if (!attributes.hasAttribute(ARMBuildAttrs::ABI_VFP_args)) // If an ABI tag isn't present then it is implicitly given the value of 0 // which maps to ARMBuildAttrs::BaseAAPCS. However many assembler files, // including some in glibc that don't use FP args (and should have value 3) // don't have the attribute so we do not consider an implicit value of 0 // as a clash. 
return; unsigned vfpArgs = attributes.getAttributeValue(ARMBuildAttrs::ABI_VFP_args); ARMVFPArgKind arg; switch (vfpArgs) { case ARMBuildAttrs::BaseAAPCS: arg = ARMVFPArgKind::Base; break; case ARMBuildAttrs::HardFPAAPCS: arg = ARMVFPArgKind::VFP; break; case ARMBuildAttrs::ToolChainFPPCS: // Tool chain specific convention that conforms to neither AAPCS variant. arg = ARMVFPArgKind::ToolChain; break; case ARMBuildAttrs::CompatibleFPAAPCS: // Object compatible with all conventions. return; default: error(toString(f) + ": unknown Tag_ABI_VFP_args value: " + Twine(vfpArgs)); return; } // Follow ld.bfd and error if there is a mix of calling conventions. if (config->armVFPArgs != arg && config->armVFPArgs != ARMVFPArgKind::Default) error(toString(f) + ": incompatible Tag_ABI_VFP_args"); else config->armVFPArgs = arg; } // The ARM support in lld makes some use of instructions that are not available // on all ARM architectures. Namely: // - Use of BLX instruction for interworking between ARM and Thumb state. // - Use of the extended Thumb branch encoding in relocation. // - Use of the MOVT/MOVW instructions in Thumb Thunks. // The ARM Attributes section contains information about the architecture chosen // at compile time. We follow the convention that if at least one input object // is compiled with an architecture that supports these features then lld is // permitted to use them. 
static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) { if (!attributes.hasAttribute(ARMBuildAttrs::CPU_arch)) return; auto arch = attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); switch (arch) { case ARMBuildAttrs::Pre_v4: case ARMBuildAttrs::v4: case ARMBuildAttrs::v4T: // Architectures prior to v5 do not support BLX instruction break; case ARMBuildAttrs::v5T: case ARMBuildAttrs::v5TE: case ARMBuildAttrs::v5TEJ: case ARMBuildAttrs::v6: case ARMBuildAttrs::v6KZ: case ARMBuildAttrs::v6K: config->armHasBlx = true; // Architectures used in pre-Cortex processors do not support // The J1 = 1 J2 = 1 Thumb branch range extension, with the exception // of Architecture v6T2 (arm1156t2-s and arm1156t2f-s) that do. break; default: // All other Architectures have BLX and extended branch encoding config->armHasBlx = true; config->armJ1J2BranchEncoding = true; if (arch != ARMBuildAttrs::v6_M && arch != ARMBuildAttrs::v6S_M) // All Architectures used in Cortex processors with the exception // of v6-M and v6S-M have the MOVT and MOVW instructions. config->armHasMovtMovw = true; break; } } // If a source file is compiled with x86 hardware-assisted call flow control // enabled, the generated object file contains feature flags indicating that // fact. This function reads the feature flags and returns it. // // Essentially we want to read a single 32-bit value in this function, but this // function is rather complicated because the value is buried deep inside a // .note.gnu.property section. // // The section consists of one or more NOTE records. Each NOTE record consists // of zero or more type-length-value fields. We want to find a field of a // certain type. It seems a bit too much to just store a 32-bit value, perhaps // the ABI is unnecessarily complicated. 
template static uint32_t readAndFeatures(ObjFile *obj, ArrayRef data) { using Elf_Nhdr = typename ELFT::Nhdr; using Elf_Note = typename ELFT::Note; uint32_t featuresSet = 0; while (!data.empty()) { // Read one NOTE record. if (data.size() < sizeof(Elf_Nhdr)) fatal(toString(obj) + ": .note.gnu.property: section too short"); auto *nhdr = reinterpret_cast(data.data()); if (data.size() < nhdr->getSize()) fatal(toString(obj) + ": .note.gnu.property: section too short"); Elf_Note note(*nhdr); if (nhdr->n_type != NT_GNU_PROPERTY_TYPE_0 || note.getName() != "GNU") { data = data.slice(nhdr->getSize()); continue; } uint32_t featureAndType = config->emachine == EM_AARCH64 ? GNU_PROPERTY_AARCH64_FEATURE_1_AND : GNU_PROPERTY_X86_FEATURE_1_AND; // Read a body of a NOTE record, which consists of type-length-value fields. ArrayRef desc = note.getDesc(); while (!desc.empty()) { if (desc.size() < 8) fatal(toString(obj) + ": .note.gnu.property: section too short"); uint32_t type = read32le(desc.data()); uint32_t size = read32le(desc.data() + 4); if (type == featureAndType) { // We found a FEATURE_1_AND field. There may be more than one of these // in a .note.gnu.propery section, for a relocatable object we // accumulate the bits set. featuresSet |= read32le(desc.data() + 8); } // On 64-bit, a payload may be followed by a 4-byte padding to make its // size a multiple of 8. if (ELFT::Is64Bits) size = alignTo(size, 8); desc = desc.slice(size + 8); // +8 for Type and Size } // Go to next NOTE record to look for more FEATURE_1_AND descriptions. data = data.slice(nhdr->getSize()); } return featuresSet; } template InputSectionBase *ObjFile::getRelocTarget(const Elf_Shdr &sec) { uint32_t idx = sec.sh_info; if (idx >= this->sections.size()) fatal(toString(this) + ": invalid relocated section index: " + Twine(idx)); InputSectionBase *target = this->sections[idx]; // Strictly speaking, a relocation section must be included in the // group of the section it relocates. 
However, LLVM 3.3 and earlier // would fail to do so, so we gracefully handle that case. if (target == &InputSection::discarded) return nullptr; if (!target) fatal(toString(this) + ": unsupported relocation reference"); return target; } // Create a regular InputSection class that has the same contents // as a given section. static InputSection *toRegularSection(MergeInputSection *sec) { return make(sec->file, sec->flags, sec->type, sec->alignment, sec->data(), sec->name); } template InputSectionBase *ObjFile::createInputSection(const Elf_Shdr &sec) { StringRef name = getSectionName(sec); switch (sec.sh_type) { case SHT_ARM_ATTRIBUTES: { if (config->emachine != EM_ARM) break; ARMAttributeParser attributes; ArrayRef contents = check(this->getObj().getSectionContents(&sec)); attributes.Parse(contents, /*isLittle*/ config->ekind == ELF32LEKind); updateSupportedARMFeatures(attributes); updateARMVFPArgs(attributes, this); // FIXME: Retain the first attribute section we see. The eglibc ARM // dynamic loaders require the presence of an attribute section for dlopen // to work. In a full implementation we would merge all attribute sections. if (in.armAttributes == nullptr) { in.armAttributes = make(*this, sec, name); return in.armAttributes; } return &InputSection::discarded; } case SHT_LLVM_DEPENDENT_LIBRARIES: { if (config->relocatable) break; ArrayRef data = CHECK(this->getObj().template getSectionContentsAsArray(&sec), this); if (!data.empty() && data.back() != '\0') { error(toString(this) + ": corrupted dependent libraries section (unterminated string): " + name); return &InputSection::discarded; } for (const char *d = data.begin(), *e = data.end(); d < e;) { StringRef s(d); addDependentLibrary(s, this); d += s.size() + 1; } return &InputSection::discarded; } case SHT_RELA: case SHT_REL: { // Find a relocation target section and associate this section with that. 
// Target may have been discarded if it is in a different section group // and the group is discarded, even though it's a violation of the // spec. We handle that situation gracefully by discarding dangling // relocation sections. InputSectionBase *target = getRelocTarget(sec); if (!target) return nullptr; // This section contains relocation information. // If -r is given, we do not interpret or apply relocation // but just copy relocation sections to output. if (config->relocatable) { InputSection *relocSec = make(*this, sec, name); // We want to add a dependency to target, similar like we do for // -emit-relocs below. This is useful for the case when linker script // contains the "/DISCARD/". It is perhaps uncommon to use a script with // -r, but we faced it in the Linux kernel and have to handle such case // and not to crash. target->dependentSections.push_back(relocSec); return relocSec; } if (target->firstRelocation) fatal(toString(this) + ": multiple relocation sections to one section are not supported"); // ELF spec allows mergeable sections with relocations, but they are // rare, and it is in practice hard to merge such sections by contents, // because applying relocations at end of linking changes section // contents. So, we simply handle such sections as non-mergeable ones. // Degrading like this is acceptable because section merging is optional. 
if (auto *ms = dyn_cast(target)) { target = toRegularSection(ms); this->sections[sec.sh_info] = target; } if (sec.sh_type == SHT_RELA) { ArrayRef rels = CHECK(getObj().relas(&sec), this); target->firstRelocation = rels.begin(); target->numRelocations = rels.size(); target->areRelocsRela = true; } else { ArrayRef rels = CHECK(getObj().rels(&sec), this); target->firstRelocation = rels.begin(); target->numRelocations = rels.size(); target->areRelocsRela = false; } assert(isUInt<31>(target->numRelocations)); // Relocation sections processed by the linker are usually removed // from the output, so returning `nullptr` for the normal case. // However, if -emit-relocs is given, we need to leave them in the output. // (Some post link analysis tools need this information.) if (config->emitRelocs) { InputSection *relocSec = make(*this, sec, name); // We will not emit relocation section if target was discarded. target->dependentSections.push_back(relocSec); return relocSec; } return nullptr; } } // The GNU linker uses .note.GNU-stack section as a marker indicating // that the code in the object file does not expect that the stack is // executable (in terms of NX bit). If all input files have the marker, // the GNU linker adds a PT_GNU_STACK segment to tells the loader to // make the stack non-executable. Most object files have this section as // of 2017. // // But making the stack non-executable is a norm today for security // reasons. Failure to do so may result in a serious security issue. // Therefore, we make LLD always add PT_GNU_STACK unless it is // explicitly told to do otherwise (by -z execstack). Because the stack // executable-ness is controlled solely by command line options, // .note.GNU-stack sections are simply ignored. 
if (name == ".note.GNU-stack") return &InputSection::discarded; // Object files that use processor features such as Intel Control-Flow // Enforcement (CET) or AArch64 Branch Target Identification BTI, use a // .note.gnu.property section containing a bitfield of feature bits like the // GNU_PROPERTY_X86_FEATURE_1_IBT flag. Read a bitmap containing the flag. // // Since we merge bitmaps from multiple object files to create a new // .note.gnu.property containing a single AND'ed bitmap, we discard an input // file's .note.gnu.property section. if (name == ".note.gnu.property") { ArrayRef contents = check(this->getObj().getSectionContents(&sec)); this->andFeatures = readAndFeatures(this, contents); return &InputSection::discarded; } // Split stacks is a feature to support a discontiguous stack, // commonly used in the programming language Go. For the details, // see https://gcc.gnu.org/wiki/SplitStacks. An object file compiled // for split stack will include a .note.GNU-split-stack section. if (name == ".note.GNU-split-stack") { if (config->relocatable) { error("cannot mix split-stack and non-split-stack in a relocatable link"); return &InputSection::discarded; } this->splitStack = true; return &InputSection::discarded; } // An object file cmpiled for split stack, but where some of the // functions were compiled with the no_split_stack_attribute will // include a .note.GNU-no-split-stack section. if (name == ".note.GNU-no-split-stack") { this->someNoSplitStack = true; return &InputSection::discarded; } // The linkonce feature is a sort of proto-comdat. Some glibc i386 object // files contain definitions of symbol "__x86.get_pc_thunk.bx" in linkonce // sections. Drop those sections to avoid duplicate symbol errors. // FIXME: This is glibc PR20543, we should remove this hack once that has been // fixed for a while. 
if (name == ".gnu.linkonce.t.__x86.get_pc_thunk.bx" || name == ".gnu.linkonce.t.__i686.get_pc_thunk.bx") return &InputSection::discarded; // If we are creating a new .build-id section, strip existing .build-id // sections so that the output won't have more than one .build-id. // This is not usually a problem because input object files normally don't // have .build-id sections, but you can create such files by // "ld.{bfd,gold,lld} -r --build-id", and we want to guard against it. if (name == ".note.gnu.build-id" && config->buildId != BuildIdKind::None) return &InputSection::discarded; // The linker merges EH (exception handling) frames and creates a // .eh_frame_hdr section for runtime. So we handle them with a special // class. For relocatable outputs, they are just passed through. if (name == ".eh_frame" && !config->relocatable) return make(*this, sec, name); if (shouldMerge(sec)) return make(*this, sec, name); return make(*this, sec, name); } template StringRef ObjFile::getSectionName(const Elf_Shdr &sec) { return CHECK(getObj().getSectionName(&sec, sectionStringTable), this); } // Initialize this->Symbols. this->Symbols is a parallel array as // its corresponding ELF symbol table. template void ObjFile::initializeSymbols() { ArrayRef eSyms = this->getELFSyms(); this->symbols.resize(eSyms.size()); // Our symbol table may have already been partially initialized // because of LazyObjFile. for (size_t i = 0, end = eSyms.size(); i != end; ++i) if (!this->symbols[i] && eSyms[i].getBinding() != STB_LOCAL) this->symbols[i] = symtab->insert(CHECK(eSyms[i].getName(this->stringTable), this)); // Fill this->Symbols. A symbol is either local or global. for (size_t i = 0, end = eSyms.size(); i != end; ++i) { const Elf_Sym &eSym = eSyms[i]; // Read symbol attributes. 
uint32_t secIdx = getSectionIndex(eSym); if (secIdx >= this->sections.size()) fatal(toString(this) + ": invalid section index: " + Twine(secIdx)); InputSectionBase *sec = this->sections[secIdx]; uint8_t binding = eSym.getBinding(); uint8_t stOther = eSym.st_other; uint8_t type = eSym.getType(); uint64_t value = eSym.st_value; uint64_t size = eSym.st_size; StringRefZ name = this->stringTable.data() + eSym.st_name; // Handle local symbols. Local symbols are not added to the symbol // table because they are not visible from other object files. We // allocate symbol instances and add their pointers to Symbols. if (binding == STB_LOCAL) { if (eSym.getType() == STT_FILE) sourceFile = CHECK(eSym.getName(this->stringTable), this); if (this->stringTable.size() <= eSym.st_name) fatal(toString(this) + ": invalid symbol name offset"); if (eSym.st_shndx == SHN_UNDEF) this->symbols[i] = make(this, name, binding, stOther, type); else if (sec == &InputSection::discarded) this->symbols[i] = make(this, name, binding, stOther, type, /*DiscardedSecIdx=*/secIdx); else this->symbols[i] = make(this, name, binding, stOther, type, value, size, sec); continue; } // Handle global undefined symbols. if (eSym.st_shndx == SHN_UNDEF) { this->symbols[i]->resolve(Undefined{this, name, binding, stOther, type}); continue; } // Handle global common symbols. if (eSym.st_shndx == SHN_COMMON) { if (value == 0 || value >= UINT32_MAX) fatal(toString(this) + ": common symbol '" + StringRef(name.data) + "' has invalid alignment: " + Twine(value)); this->symbols[i]->resolve( CommonSymbol{this, name, binding, stOther, type, value, size}); continue; } // If a defined symbol is in a discarded section, handle it as if it // were an undefined symbol. Such symbol doesn't comply with the // standard, but in practice, a .eh_frame often directly refer // COMDAT member sections, and if a comdat group is discarded, some // defined symbol in a .eh_frame becomes dangling symbols. 
if (sec == &InputSection::discarded) { this->symbols[i]->resolve( Undefined{this, name, binding, stOther, type, secIdx}); continue; } // Handle global defined symbols. if (binding == STB_GLOBAL || binding == STB_WEAK || binding == STB_GNU_UNIQUE) { this->symbols[i]->resolve( Defined{this, name, binding, stOther, type, value, size, sec}); continue; } fatal(toString(this) + ": unexpected binding: " + Twine((int)binding)); } } ArchiveFile::ArchiveFile(std::unique_ptr &&file) : InputFile(ArchiveKind, file->getMemoryBufferRef()), file(std::move(file)) {} void ArchiveFile::parse() { for (const Archive::Symbol &sym : file->symbols()) symtab->addSymbol(LazyArchive{*this, sym}); } // Returns a buffer pointing to a member file containing a given symbol. void ArchiveFile::fetch(const Archive::Symbol &sym) { Archive::Child c = CHECK(sym.getMember(), toString(this) + ": could not get the member for symbol " + - sym.getName()); + toELFString(sym)); if (!seen.insert(c.getChildOffset()).second) return; MemoryBufferRef mb = CHECK(c.getMemoryBufferRef(), toString(this) + ": could not get the buffer for the member defining symbol " + - sym.getName()); + toELFString(sym)); if (tar && c.getParent()->isThin()) tar->append(relativeToRoot(CHECK(c.getFullName(), this)), mb.getBuffer()); InputFile *file = createObjectFile( mb, getName(), c.getParent()->isThin() ? 0 : c.getChildOffset()); file->groupId = groupId; parseFile(file); } unsigned SharedFile::vernauxNum; // Parse the version definitions in the object file if present, and return a // vector whose nth element contains a pointer to the Elf_Verdef for version // identifier n. Version identifiers that are not definitions map to nullptr. 
template static std::vector parseVerdefs(const uint8_t *base, const typename ELFT::Shdr *sec) { if (!sec) return {}; // We cannot determine the largest verdef identifier without inspecting // every Elf_Verdef, but both bfd and gold assign verdef identifiers // sequentially starting from 1, so we predict that the largest identifier // will be verdefCount. unsigned verdefCount = sec->sh_info; std::vector verdefs(verdefCount + 1); // Build the Verdefs array by following the chain of Elf_Verdef objects // from the start of the .gnu.version_d section. const uint8_t *verdef = base + sec->sh_offset; for (unsigned i = 0; i != verdefCount; ++i) { auto *curVerdef = reinterpret_cast(verdef); verdef += curVerdef->vd_next; unsigned verdefIndex = curVerdef->vd_ndx; verdefs.resize(verdefIndex + 1); verdefs[verdefIndex] = curVerdef; } return verdefs; } // We do not usually care about alignments of data in shared object // files because the loader takes care of it. However, if we promote a // DSO symbol to point to .bss due to copy relocation, we need to keep // the original alignment requirements. We infer it in this function. template static uint64_t getAlignment(ArrayRef sections, const typename ELFT::Sym &sym) { uint64_t ret = UINT64_MAX; if (sym.st_value) ret = 1ULL << countTrailingZeros((uint64_t)sym.st_value); if (0 < sym.st_shndx && sym.st_shndx < sections.size()) ret = std::min(ret, sections[sym.st_shndx].sh_addralign); return (ret > UINT32_MAX) ? 0 : ret; } // Fully parse the shared object file. // // This function parses symbol versions. If a DSO has version information, // the file has a ".gnu.version_d" section which contains symbol version // definitions. Each symbol is associated to one version through a table in // ".gnu.version" section. That table is a parallel array for the symbol // table, and each table entry contains an index in ".gnu.version_d". // // The special index 0 is reserved for VERF_NDX_LOCAL and 1 is for // VER_NDX_GLOBAL. 
There's no table entry for these special versions in // ".gnu.version_d". // // The file format for symbol versioning is perhaps a bit more complicated // than necessary, but you can easily understand the code if you wrap your // head around the data structure described above. template void SharedFile::parse() { using Elf_Dyn = typename ELFT::Dyn; using Elf_Shdr = typename ELFT::Shdr; using Elf_Sym = typename ELFT::Sym; using Elf_Verdef = typename ELFT::Verdef; using Elf_Versym = typename ELFT::Versym; ArrayRef dynamicTags; const ELFFile obj = this->getObj(); ArrayRef sections = CHECK(obj.sections(), this); const Elf_Shdr *versymSec = nullptr; const Elf_Shdr *verdefSec = nullptr; // Search for .dynsym, .dynamic, .symtab, .gnu.version and .gnu.version_d. for (const Elf_Shdr &sec : sections) { switch (sec.sh_type) { default: continue; case SHT_DYNAMIC: dynamicTags = CHECK(obj.template getSectionContentsAsArray(&sec), this); break; case SHT_GNU_versym: versymSec = &sec; break; case SHT_GNU_verdef: verdefSec = &sec; break; } } if (versymSec && numELFSyms == 0) { error("SHT_GNU_versym should be associated with symbol table"); return; } // Search for a DT_SONAME tag to initialize this->soName. for (const Elf_Dyn &dyn : dynamicTags) { if (dyn.d_tag == DT_NEEDED) { uint64_t val = dyn.getVal(); if (val >= this->stringTable.size()) fatal(toString(this) + ": invalid DT_NEEDED entry"); dtNeeded.push_back(this->stringTable.data() + val); } else if (dyn.d_tag == DT_SONAME) { uint64_t val = dyn.getVal(); if (val >= this->stringTable.size()) fatal(toString(this) + ": invalid DT_SONAME entry"); soName = this->stringTable.data() + val; } } // DSOs are uniquified not by filename but by soname. 
DenseMap::iterator it; bool wasInserted; std::tie(it, wasInserted) = symtab->soNames.try_emplace(soName, this); // If a DSO appears more than once on the command line with and without // --as-needed, --no-as-needed takes precedence over --as-needed because a // user can add an extra DSO with --no-as-needed to force it to be added to // the dependency list. it->second->isNeeded |= isNeeded; if (!wasInserted) return; sharedFiles.push_back(this); verdefs = parseVerdefs(obj.base(), verdefSec); // Parse ".gnu.version" section which is a parallel array for the symbol // table. If a given file doesn't have a ".gnu.version" section, we use // VER_NDX_GLOBAL. size_t size = numELFSyms - firstGlobal; std::vector versyms(size, VER_NDX_GLOBAL); if (versymSec) { ArrayRef versym = CHECK(obj.template getSectionContentsAsArray(versymSec), this) .slice(firstGlobal); for (size_t i = 0; i < size; ++i) versyms[i] = versym[i].vs_index; } // System libraries can have a lot of symbols with versions. Using a // fixed buffer for computing the versions name (foo@ver) can save a // lot of allocations. SmallString<0> versionedNameBuffer; // Add symbols to the symbol table. ArrayRef syms = this->getGlobalELFSyms(); for (size_t i = 0; i < syms.size(); ++i) { const Elf_Sym &sym = syms[i]; // ELF spec requires that all local symbols precede weak or global // symbols in each symbol table, and the index of first non-local symbol // is stored to sh_info. If a local symbol appears after some non-local // symbol, that's a violation of the spec. 
StringRef name = CHECK(sym.getName(this->stringTable), this); if (sym.getBinding() == STB_LOCAL) { warn("found local symbol '" + name + "' in global part of symbol table in file " + toString(this)); continue; } if (sym.isUndefined()) { Symbol *s = symtab->addSymbol( Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()}); s->exportDynamic = true; continue; } // MIPS BFD linker puts _gp_disp symbol into DSO files and incorrectly // assigns VER_NDX_LOCAL to this section global symbol. Here is a // workaround for this bug. uint32_t idx = versyms[i] & ~VERSYM_HIDDEN; if (config->emachine == EM_MIPS && idx == VER_NDX_LOCAL && name == "_gp_disp") continue; uint32_t alignment = getAlignment(sections, sym); if (!(versyms[i] & VERSYM_HIDDEN)) { symtab->addSymbol(SharedSymbol{*this, name, sym.getBinding(), sym.st_other, sym.getType(), sym.st_value, sym.st_size, alignment, idx}); } // Also add the symbol with the versioned name to handle undefined symbols // with explicit versions. if (idx == VER_NDX_GLOBAL) continue; if (idx >= verdefs.size() || idx == VER_NDX_LOCAL) { error("corrupt input file: version definition index " + Twine(idx) + " for symbol " + name + " is out of bounds\n>>> defined in " + toString(this)); continue; } StringRef verName = this->stringTable.data() + reinterpret_cast(verdefs[idx])->getAux()->vda_name; versionedNameBuffer.clear(); name = (name + "@" + verName).toStringRef(versionedNameBuffer); symtab->addSymbol(SharedSymbol{*this, saver.save(name), sym.getBinding(), sym.st_other, sym.getType(), sym.st_value, sym.st_size, alignment, idx}); } } static ELFKind getBitcodeELFKind(const Triple &t) { if (t.isLittleEndian()) return t.isArch64Bit() ? ELF64LEKind : ELF32LEKind; return t.isArch64Bit() ? 
ELF64BEKind : ELF32BEKind; } static uint8_t getBitcodeMachineKind(StringRef path, const Triple &t) { switch (t.getArch()) { case Triple::aarch64: return EM_AARCH64; case Triple::amdgcn: case Triple::r600: return EM_AMDGPU; case Triple::arm: case Triple::thumb: return EM_ARM; case Triple::avr: return EM_AVR; case Triple::mips: case Triple::mipsel: case Triple::mips64: case Triple::mips64el: return EM_MIPS; case Triple::msp430: return EM_MSP430; case Triple::ppc: return EM_PPC; case Triple::ppc64: case Triple::ppc64le: return EM_PPC64; case Triple::riscv32: case Triple::riscv64: return EM_RISCV; case Triple::x86: return t.isOSIAMCU() ? EM_IAMCU : EM_386; case Triple::x86_64: return EM_X86_64; default: error(path + ": could not infer e_machine from bitcode target triple " + t.str()); return EM_NONE; } } BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName, uint64_t offsetInArchive) : InputFile(BitcodeKind, mb) { this->archiveName = archiveName; std::string path = mb.getBufferIdentifier().str(); if (config->thinLTOIndexOnly) path = replaceThinLTOSuffix(mb.getBufferIdentifier()); // ThinLTO assumes that all MemoryBufferRefs given to it have a unique // name. If two archives define two members with the same name, this // causes a collision which result in only one of the objects being taken // into consideration at LTO time (which very likely causes undefined // symbols later in the link stage). So we append file offset to make // filename unique. StringRef name = archiveName.empty() ? 
saver.save(path) : saver.save(archiveName + "(" + path + " at " + utostr(offsetInArchive) + ")"); MemoryBufferRef mbref(mb.getBuffer(), name); obj = CHECK(lto::InputFile::create(mbref), this); Triple t(obj->getTargetTriple()); ekind = getBitcodeELFKind(t); emachine = getBitcodeMachineKind(mb.getBufferIdentifier(), t); } static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) { switch (gvVisibility) { case GlobalValue::DefaultVisibility: return STV_DEFAULT; case GlobalValue::HiddenVisibility: return STV_HIDDEN; case GlobalValue::ProtectedVisibility: return STV_PROTECTED; } llvm_unreachable("unknown visibility"); } template static Symbol *createBitcodeSymbol(const std::vector &keptComdats, const lto::InputFile::Symbol &objSym, BitcodeFile &f) { StringRef name = saver.save(objSym.getName()); uint8_t binding = objSym.isWeak() ? STB_WEAK : STB_GLOBAL; uint8_t type = objSym.isTLS() ? STT_TLS : STT_NOTYPE; uint8_t visibility = mapVisibility(objSym.getVisibility()); bool canOmitFromDynSym = objSym.canBeOmittedFromSymbolTable(); int c = objSym.getComdatIndex(); if (objSym.isUndefined() || (c != -1 && !keptComdats[c])) { Undefined New(&f, name, binding, visibility, type); if (canOmitFromDynSym) New.exportDynamic = false; return symtab->addSymbol(New); } if (objSym.isCommon()) return symtab->addSymbol( CommonSymbol{&f, name, binding, visibility, STT_OBJECT, objSym.getCommonAlignment(), objSym.getCommonSize()}); Defined New(&f, name, binding, visibility, type, 0, 0, nullptr); if (canOmitFromDynSym) New.exportDynamic = false; return symtab->addSymbol(New); } template void BitcodeFile::parse() { std::vector keptComdats; for (StringRef s : obj->getComdatTable()) keptComdats.push_back( symtab->comdatGroups.try_emplace(CachedHashStringRef(s), this).second); for (const lto::InputFile::Symbol &objSym : obj->symbols()) symbols.push_back(createBitcodeSymbol(keptComdats, objSym, *this)); for (auto l : obj->getDependentLibraries()) addDependentLibrary(l, this); } void 
BinaryFile::parse() { ArrayRef data = arrayRefFromStringRef(mb.getBuffer()); auto *section = make(this, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, 8, data, ".data"); sections.push_back(section); // For each input file foo that is embedded to a result as a binary // blob, we define _binary_foo_{start,end,size} symbols, so that // user programs can access blobs by name. Non-alphanumeric // characters in a filename are replaced with underscore. std::string s = "_binary_" + mb.getBufferIdentifier().str(); for (size_t i = 0; i < s.size(); ++i) if (!isAlnum(s[i])) s[i] = '_'; symtab->addSymbol(Defined{nullptr, saver.save(s + "_start"), STB_GLOBAL, STV_DEFAULT, STT_OBJECT, 0, 0, section}); symtab->addSymbol(Defined{nullptr, saver.save(s + "_end"), STB_GLOBAL, STV_DEFAULT, STT_OBJECT, data.size(), 0, section}); symtab->addSymbol(Defined{nullptr, saver.save(s + "_size"), STB_GLOBAL, STV_DEFAULT, STT_OBJECT, data.size(), 0, nullptr}); } InputFile *elf::createObjectFile(MemoryBufferRef mb, StringRef archiveName, uint64_t offsetInArchive) { if (isBitcode(mb)) return make(mb, archiveName, offsetInArchive); switch (getELFKind(mb, archiveName)) { case ELF32LEKind: return make>(mb, archiveName); case ELF32BEKind: return make>(mb, archiveName); case ELF64LEKind: return make>(mb, archiveName); case ELF64BEKind: return make>(mb, archiveName); default: llvm_unreachable("getELFKind"); } } void LazyObjFile::fetch() { if (mb.getBuffer().empty()) return; InputFile *file = createObjectFile(mb, archiveName, offsetInArchive); file->groupId = groupId; mb = {}; // Copy symbol vector so that the new InputFile doesn't have to // insert the same defined symbols to the symbol table again. file->symbols = std::move(symbols); parseFile(file); } template void LazyObjFile::parse() { using Elf_Sym = typename ELFT::Sym; // A lazy object file wraps either a bitcode file or an ELF file. 
if (isBitcode(this->mb)) { std::unique_ptr obj = CHECK(lto::InputFile::create(this->mb), this); for (const lto::InputFile::Symbol &sym : obj->symbols()) { if (sym.isUndefined()) continue; symtab->addSymbol(LazyObject{*this, saver.save(sym.getName())}); } return; } if (getELFKind(this->mb, archiveName) != config->ekind) { error("incompatible file: " + this->mb.getBufferIdentifier()); return; } // Find a symbol table. ELFFile obj = check(ELFFile::create(mb.getBuffer())); ArrayRef sections = CHECK(obj.sections(), this); for (const typename ELFT::Shdr &sec : sections) { if (sec.sh_type != SHT_SYMTAB) continue; // A symbol table is found. ArrayRef eSyms = CHECK(obj.symbols(&sec), this); uint32_t firstGlobal = sec.sh_info; StringRef strtab = CHECK(obj.getStringTableForSymtab(sec, sections), this); this->symbols.resize(eSyms.size()); // Get existing symbols or insert placeholder symbols. for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) if (eSyms[i].st_shndx != SHN_UNDEF) this->symbols[i] = symtab->insert(CHECK(eSyms[i].getName(strtab), this)); // Replace existing symbols with LazyObject symbols. // // resolve() may trigger this->fetch() if an existing symbol is an // undefined symbol. If that happens, this LazyObjFile has served // its purpose, and we can exit from the loop early. for (Symbol *sym : this->symbols) { if (!sym) continue; sym->resolve(LazyObject{*this, sym->getName()}); // MemoryBuffer is emptied if this file is instantiated as ObjFile. 
if (mb.getBuffer().empty()) return; } return; } } std::string elf::replaceThinLTOSuffix(StringRef path) { StringRef suffix = config->thinLTOObjectSuffixReplace.first; StringRef repl = config->thinLTOObjectSuffixReplace.second; if (path.consume_back(suffix)) return (path + repl).str(); return path; } template void BitcodeFile::parse(); template void BitcodeFile::parse(); template void BitcodeFile::parse(); template void BitcodeFile::parse(); template void LazyObjFile::parse(); template void LazyObjFile::parse(); template void LazyObjFile::parse(); template void LazyObjFile::parse(); template class elf::ObjFile; template class elf::ObjFile; template class elf::ObjFile; template class elf::ObjFile; template void SharedFile::parse(); template void SharedFile::parse(); template void SharedFile::parse(); template void SharedFile::parse(); Index: vendor/lld/dist-release_90/ELF/Symbols.cpp =================================================================== --- vendor/lld/dist-release_90/ELF/Symbols.cpp (revision 351717) +++ vendor/lld/dist-release_90/ELF/Symbols.cpp (revision 351718) @@ -1,656 +1,663 @@ //===- Symbols.cpp --------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "Symbols.h" #include "InputFiles.h" #include "InputSection.h" #include "OutputSections.h" #include "SyntheticSections.h" #include "Target.h" #include "Writer.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Strings.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Path.h" #include using namespace llvm; using namespace llvm::object; using namespace llvm::ELF; using namespace lld; using namespace lld::elf; Defined *ElfSym::bss; Defined *ElfSym::etext1; Defined *ElfSym::etext2; Defined *ElfSym::edata1; Defined *ElfSym::edata2; Defined *ElfSym::end1; Defined *ElfSym::end2; Defined *ElfSym::globalOffsetTable; Defined *ElfSym::mipsGp; Defined *ElfSym::mipsGpDisp; Defined *ElfSym::mipsLocalGp; Defined *ElfSym::relaIpltStart; Defined *ElfSym::relaIpltEnd; Defined *ElfSym::riscvGlobalPointer; Defined *ElfSym::tlsModuleBase; +// Returns a symbol for an error message. +static std::string demangle(StringRef symName) { + if (config->demangle) + if (Optional s = demangleItanium(symName)) + return *s; + return symName; +} +namespace lld { +std::string toString(const Symbol &b) { return demangle(b.getName()); } +std::string toELFString(const Archive::Symbol &b) { + return demangle(b.getName()); +} +} // namespace lld + static uint64_t getSymVA(const Symbol &sym, int64_t &addend) { switch (sym.kind()) { case Symbol::DefinedKind: { auto &d = cast(sym); SectionBase *isec = d.section; // This is an absolute symbol. if (!isec) return d.value; assert(isec != &InputSection::discarded); isec = isec->repl; uint64_t offset = d.value; // An object in an SHF_MERGE section might be referenced via a // section symbol (as a hack for reducing the number of local // symbols). // Depending on the addend, the reference via a section symbol // refers to a different object in the merge section. 
// Since the objects in the merge section are not necessarily // contiguous in the output, the addend can thus affect the final // VA in a non-linear way. // To make this work, we incorporate the addend into the section // offset (and zero out the addend for later processing) so that // we find the right object in the section. if (d.isSection()) { offset += addend; addend = 0; } // In the typical case, this is actually very simple and boils // down to adding together 3 numbers: // 1. The address of the output section. // 2. The offset of the input section within the output section. // 3. The offset within the input section (this addition happens // inside InputSection::getOffset). // // If you understand the data structures involved with this next // line (and how they get built), then you have a pretty good // understanding of the linker. uint64_t va = isec->getVA(offset); // MIPS relocatable files can mix regular and microMIPS code. // Linker needs to distinguish such code. To do so microMIPS // symbols has the `STO_MIPS_MICROMIPS` flag in the `st_other` // field. Unfortunately, the `MIPS::relocateOne()` method has // a symbol value only. To pass type of the symbol (regular/microMIPS) // to that routine as well as other places where we write // a symbol value as-is (.dynamic section, `Elf_Ehdr::e_entry` // field etc) do the same trick as compiler uses to mark microMIPS // for CPU - set the less-significant bit. if (config->emachine == EM_MIPS && isMicroMips() && ((sym.stOther & STO_MIPS_MICROMIPS) || sym.needsPltAddr)) va |= 1; if (d.isTls() && !config->relocatable) { // Use the address of the TLS segment's first section rather than the // segment's address, because segment addresses aren't initialized until // after sections are finalized. (e.g. Measuring the size of .rela.dyn // for Android relocation packing requires knowing TLS symbol addresses // during section finalization.) 
if (!Out::tlsPhdr || !Out::tlsPhdr->firstSec) fatal(toString(d.file) + " has an STT_TLS symbol but doesn't have an SHF_TLS section"); return va - Out::tlsPhdr->firstSec->addr; } return va; } case Symbol::SharedKind: case Symbol::UndefinedKind: return 0; case Symbol::LazyArchiveKind: case Symbol::LazyObjectKind: assert(sym.isUsedInRegularObj && "lazy symbol reached writer"); return 0; case Symbol::CommonKind: llvm_unreachable("common symbol reached writer"); case Symbol::PlaceholderKind: llvm_unreachable("placeholder symbol reached writer"); } llvm_unreachable("invalid symbol kind"); } uint64_t Symbol::getVA(int64_t addend) const { uint64_t outVA = getSymVA(*this, addend); return outVA + addend; } uint64_t Symbol::getGotVA() const { if (gotInIgot) return in.igotPlt->getVA() + getGotPltOffset(); return in.got->getVA() + getGotOffset(); } uint64_t Symbol::getGotOffset() const { return gotIndex * config->wordsize; } uint64_t Symbol::getGotPltVA() const { if (isInIplt) return in.igotPlt->getVA() + getGotPltOffset(); return in.gotPlt->getVA() + getGotPltOffset(); } uint64_t Symbol::getGotPltOffset() const { if (isInIplt) return pltIndex * config->wordsize; return (pltIndex + target->gotPltHeaderEntriesNum) * config->wordsize; } uint64_t Symbol::getPPC64LongBranchOffset() const { assert(ppc64BranchltIndex != 0xffff); return ppc64BranchltIndex * config->wordsize; } uint64_t Symbol::getPltVA() const { PltSection *plt = isInIplt ? in.iplt : in.plt; uint64_t outVA = plt->getVA() + plt->headerSize + pltIndex * target->pltEntrySize; // While linking microMIPS code PLT code are always microMIPS // code. Set the less-significant bit to track that fact. // See detailed comment in the `getSymVA` function. 
if (config->emachine == EM_MIPS && isMicroMips()) outVA |= 1; return outVA; } uint64_t Symbol::getPPC64LongBranchTableVA() const { assert(ppc64BranchltIndex != 0xffff); return in.ppc64LongBranchTarget->getVA() + ppc64BranchltIndex * config->wordsize; } uint64_t Symbol::getSize() const { if (const auto *dr = dyn_cast(this)) return dr->size; return cast(this)->size; } OutputSection *Symbol::getOutputSection() const { if (auto *s = dyn_cast(this)) { if (auto *sec = s->section) return sec->repl->getOutputSection(); return nullptr; } return nullptr; } // If a symbol name contains '@', the characters after that is // a symbol version name. This function parses that. void Symbol::parseSymbolVersion() { StringRef s = getName(); size_t pos = s.find('@'); if (pos == 0 || pos == StringRef::npos) return; StringRef verstr = s.substr(pos + 1); if (verstr.empty()) return; // Truncate the symbol name so that it doesn't include the version string. nameSize = pos; // If this is not in this DSO, it is not a definition. if (!isDefined()) return; // '@@' in a symbol name means the default version. // It is usually the most recent one. bool isDefault = (verstr[0] == '@'); if (isDefault) verstr = verstr.substr(1); for (VersionDefinition &ver : config->versionDefinitions) { if (ver.name != verstr) continue; if (isDefault) versionId = ver.id; else versionId = ver.id | VERSYM_HIDDEN; return; } // It is an error if the specified version is not defined. // Usually version script is not provided when linking executable, // but we may still want to override a versioned symbol from DSO, // so we do not report error in this case. We also do not error // if the symbol has a local version as it won't be in the dynamic // symbol table. 
if (config->shared && versionId != VER_NDX_LOCAL) error(toString(file) + ": symbol " + s + " has undefined version " + verstr); } void Symbol::fetch() const { if (auto *sym = dyn_cast(this)) { cast(sym->file)->fetch(sym->sym); return; } if (auto *sym = dyn_cast(this)) { dyn_cast(sym->file)->fetch(); return; } llvm_unreachable("Symbol::fetch() is called on a non-lazy symbol"); } MemoryBufferRef LazyArchive::getMemberBuffer() { - Archive::Child c = CHECK( - sym.getMember(), "could not get the member for symbol " + sym.getName()); + Archive::Child c = + CHECK(sym.getMember(), + "could not get the member for symbol " + toELFString(sym)); return CHECK(c.getMemoryBufferRef(), "could not get the buffer for the member defining symbol " + - sym.getName()); + toELFString(sym)); } uint8_t Symbol::computeBinding() const { if (config->relocatable) return binding; if (visibility != STV_DEFAULT && visibility != STV_PROTECTED) return STB_LOCAL; if (versionId == VER_NDX_LOCAL && isDefined() && !isPreemptible) return STB_LOCAL; if (!config->gnuUnique && binding == STB_GNU_UNIQUE) return STB_GLOBAL; return binding; } bool Symbol::includeInDynsym() const { if (!config->hasDynSymTab) return false; if (computeBinding() == STB_LOCAL) return false; // If a PIE binary was not linked against any shared libraries, then we can // safely drop weak undef symbols from .dynsym. if (isUndefWeak() && config->pie && sharedFiles.empty()) return false; return isUndefined() || isShared() || exportDynamic; } // Print out a log message for --trace-symbol. 
void elf::printTraceSymbol(const Symbol *sym) { std::string s; if (sym->isUndefined()) s = ": reference to "; else if (sym->isLazy()) s = ": lazy definition of "; else if (sym->isShared()) s = ": shared definition of "; else if (sym->isCommon()) s = ": common definition of "; else s = ": definition of "; message(toString(sym->file) + s + sym->getName()); } void elf::maybeWarnUnorderableSymbol(const Symbol *sym) { if (!config->warnSymbolOrdering) return; // If UnresolvedPolicy::Ignore is used, no "undefined symbol" error/warning // is emitted. It makes sense to not warn on undefined symbols. // // Note, ld.bfd --symbol-ordering-file= does not warn on undefined symbols, // but we don't have to be compatible here. if (sym->isUndefined() && config->unresolvedSymbols == UnresolvedPolicy::Ignore) return; const InputFile *file = sym->file; auto *d = dyn_cast(sym); auto report = [&](StringRef s) { warn(toString(file) + s + sym->getName()); }; if (sym->isUndefined()) report(": unable to order undefined symbol: "); else if (sym->isShared()) report(": unable to order shared symbol: "); else if (d && !d->section) report(": unable to order absolute symbol: "); else if (d && isa(d->section)) report(": unable to order synthetic symbol: "); else if (d && !d->section->repl->isLive()) report(": unable to order discarded symbol: "); -} - -// Returns a symbol for an error message. -std::string lld::toString(const Symbol &b) { - if (config->demangle) - if (Optional s = demangleItanium(b.getName())) - return *s; - return b.getName(); } static uint8_t getMinVisibility(uint8_t va, uint8_t vb) { if (va == STV_DEFAULT) return vb; if (vb == STV_DEFAULT) return va; return std::min(va, vb); } // Merge symbol properties. // // When we have many symbols of the same name, we choose one of them, // and that's the result of symbol resolution. However, symbols that // were not chosen still affect some symbol properties. 
void Symbol::mergeProperties(const Symbol &other) { if (other.exportDynamic) exportDynamic = true; if (other.isUsedInRegularObj) isUsedInRegularObj = true; // DSO symbols do not affect visibility in the output. if (!other.isShared()) visibility = getMinVisibility(visibility, other.visibility); } void Symbol::resolve(const Symbol &other) { mergeProperties(other); if (isPlaceholder()) { replace(other); return; } switch (other.kind()) { case Symbol::UndefinedKind: resolveUndefined(cast(other)); break; case Symbol::CommonKind: resolveCommon(cast(other)); break; case Symbol::DefinedKind: resolveDefined(cast(other)); break; case Symbol::LazyArchiveKind: resolveLazy(cast(other)); break; case Symbol::LazyObjectKind: resolveLazy(cast(other)); break; case Symbol::SharedKind: resolveShared(cast(other)); break; case Symbol::PlaceholderKind: llvm_unreachable("bad symbol kind"); } } void Symbol::resolveUndefined(const Undefined &other) { // An undefined symbol with non default visibility must be satisfied // in the same DSO. // // If this is a non-weak defined symbol in a discarded section, override the // existing undefined symbol for better error message later. if ((isShared() && other.visibility != STV_DEFAULT) || (isUndefined() && other.binding != STB_WEAK && other.discardedSecIdx)) { replace(other); return; } if (traced) printTraceSymbol(&other); if (isLazy()) { // An undefined weak will not fetch archive members. See comment on Lazy in // Symbols.h for the details. if (other.binding == STB_WEAK) { binding = STB_WEAK; type = other.type; return; } // Do extra check for --warn-backrefs. // // --warn-backrefs is an option to prevent an undefined reference from // fetching an archive member written earlier in the command line. It can be // used to keep compatibility with GNU linkers to some degree. // I'll explain the feature and why you may find it useful in this comment. // // lld's symbol resolution semantics is more relaxed than traditional Unix // linkers. 
For example, // // ld.lld foo.a bar.o // // succeeds even if bar.o contains an undefined symbol that has to be // resolved by some object file in foo.a. Traditional Unix linkers don't // allow this kind of backward reference, as they visit each file only once // from left to right in the command line while resolving all undefined // symbols at the moment of visiting. // // In the above case, since there's no undefined symbol when a linker visits // foo.a, no files are pulled out from foo.a, and because the linker forgets // about foo.a after visiting, it can't resolve undefined symbols in bar.o // that could have been resolved otherwise. // // That lld accepts more relaxed form means that (besides it'd make more // sense) you can accidentally write a command line or a build file that // works only with lld, even if you have a plan to distribute it to wider // users who may be using GNU linkers. With --warn-backrefs, you can detect // a library order that doesn't work with other Unix linkers. // // The option is also useful to detect cyclic dependencies between static // archives. Again, lld accepts // // ld.lld foo.a bar.a // // even if foo.a and bar.a depend on each other. With --warn-backrefs, it is // handled as an error. // // Here is how the option works. We assign a group ID to each file. A file // with a smaller group ID can pull out object files from an archive file // with an equal or greater group ID. Otherwise, it is a reverse dependency // and an error. // // A file outside --{start,end}-group gets a fresh ID when instantiated. All // files within the same --{start,end}-group get the same group ID. E.g. // // ld.lld A B --start-group C D --end-group E // // A forms group 0. B form group 1. C and D (including their member object // files) form group 2. E forms group 3. I think that you can see how this // group assignment rule simulates the traditional linker's semantics. 
bool backref = config->warnBackrefs && other.file && file->groupId < other.file->groupId; fetch(); // We don't report backward references to weak symbols as they can be // overridden later. if (backref && !isWeak()) warn("backward reference detected: " + other.getName() + " in " + toString(other.file) + " refers to " + toString(file)); return; } // Undefined symbols in a SharedFile do not change the binding. if (dyn_cast_or_null(other.file)) return; if (isUndefined()) { // The binding may "upgrade" from weak to non-weak. if (other.binding != STB_WEAK) binding = other.binding; } else if (auto *s = dyn_cast(this)) { // The binding of a SharedSymbol will be weak if there is at least one // reference and all are weak. The binding has one opportunity to change to // weak: if the first reference is weak. if (other.binding != STB_WEAK || !s->referenced) binding = other.binding; s->referenced = true; } } // Using .symver foo,foo@@VER unfortunately creates two symbols: foo and // foo@@VER. We want to effectively ignore foo, so give precedence to // foo@@VER. // FIXME: If users can transition to using // .symver foo,foo@@@VER // we can delete this hack. static int compareVersion(StringRef a, StringRef b) { bool x = a.contains("@@"); bool y = b.contains("@@"); if (!x && y) return 1; if (x && !y) return -1; return 0; } // Compare two symbols. Return 1 if the new symbol should win, -1 if // the new symbol should lose, or 0 if there is a conflict. 
int Symbol::compare(const Symbol *other) const { assert(other->isDefined() || other->isCommon()); if (!isDefined() && !isCommon()) return 1; if (int cmp = compareVersion(getName(), other->getName())) return cmp; if (other->isWeak()) return -1; if (isWeak()) return 1; if (isCommon() && other->isCommon()) { if (config->warnCommon) warn("multiple common of " + getName()); return 0; } if (isCommon()) { if (config->warnCommon) warn("common " + getName() + " is overridden"); return 1; } if (other->isCommon()) { if (config->warnCommon) warn("common " + getName() + " is overridden"); return -1; } auto *oldSym = cast(this); auto *newSym = cast(other); if (other->file && isa(other->file)) return 0; if (!oldSym->section && !newSym->section && oldSym->value == newSym->value && newSym->binding == STB_GLOBAL) return -1; return 0; } static void reportDuplicate(Symbol *sym, InputFile *newFile, InputSectionBase *errSec, uint64_t errOffset) { if (config->allowMultipleDefinition) return; Defined *d = cast(sym); if (!d->section || !errSec) { error("duplicate symbol: " + toString(*sym) + "\n>>> defined in " + toString(sym->file) + "\n>>> defined in " + toString(newFile)); return; } // Construct and print an error message in the form of: // // ld.lld: error: duplicate symbol: foo // >>> defined at bar.c:30 // >>> bar.o (/home/alice/src/bar.o) // >>> defined at baz.c:563 // >>> baz.o in archive libbaz.a auto *sec1 = cast(d->section); std::string src1 = sec1->getSrcMsg(*sym, d->value); std::string obj1 = sec1->getObjMsg(d->value); std::string src2 = errSec->getSrcMsg(*sym, errOffset); std::string obj2 = errSec->getObjMsg(errOffset); std::string msg = "duplicate symbol: " + toString(*sym) + "\n>>> defined at "; if (!src1.empty()) msg += src1 + "\n>>> "; msg += obj1 + "\n>>> defined at "; if (!src2.empty()) msg += src2 + "\n>>> "; msg += obj2; error(msg); } void Symbol::resolveCommon(const CommonSymbol &other) { int cmp = compare(&other); if (cmp < 0) return; if (cmp > 0) { replace(other); 
return; } CommonSymbol *oldSym = cast(this); oldSym->alignment = std::max(oldSym->alignment, other.alignment); if (oldSym->size < other.size) { oldSym->file = other.file; oldSym->size = other.size; } } void Symbol::resolveDefined(const Defined &other) { int cmp = compare(&other); if (cmp > 0) replace(other); else if (cmp == 0) reportDuplicate(this, other.file, dyn_cast_or_null(other.section), other.value); } template void Symbol::resolveLazy(const LazyT &other) { if (!isUndefined()) return; // An undefined weak will not fetch archive members. See comment on Lazy in // Symbols.h for the details. if (isWeak()) { uint8_t ty = type; replace(other); type = ty; binding = STB_WEAK; return; } other.fetch(); } void Symbol::resolveShared(const SharedSymbol &other) { if (visibility == STV_DEFAULT && (isUndefined() || isLazy())) { // An undefined symbol with non default visibility must be satisfied // in the same DSO. uint8_t bind = binding; replace(other); binding = bind; cast(this)->referenced = true; } } Index: vendor/lld/dist-release_90/ELF/Symbols.h =================================================================== --- vendor/lld/dist-release_90/ELF/Symbols.h (revision 351717) +++ vendor/lld/dist-release_90/ELF/Symbols.h (revision 351718) @@ -1,554 +1,558 @@ //===- Symbols.h ------------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines various types of Symbols. 
// //===----------------------------------------------------------------------===// #ifndef LLD_ELF_SYMBOLS_H #define LLD_ELF_SYMBOLS_H #include "InputFiles.h" #include "InputSection.h" #include "lld/Common/LLVM.h" #include "lld/Common/Strings.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ELF.h" namespace lld { namespace elf { class CommonSymbol; class Defined; class InputFile; class LazyArchive; class LazyObject; class SharedSymbol; class Symbol; class Undefined; } // namespace elf std::string toString(const elf::Symbol &); -std::string toString(const elf::InputFile *); + +// There are two different ways to convert an Archive::Symbol to a string: +// One for Microsoft name mangling and one for Itanium name mangling. +// Call the functions toCOFFString and toELFString, not just toString. +std::string toELFString(const elf::Archive::Symbol &); namespace elf { // This is a StringRef-like container that doesn't run strlen(). // // ELF string tables contain a lot of null-terminated strings. Most of them // are not necessary for the linker because they are names of local symbols, // and the linker doesn't use local symbol names for name resolution. So, we // use this class to represents strings read from string tables. struct StringRefZ { StringRefZ(const char *s) : data(s), size(-1) {} StringRefZ(StringRef s) : data(s.data()), size(s.size()) {} const char *data; const uint32_t size; }; // The base class for real symbol classes. class Symbol { public: enum Kind { PlaceholderKind, DefinedKind, CommonKind, SharedKind, UndefinedKind, LazyArchiveKind, LazyObjectKind, }; Kind kind() const { return static_cast(symbolKind); } // The file from which this symbol was created. InputFile *file; protected: const char *nameData; mutable uint32_t nameSize; public: uint32_t dynsymIndex = 0; uint32_t gotIndex = -1; uint32_t pltIndex = -1; uint32_t globalDynIndex = -1; // This field is a index to the symbol's version definition. 
uint32_t verdefIndex = -1; // Version definition index. uint16_t versionId; // An index into the .branch_lt section on PPC64. uint16_t ppc64BranchltIndex = -1; // Symbol binding. This is not overwritten by replace() to track // changes during resolution. In particular: // - An undefined weak is still weak when it resolves to a shared library. // - An undefined weak will not fetch archive members, but we have to // remember it is weak. uint8_t binding; // The following fields have the same meaning as the ELF symbol attributes. uint8_t type; // symbol type uint8_t stOther; // st_other field value uint8_t symbolKind; // Symbol visibility. This is the computed minimum visibility of all // observed non-DSO symbols. unsigned visibility : 2; // True if the symbol was used for linking and thus need to be added to the // output file's symbol table. This is true for all symbols except for // unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that // are unreferenced except by other bitcode objects. unsigned isUsedInRegularObj : 1; // If this flag is true and the symbol has protected or default visibility, it // will appear in .dynsym. This flag is set by interposable DSO symbols in // executables, by most symbols in DSOs and executables built with // --export-dynamic, and by dynamic lists. unsigned exportDynamic : 1; // False if LTO shouldn't inline whatever this symbol points to. If a symbol // is overwritten after LTO, LTO shouldn't inline the symbol because it // doesn't know the final contents of the symbol. unsigned canInline : 1; // True if this symbol is specified by --trace-symbol option. 
unsigned traced : 1; inline void replace(const Symbol &New); bool includeInDynsym() const; uint8_t computeBinding() const; bool isWeak() const { return binding == llvm::ELF::STB_WEAK; } bool isUndefined() const { return symbolKind == UndefinedKind; } bool isCommon() const { return symbolKind == CommonKind; } bool isDefined() const { return symbolKind == DefinedKind; } bool isShared() const { return symbolKind == SharedKind; } bool isPlaceholder() const { return symbolKind == PlaceholderKind; } bool isLocal() const { return binding == llvm::ELF::STB_LOCAL; } bool isLazy() const { return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind; } // True if this is an undefined weak symbol. This only works once // all input files have been added. bool isUndefWeak() const { // See comment on lazy symbols for details. return isWeak() && (isUndefined() || isLazy()); } StringRef getName() const { if (nameSize == (uint32_t)-1) nameSize = strlen(nameData); return {nameData, nameSize}; } void setName(StringRef s) { nameData = s.data(); nameSize = s.size(); } void parseSymbolVersion(); bool isInGot() const { return gotIndex != -1U; } bool isInPlt() const { return pltIndex != -1U; } bool isInPPC64Branchlt() const { return ppc64BranchltIndex != 0xffff; } uint64_t getVA(int64_t addend = 0) const; uint64_t getGotOffset() const; uint64_t getGotVA() const; uint64_t getGotPltOffset() const; uint64_t getGotPltVA() const; uint64_t getPltVA() const; uint64_t getPPC64LongBranchTableVA() const; uint64_t getPPC64LongBranchOffset() const; uint64_t getSize() const; OutputSection *getOutputSection() const; // The following two functions are used for symbol resolution. // // You are expected to call mergeProperties for all symbols in input // files so that attributes that are attached to names rather than // indivisual symbol (such as visibility) are merged together. // // Every time you read a new symbol from an input, you are supposed // to call resolve() with the new symbol. 
That function replaces // "this" object as a result of name resolution if the new symbol is // more appropriate to be included in the output. // // For example, if "this" is an undefined symbol and a new symbol is // a defined symbol, "this" is replaced with the new symbol. void mergeProperties(const Symbol &other); void resolve(const Symbol &other); // If this is a lazy symbol, fetch an input file and add the symbol // in the file to the symbol table. Calling this function on // non-lazy object causes a runtime error. void fetch() const; private: static bool isExportDynamic(Kind k, uint8_t visibility) { if (k == SharedKind) return visibility == llvm::ELF::STV_DEFAULT; return config->shared || config->exportDynamic; } void resolveUndefined(const Undefined &other); void resolveCommon(const CommonSymbol &other); void resolveDefined(const Defined &other); template void resolveLazy(const LazyT &other); void resolveShared(const SharedSymbol &other); int compare(const Symbol *other) const; inline size_t getSymbolSize() const; protected: Symbol(Kind k, InputFile *file, StringRefZ name, uint8_t binding, uint8_t stOther, uint8_t type) : file(file), nameData(name.data), nameSize(name.size), binding(binding), type(type), stOther(stOther), symbolKind(k), visibility(stOther & 3), isUsedInRegularObj(!file || file->kind() == InputFile::ObjKind), exportDynamic(isExportDynamic(k, visibility)), canInline(false), traced(false), needsPltAddr(false), isInIplt(false), gotInIgot(false), isPreemptible(false), used(!config->gcSections), needsTocRestore(false), scriptDefined(false) {} public: // True the symbol should point to its PLT entry. // For SharedSymbol only. unsigned needsPltAddr : 1; // True if this symbol is in the Iplt sub-section of the Plt and the Igot // sub-section of the .got.plt or .got. unsigned isInIplt : 1; // True if this symbol needs a GOT entry and its GOT entry is actually in // Igot. This will be true only for certain non-preemptible ifuncs. 
unsigned gotInIgot : 1; // True if this symbol is preemptible at load time. unsigned isPreemptible : 1; // True if an undefined or shared symbol is used from a live section. unsigned used : 1; // True if a call to this symbol needs to be followed by a restore of the // PPC64 toc pointer. unsigned needsTocRestore : 1; // True if this symbol is defined by a linker script. unsigned scriptDefined : 1; // The partition whose dynamic symbol table contains this symbol's definition. uint8_t partition = 1; bool isSection() const { return type == llvm::ELF::STT_SECTION; } bool isTls() const { return type == llvm::ELF::STT_TLS; } bool isFunc() const { return type == llvm::ELF::STT_FUNC; } bool isGnuIFunc() const { return type == llvm::ELF::STT_GNU_IFUNC; } bool isObject() const { return type == llvm::ELF::STT_OBJECT; } bool isFile() const { return type == llvm::ELF::STT_FILE; } }; // Represents a symbol that is defined in the current output file. class Defined : public Symbol { public: Defined(InputFile *file, StringRefZ name, uint8_t binding, uint8_t stOther, uint8_t type, uint64_t value, uint64_t size, SectionBase *section) : Symbol(DefinedKind, file, name, binding, stOther, type), value(value), size(size), section(section) {} static bool classof(const Symbol *s) { return s->isDefined(); } uint64_t value; uint64_t size; SectionBase *section; }; // Represents a common symbol. // // On Unix, it is traditionally allowed to write variable definitions // without initialization expressions (such as "int foo;") to header // files. Such definition is called "tentative definition". // // Using tentative definition is usually considered a bad practice // because you should write only declarations (such as "extern int // foo;") to header files. Nevertheless, the linker and the compiler // have to do something to support bad code by allowing duplicate // definitions for this particular case. // // Common symbols represent variable definitions without initializations. 
// The compiler creates common symbols when it sees varaible definitions // without initialization (you can suppress this behavior and let the // compiler create a regular defined symbol by -fno-common). // // The linker allows common symbols to be replaced by regular defined // symbols. If there are remaining common symbols after name resolution is // complete, they are converted to regular defined symbols in a .bss // section. (Therefore, the later passes don't see any CommonSymbols.) class CommonSymbol : public Symbol { public: CommonSymbol(InputFile *file, StringRefZ name, uint8_t binding, uint8_t stOther, uint8_t type, uint64_t alignment, uint64_t size) : Symbol(CommonKind, file, name, binding, stOther, type), alignment(alignment), size(size) {} static bool classof(const Symbol *s) { return s->isCommon(); } uint32_t alignment; uint64_t size; }; class Undefined : public Symbol { public: Undefined(InputFile *file, StringRefZ name, uint8_t binding, uint8_t stOther, uint8_t type, uint32_t discardedSecIdx = 0) : Symbol(UndefinedKind, file, name, binding, stOther, type), discardedSecIdx(discardedSecIdx) {} static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } // The section index if in a discarded section, 0 otherwise. uint32_t discardedSecIdx; }; class SharedSymbol : public Symbol { public: static bool classof(const Symbol *s) { return s->kind() == SharedKind; } SharedSymbol(InputFile &file, StringRef name, uint8_t binding, uint8_t stOther, uint8_t type, uint64_t value, uint64_t size, uint32_t alignment, uint32_t verdefIndex) : Symbol(SharedKind, &file, name, binding, stOther, type), value(value), size(size), alignment(alignment) { this->verdefIndex = verdefIndex; // GNU ifunc is a mechanism to allow user-supplied functions to // resolve PLT slot values at load-time. This is contrary to the // regular symbol resolution scheme in which symbols are resolved just // by name. 
Using this hook, you can program how symbols are solved // for you program. For example, you can make "memcpy" to be resolved // to a SSE-enabled version of memcpy only when a machine running the // program supports the SSE instruction set. // // Naturally, such symbols should always be called through their PLT // slots. What GNU ifunc symbols point to are resolver functions, and // calling them directly doesn't make sense (unless you are writing a // loader). // // For DSO symbols, we always call them through PLT slots anyway. // So there's no difference between GNU ifunc and regular function // symbols if they are in DSOs. So we can handle GNU_IFUNC as FUNC. if (this->type == llvm::ELF::STT_GNU_IFUNC) this->type = llvm::ELF::STT_FUNC; } SharedFile &getFile() const { return *cast(file); } uint64_t value; // st_value uint64_t size; // st_size uint32_t alignment; // This is true if there has been at least one undefined reference to the // symbol. The binding may change to STB_WEAK if the first undefined reference // is weak. bool referenced = false; }; // LazyArchive and LazyObject represent a symbols that is not yet in the link, // but we know where to find it if needed. If the resolver finds both Undefined // and Lazy for the same name, it will ask the Lazy to load a file. // // A special complication is the handling of weak undefined symbols. They should // not load a file, but we have to remember we have seen both the weak undefined // and the lazy. We represent that with a lazy symbol with a weak binding. This // means that code looking for undefined symbols normally also has to take lazy // symbols into consideration. // This class represents a symbol defined in an archive file. It is // created from an archive file header, and it knows how to load an // object file from an archive to replace itself with a defined // symbol. 
class LazyArchive : public Symbol { public: LazyArchive(InputFile &file, const llvm::object::Archive::Symbol s) : Symbol(LazyArchiveKind, &file, s.getName(), llvm::ELF::STB_GLOBAL, llvm::ELF::STV_DEFAULT, llvm::ELF::STT_NOTYPE), sym(s) {} static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; } MemoryBufferRef getMemberBuffer(); const llvm::object::Archive::Symbol sym; }; // LazyObject symbols represents symbols in object files between // --start-lib and --end-lib options. class LazyObject : public Symbol { public: LazyObject(InputFile &file, StringRef name) : Symbol(LazyObjectKind, &file, name, llvm::ELF::STB_GLOBAL, llvm::ELF::STV_DEFAULT, llvm::ELF::STT_NOTYPE) {} static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; } }; // Some linker-generated symbols need to be created as // Defined symbols. struct ElfSym { // __bss_start static Defined *bss; // etext and _etext static Defined *etext1; static Defined *etext2; // edata and _edata static Defined *edata1; static Defined *edata2; // end and _end static Defined *end1; static Defined *end2; // The _GLOBAL_OFFSET_TABLE_ symbol is defined by target convention to // be at some offset from the base of the .got section, usually 0 or // the end of the .got. static Defined *globalOffsetTable; // _gp, _gp_disp and __gnu_local_gp symbols. Only for MIPS. static Defined *mipsGp; static Defined *mipsGpDisp; static Defined *mipsLocalGp; // __rel{,a}_iplt_{start,end} symbols. static Defined *relaIpltStart; static Defined *relaIpltEnd; // __global_pointer$ for RISC-V. static Defined *riscvGlobalPointer; // _TLS_MODULE_BASE_ on targets that support TLSDESC. static Defined *tlsModuleBase; }; // A buffer class that is large enough to hold any Symbol-derived // object. We allocate memory using this class and instantiate a symbol // using the placement new. 
union SymbolUnion { alignas(Defined) char a[sizeof(Defined)]; alignas(CommonSymbol) char b[sizeof(CommonSymbol)]; alignas(Undefined) char c[sizeof(Undefined)]; alignas(SharedSymbol) char d[sizeof(SharedSymbol)]; alignas(LazyArchive) char e[sizeof(LazyArchive)]; alignas(LazyObject) char f[sizeof(LazyObject)]; }; // It is important to keep the size of SymbolUnion small for performance and // memory usage reasons. 80 bytes is a soft limit based on the size of Defined // on a 64-bit system. static_assert(sizeof(SymbolUnion) <= 80, "SymbolUnion too large"); template struct AssertSymbol { static_assert(std::is_trivially_destructible(), "Symbol types must be trivially destructible"); static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small"); static_assert(alignof(T) <= alignof(SymbolUnion), "SymbolUnion not aligned enough"); }; static inline void assertSymbols() { AssertSymbol(); AssertSymbol(); AssertSymbol(); AssertSymbol(); AssertSymbol(); AssertSymbol(); } void printTraceSymbol(const Symbol *sym); size_t Symbol::getSymbolSize() const { switch (kind()) { case CommonKind: return sizeof(CommonSymbol); case DefinedKind: return sizeof(Defined); case LazyArchiveKind: return sizeof(LazyArchive); case LazyObjectKind: return sizeof(LazyObject); case SharedKind: return sizeof(SharedSymbol); case UndefinedKind: return sizeof(Undefined); case PlaceholderKind: return sizeof(Symbol); } llvm_unreachable("unknown symbol kind"); } // replace() replaces "this" object with a given symbol by memcpy'ing // it over to "this". This function is called as a result of name // resolution, e.g. to replace an undefind symbol with a defined symbol. void Symbol::replace(const Symbol &New) { using llvm::ELF::STT_TLS; // Symbols representing thread-local variables must be referenced by // TLS-aware relocations, and non-TLS symbols must be reference by // non-TLS relocations, so there's a clear distinction between TLS // and non-TLS symbols. 
It is an error if the same symbol is defined // as a TLS symbol in one file and as a non-TLS symbol in other file. if (symbolKind != PlaceholderKind && !isLazy() && !New.isLazy()) { bool tlsMismatch = (type == STT_TLS && New.type != STT_TLS) || (type != STT_TLS && New.type == STT_TLS); if (tlsMismatch) error("TLS attribute mismatch: " + toString(*this) + "\n>>> defined in " + toString(New.file) + "\n>>> defined in " + toString(file)); } Symbol old = *this; memcpy(this, &New, New.getSymbolSize()); versionId = old.versionId; visibility = old.visibility; isUsedInRegularObj = old.isUsedInRegularObj; exportDynamic = old.exportDynamic; canInline = old.canInline; traced = old.traced; isPreemptible = old.isPreemptible; scriptDefined = old.scriptDefined; partition = old.partition; // Symbol length is computed lazily. If we already know a symbol length, // propagate it. if (nameData == old.nameData && nameSize == 0 && old.nameSize != 0) nameSize = old.nameSize; // Print out a log message if --trace-symbol was specified. // This is for debugging. if (traced) printTraceSymbol(this); } void maybeWarnUnorderableSymbol(const Symbol *sym); } // namespace elf } // namespace lld #endif Index: vendor/lld/dist-release_90/docs/ReleaseNotes.rst =================================================================== --- vendor/lld/dist-release_90/docs/ReleaseNotes.rst (revision 351717) +++ vendor/lld/dist-release_90/docs/ReleaseNotes.rst (revision 351718) @@ -1,101 +1,88 @@ ======================= lld 9.0.0 Release Notes ======================= .. contents:: :local: -.. warning:: - These are in-progress notes for the upcoming LLVM 9.0.0 release. - Release notes for previous releases can be found on - `the Download Page `_. - Introduction ============ This document contains the release notes for the lld linker, release 9.0.0. Here we describe the status of lld, including major improvements from the previous release. All lld releases may be downloaded from the `LLVM releases web site `_. 
Non-comprehensive list of changes in this release
=================================================

ELF Improvements
----------------

* ld.lld now has typo suggestions for flags:
  ``$ ld.lld --call-shared`` now prints
  ``unknown argument '--call-shared', did you mean '--call_shared'``.

* lld now supports replacing ``JAL`` with ``JALX`` instructions in case
  of MIPS - microMIPS cross-mode jumps.

* lld now creates LA25 thunks for MIPS R6 code.

* lld now puts MIPS-specific .reginfo, .MIPS.options, and .MIPS.abiflags
  sections into the corresponding PT_MIPS_REGINFO, PT_MIPS_OPTIONS, and
  PT_MIPS_ABIFLAGS segments.

-* ...
-
COFF Improvements
-----------------

* Like the ELF driver, lld-link now has typo suggestions for flags.

* lld-link now correctly reports duplicate symbol errors for obj files
  that were compiled with /Gy.

* lld-link now correctly reports duplicate symbol errors when several res
  input files define resources with the same type, name, and language.
  This can be demoted to a warning using ``/force:multipleres``.

+* lld-link now rejects more than one resource obj input file, matching
+  link.exe. Previously, lld-link would silently ignore all but one.
+  If you hit this: Don't pass resource obj files to the linker, instead pass
+  res files to the linker directly. Don't put res files in static libraries,
+  pass them on the command line.
+
* Having more than two ``/natvis:`` flags now works correctly; previously it
  did not work for larger binaries.

* Undefined symbols are now printed only in demangled form. Pass
  ``/demangle:no`` to see raw symbol names instead.

* The following flags have been added: ``/functionpadmin``, ``/swaprun:``,
  ``/threads:no``

* Several speed and memory usage improvements.

* Range extension thunks are now created for ARM64, if needed.

* lld-link now supports resource object files created by GNU windres and
  MS cvtres, not only llvm-cvtres.

* The generated thunks for delayimports now share the majority of code
  among thunks, significantly reducing the overhead of using delayimport.

-* ...
-
MinGW Improvements
------------------

* lld now correctly links crtend.o as the last object file, handling
  terminators for the sections such as .eh_frame properly, fixing
  DWARF exception handling with libgcc and gcc's crtend.o.

* lld now also handles DWARF unwind info generated by GCC, when linking
  with libgcc.

* Many more GNU ld options are now supported, which e.g. allows the lld
  MinGW frontend to be called by GCC.

* PDB output can be requested without manually specifying the PDB file
  name, with the new option ``-pdb=`` with an empty value to the option.
  (The old existing syntax ``-pdb <filename>`` was more cumbersome to use
  with an empty parameter value.)
-
-MachO Improvements
-------------------
-
-* Item 1.
-
-WebAssembly Improvements
-------------------------
-
-* ...