// Code in this file is a heavily modified version of the dynamic loader
// from android's bionic library. Here is the license for that project:

/*
 * Copyright (C) 2016 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <dlfcn.h>
#include <elf.h>
#include <fcntl.h>
#include <libgen.h>
#include <link.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <atomic>
#include <cerrno>
#include <cinttypes>
#include <climits>
#include <cstdint>
#include <cstring>
#include <functional>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <thread>
#include <vector>
// Get PAGE_SIZE and PAGE_MASK.
#include <sys/user.h>

#include <c10/util/Optional.h>

#include <fmt/format.h>
#include <torch/csrc/deploy/loader.h>

namespace torch {
namespace deploy {

#define DEPLOY_ERROR(msg_fmt, ...) \
  throw DeployLoaderError(fmt::format(msg_fmt, ##__VA_ARGS__))

#define DEPLOY_CHECK(cond, fmt, ...)  \
  if (!(cond)) {                      \
    DEPLOY_ERROR(fmt, ##__VA_ARGS__); \
  }

std::vector<std::string> split_path(const std::string& s, char delim) {
  const char* cur = s.c_str();
  const char* end = cur + s.size();
  if (cur == end) {
    return {};
  }
  std::vector<std::string> result;
  while (true) {
    // non-zero amount of chars
    const char* next = strchr(cur, delim);
    if (!next) {
      result.emplace_back(std::string(cur, end));
      break;
    }
    result.emplace_back(std::string(cur, next));
    cur = next + 1;
  }
  return result;
}
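
// For example, split_path("/usr/lib:/opt/lib", ':') yields {"/usr/lib",
// "/opt/lib"}, and split_path("", ':') yields an empty vector.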

// https://stackoverflow.com/questions/23006930/the-shared-library-rpath-and-the-binary-rpath-priority/52647116#52647116
void replace_all(
    std::string& str,
    const std::string& from,
    const std::string& to) {
  if (from.empty())
    return;
  size_t start_pos = 0;
  while ((start_pos = str.find(from, start_pos)) != std::string::npos) {
    str.replace(start_pos, from.length(), to);
    start_pos += to.length(); // In case 'to' contains 'from', like replacing
                              // 'x' with 'yx'
  }
}

std::string resolve_path(const std::string& origin, const std::string& t) {
  std::string result = t;
  replace_all(result, "$ORIGIN", origin);
  // NOLINTNEXTLINE
  char buf[PATH_MAX];
  char* resolved = realpath(result.c_str(), buf);
  if (!resolved) {
    return result;
  }
  return resolved;
}

std::string resolve_origin(const std::string& so_name) {
  // NOLINTNEXTLINE
  char origin[PATH_MAX];
  realpath(so_name.c_str(), origin);
  dirname(origin);
  return origin;
}
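
// Example (with a hypothetical path): for a library at /opt/app/lib/libfoo.so,
// resolve_origin returns "/opt/app/lib", and
// resolve_path("/opt/app/lib", "$ORIGIN/../deps") expands to
// "/opt/app/lib/../deps" and canonicalizes to "/opt/app/deps" if that
// directory exists; otherwise the expanded string is returned unchanged.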

template <typename... Args>
std::string stringf(const char* format, Args... args) {
  int size_s = snprintf(nullptr, 0, format, args...);
  std::string result(size_s + 1, 0);
  snprintf((char*)result.data(), size_s + 1, format, args...);
  return result;
}
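
// Note: `result` is constructed with size_s + 1 characters, so the returned
// std::string includes the terminating '\0' in its size(); e.g.
// stringf("%d", 42).size() == 3 rather than 2.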

// Returns the address of the page containing address 'x'.
#define PAGE_START(x) ((x)&PAGE_MASK)

// Returns the offset of address 'x' in its page.
#define PAGE_OFFSET(x) ((x) & ~PAGE_MASK)

// Returns the address of the next page after address 'x', unless 'x' is
// itself at the start of a page.
#define PAGE_END(x) PAGE_START((x) + (PAGE_SIZE - 1))
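
// For example, with 4 KiB pages (PAGE_SIZE == 0x1000, PAGE_MASK == ~0xfffUL):
//   PAGE_START(0x1234)  == 0x1000
//   PAGE_OFFSET(0x1234) == 0x234
//   PAGE_END(0x1234)    == 0x2000, while PAGE_END(0x1000) == 0x1000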

// from bionic
// returns the size a shared library will take in memory
size_t phdr_table_get_load_size(
    const Elf64_Phdr* phdr_table,
    size_t phdr_count,
    Elf64_Addr* out_min_vaddr,
    Elf64_Addr* out_max_vaddr) {
  Elf64_Addr min_vaddr = UINTPTR_MAX;
  Elf64_Addr max_vaddr = 0;

  bool found_pt_load = false;
  for (size_t i = 0; i < phdr_count; ++i) {
    const Elf64_Phdr* phdr = &phdr_table[i];

    if (phdr->p_type != PT_LOAD) {
      continue;
    }
    found_pt_load = true;

    if (phdr->p_vaddr < min_vaddr) {
      min_vaddr = phdr->p_vaddr;
    }

    if (phdr->p_vaddr + phdr->p_memsz > max_vaddr) {
      max_vaddr = phdr->p_vaddr + phdr->p_memsz;
    }
  }
  if (!found_pt_load) {
    min_vaddr = 0;
  }

  min_vaddr = PAGE_START(min_vaddr);
  max_vaddr = PAGE_END(max_vaddr);

  if (out_min_vaddr != nullptr) {
    *out_min_vaddr = min_vaddr;
  }
  if (out_max_vaddr != nullptr) {
    *out_max_vaddr = max_vaddr;
  }
  return max_vaddr - min_vaddr;
}
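
// For example, a library whose PT_LOAD segments cover vaddrs [0x0, 0x1234)
// and [0x2000, 0x3010) has min_vaddr 0x0 and max_vaddr 0x3010, so with 4 KiB
// pages the reserved load size is PAGE_END(0x3010) - PAGE_START(0x0) = 0x4000.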

#define MAYBE_MAP_FLAG(x, from, to) (((x) & (from)) ? (to) : 0)
#define PFLAGS_TO_PROT(x)                 \
  (MAYBE_MAP_FLAG((x), PF_X, PROT_EXEC) | \
   MAYBE_MAP_FLAG((x), PF_R, PROT_READ) | \
   MAYBE_MAP_FLAG((x), PF_W, PROT_WRITE))
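
// For example, PFLAGS_TO_PROT(PF_R | PF_X) == (PROT_READ | PROT_EXEC);
// load_segments() below uses this to translate each segment's p_flags into
// mmap/mprotect protection bits.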

// holds a pre-computed hash for a string that is used in GNU-style hash
// tables, and also keeps track of the string length.
struct GnuHash {
  GnuHash(const char* name) {
    uint32_t h = 5381;
    const uint8_t* name_bytes = reinterpret_cast<const uint8_t*>(name);
#pragma unroll 8
    while (*name_bytes != 0) {
      h += (h << 5) +
          *name_bytes++; // h*33 + c = h + h * 32 + c = h + h << 5 + c
    }
    hash = h;
    name_len = reinterpret_cast<const char*>(name_bytes) - name;
  }
  uint32_t hash;
  uint32_t name_len;
};
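
// This is the standard GNU symbol hash (dl_new_hash) used for .gnu.hash
// (DT_GNU_HASH) sections, so a GnuHash computed here can be compared directly
// against the hash table of a loaded library. As a degenerate example,
// GnuHash("").hash == 5381 and its name_len is 0, since the loop never runs.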

// this is a special builtin in the libc++ API used for telling C++ exception
// frame unwinding about functions loaded from a pathway other than the libc
// loader. it is passed a pointer to where the EH_FRAME section was loaded,
// which appears to include frame information relative to that address.
extern "C" void __register_frame(void*);
extern "C" void __deregister_frame(void*);

// Memory maps a file into the address space read-only, and manages the
// lifetime of the mapping. Used in the loader to read in the initial image,
// and to inspect ELF files for dependencies before calling dlopen.
struct MemFile {
  MemFile(const char* filename_) : fd_(0), mem_(nullptr), n_bytes_(0) {
    fd_ = open(filename_, O_RDONLY);
    DEPLOY_CHECK(
        fd_ != -1, "failed to open {}: {}", filename_, strerror(errno));
    struct stat s = {0};
    if (-1 == fstat(fd_, &s)) {
      close(fd_); // destructors don't run during exceptions
      DEPLOY_ERROR("failed to stat {}: {}", filename_, strerror(errno));
    }
    n_bytes_ = s.st_size;
    mem_ = mmap(nullptr, n_bytes_, PROT_READ, MAP_SHARED, fd_, 0);
    if (MAP_FAILED == mem_) {
      close(fd_);
      DEPLOY_ERROR("failed to mmap {}: {}", filename_, strerror(errno));
    }
  }
  MemFile(const MemFile&) = delete;
  const char* data() const {
    return (const char*)mem_;
  }
  ~MemFile() {
    if (mem_) {
      munmap((void*)mem_, n_bytes_);
    }
    if (fd_) {
      close(fd_);
    }
  }
  size_t size() {
    return n_bytes_;
  }
  int fd() const {
    return fd_;
  }

 private:
  int fd_;
  void* mem_;
  size_t n_bytes_;
};
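
// MemFile serves two roles below: load_needed_from_elf_file() reads DT_NEEDED
// entries out of the read-only mapping, and CustomLibraryImpl keeps one open
// (contents_) so PT_LOAD segments can be mmapped directly from its fd().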

typedef void (*linker_dtor_function_t)();
typedef void (*linker_ctor_function_t)(int, const char**, char**);

// https://refspecs.linuxfoundation.org/LSB_2.1.0/LSB-Core-generic/LSB-Core-generic/ehframehdr.html
// note that eh_frame_ptr can have different types based on eh_frame_ptr_enc,
// but we only support one specific encoding: an int32_t offset stored relative
// to the location of the eh_frame_ptr field itself.
struct EH_Frame_HDR {
  char version;
  char eh_frame_ptr_enc;
  char fde_count_enc;
  char table_enc;
  int32_t eh_frame_ptr;
};
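
// The only eh_frame_ptr_enc value accepted below is 0x1b, i.e.
// DW_EH_PE_pcrel | DW_EH_PE_sdata4: a signed 4-byte offset relative to the
// address of the eh_frame_ptr field, which is how load_segments() locates the
// .eh_frame data to hand to __register_frame().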

// this is the libc++ function called to look up thread local state.
// It is passed a pointer to an object of the same shape as TLSEntry
// with the module_id and offset.
extern "C" void* __tls_get_addr(void*);

extern "C" int __cxa_thread_atexit_impl(
    void (*dtor)(void*),
    void* obj,
    void* dso_symbol);

struct CustomLibraryImpl;

struct TLSMemory {
  TLSMemory(std::shared_ptr<CustomLibraryImpl> file, size_t size)
      // NOLINTNEXTLINE
      : file_(std::move(file)), mem_(malloc(size)) {}
  std::shared_ptr<CustomLibraryImpl> file_;
  void* mem_;
  ~TLSMemory() {
    // NOLINTNEXTLINE
    free(mem_);
  }
};

static void delete_TLSMemory(void* obj) {
  delete ((TLSMemory*)obj);
}

// This object performs TLS emulation for modules not loaded by dlopen.
// Normally modules have a module_id that is used as a key in libc for the
// thread local data for that module. However, there is no public API for
// assigning this module id. Instead, for modules that we load, we set
// module_id to a pointer to a TLSSegment object, and replace __tls_get_addr
// with a function that calls `tls_addr` on it.

// libc module_id's are sequential, so we use the top bit as a flag to see
// if we have a local TLSSegment object instead. This will break if
// someone creates 2^63 sequential objects, but it is hard to imagine
// a system with enough RAM to do that.
constexpr size_t TLS_LOCAL_FLAG = (1ULL << 63);
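
// For example, a CustomLibraryImpl located at 0x7f0012340000 reports
// module_id() == 0x80007f0012340000 (its address with the top bit set).
// local__tls_get_addr() checks TLS_LOCAL_FLAG, masks it off, and dispatches to
// tls_addr(offset) on the recovered object; ordinary libc module ids fall
// through to the real __tls_get_addr.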

static void* local__tls_get_addr(TLSIndex* idx);

/* LLDB puts a breakpoint in this function, and reads __deploy_module_info to
 * get debug info from library. */
__attribute__((noinline)) void __deploy_register_code() {
  std::cout << ""; // otherwise the breakpoint doesn't get hit, not sure if
                   // there is a more stable way of doing this.
};

struct DeployModuleInfo {
  const char* name;
  Elf64_Addr file_addr;
  size_t file_size;
  Elf64_Addr load_bias;
};

extern "C" {
// NOLINTNEXTLINE
DeployModuleInfo __deploy_module_info;
}

// RAII wrapper around dlopen
struct __attribute__((visibility("hidden"))) SystemLibraryImpl
    : public SystemLibrary {
  SystemLibraryImpl(void* handle, bool steal)
      : handle_(handle), own_handle_(steal && handle != RTLD_DEFAULT) {}

  at::optional<Elf64_Addr> sym(const char* name) const override {
    void* r = dlsym(handle_, name);
    if (!r) {
      return at::nullopt;
    }
    return (Elf64_Addr)r;
  }

  at::optional<TLSIndex> tls_sym(const char* name) const override;

  ~SystemLibraryImpl() override {
    if (own_handle_) {
      dlclose(handle_);
    }
  }

 private:
  void* handle_;
  bool own_handle_;
};

std::shared_ptr<SystemLibrary> SystemLibrary::create(void* handle, bool steal) {
  return std::make_shared<SystemLibraryImpl>(handle, steal);
}
std::shared_ptr<SystemLibrary> SystemLibrary::create(
    const char* path,
    int flags) {
  void* handle = dlopen(path, flags);
  return SystemLibrary::create(handle, handle != nullptr);
}

// reads DT_NEEDED and DT_RUNPATH from an unloaded elf file so we can sort out
// dependencies before calling dlopen
std::pair<const char*, std::vector<const char*>> load_needed_from_elf_file(
    const char* filename,
    const char* data) {
  auto header_ = (Elf64_Ehdr*)data;
  auto program_headers = (Elf64_Phdr*)(data + header_->e_phoff);
  auto n_program_headers = header_->e_phnum;
  const Elf64_Dyn* dynamic = nullptr;
  for (size_t i = 0; i < n_program_headers; ++i) {
    const Elf64_Phdr* phdr = &program_headers[i];
    if (phdr->p_type == PT_DYNAMIC) {
      dynamic = reinterpret_cast<const Elf64_Dyn*>(data + phdr->p_offset);
      break;
    }
  }
  DEPLOY_CHECK(
      dynamic,
      "{}: could not load dynamic section for looking up DT_NEEDED",
      filename);

  const char* runpath = "";
  std::vector<const char*> needed;

  auto segment_headers = (Elf64_Shdr*)(data + header_->e_shoff);
  size_t n_segments = header_->e_shnum;
  const char* strtab = nullptr;

  const char* segment_string_table =
      data + segment_headers[header_->e_shstrndx].sh_offset;

  for (size_t i = 0; i < n_segments; ++i) {
    const Elf64_Shdr* shdr = &segment_headers[i];
    if (shdr->sh_type == SHT_STRTAB &&
        strcmp(".dynstr", segment_string_table + shdr->sh_name) == 0) {
      strtab = data + shdr->sh_offset;
      break;
    }
  }

  DEPLOY_CHECK(strtab, "{}: could not load dynstr for DT_NEEDED", filename);

  for (const Elf64_Dyn* d = dynamic; d->d_tag != DT_NULL; ++d) {
    switch (d->d_tag) {
      case DT_NEEDED:
        // std::cout << "NEEDED: '" << strtab + d->d_un.d_val << "'\n";
        needed.push_back(strtab + d->d_un.d_val);
        break;
      case DT_RPATH: /* not quite correct, because this is a different order
                        than runpath,
                        but better than not processing it at all */
      case DT_RUNPATH:
        // std::cout << "RUNPATH: '" << strtab + d->d_un.d_val << "'\n";
        runpath = strtab + d->d_un.d_val;
        break;
    }
  }
  return std::make_pair(runpath, std::move(needed));
}

// common mechanism for reading the elf symbol table,
// and other information in the PT_DYNAMIC segment.
struct ElfDynamicInfo {
  std::string name_;
  const Elf64_Dyn* dynamic_ = nullptr;
  Elf64_Addr load_bias_ = 0;
  const Elf64_Sym* symtab_ = nullptr;
  const char* strtab_ = nullptr;
  size_t strtab_size_ = 0;
  Elf64_Rela* plt_rela_ = nullptr;
  size_t n_plt_rela_ = 0;
  Elf64_Rela* rela_ = nullptr;
  size_t n_rela_ = 0;
  linker_ctor_function_t init_func_ = nullptr;
  linker_ctor_function_t* init_array_ = nullptr;
  linker_dtor_function_t fini_func_ = nullptr;
  linker_dtor_function_t* fini_array_ = nullptr;
  size_t n_init_array_ = 0;
  size_t n_fini_array_ = 0;
  size_t gnu_nbucket_ = 0;
  uint32_t* gnu_bucket_ = nullptr;
  uint32_t* gnu_chain_ = nullptr;
  uint32_t gnu_maskwords_ = 0;
  uint32_t gnu_shift2_ = 0;
  Elf64_Addr* gnu_bloom_filter_ = nullptr;
  std::string runpath_;
  std::vector<const char*> needed_;

  const char* get_string(int idx) {
    return strtab_ + idx;
  }

  void initialize_from_dynamic_section(
      std::string name,
      Elf64_Dyn* dynamic,
      Elf64_Addr load_bias,
      bool check_absolute) {
    name_ = std::move(name);
    load_bias_ = load_bias;
    dynamic_ = dynamic;
    for (const Elf64_Dyn* d = dynamic_; d->d_tag != DT_NULL; ++d) {
      void* addr = (check_absolute && d->d_un.d_ptr > load_bias_)
          ? reinterpret_cast<void*>(d->d_un.d_ptr)
          : reinterpret_cast<void*>(load_bias_ + d->d_un.d_ptr);
      auto value = d->d_un.d_val;

      switch (d->d_tag) {
        case DT_SYMTAB:
          symtab_ = (Elf64_Sym*)addr;
          break;
        case DT_STRTAB:
          strtab_ = (const char*)addr;
          break;

        case DT_STRSZ:
          strtab_size_ = value;
          break;

        case DT_JMPREL:
          plt_rela_ = (Elf64_Rela*)addr;
          break;
        case DT_PLTRELSZ:
          n_plt_rela_ = value / sizeof(Elf64_Rela);
          break;
        case DT_RELA:
          rela_ = (Elf64_Rela*)addr;
          break;
        case DT_RELASZ:
          n_rela_ = value / sizeof(Elf64_Rela);
          break;

        case DT_INIT:
          init_func_ = reinterpret_cast<linker_ctor_function_t>(
              load_bias_ + d->d_un.d_ptr);
          break;

        case DT_FINI:
          fini_func_ = reinterpret_cast<linker_dtor_function_t>(
              load_bias_ + d->d_un.d_ptr);
          break;

        case DT_INIT_ARRAY:
          init_array_ = reinterpret_cast<linker_ctor_function_t*>(
              load_bias_ + d->d_un.d_ptr);
          break;

        case DT_INIT_ARRAYSZ:
          n_init_array_ =
              static_cast<uint32_t>(d->d_un.d_val) / sizeof(Elf64_Addr);
          break;

        case DT_FINI_ARRAY:
          fini_array_ = reinterpret_cast<linker_dtor_function_t*>(
              load_bias_ + d->d_un.d_ptr);
          break;

        case DT_FINI_ARRAYSZ:
          n_fini_array_ =
              static_cast<uint32_t>(d->d_un.d_val) / sizeof(Elf64_Addr);
          break;

        case DT_HASH:
          break;

        case DT_GNU_HASH: {
          gnu_nbucket_ = reinterpret_cast<uint32_t*>(addr)[0];
          // skip symndx
          gnu_maskwords_ = reinterpret_cast<uint32_t*>(addr)[2];
          gnu_shift2_ = reinterpret_cast<uint32_t*>(addr)[3];
          gnu_bloom_filter_ =
              reinterpret_cast<Elf64_Addr*>((Elf64_Addr)addr + 16);
          gnu_bucket_ =
              reinterpret_cast<uint32_t*>(gnu_bloom_filter_ + gnu_maskwords_);
          // amend chain for symndx = header[1]
          gnu_chain_ =
              gnu_bucket_ + gnu_nbucket_ - reinterpret_cast<uint32_t*>(addr)[1];
          --gnu_maskwords_;
        } break;
      }
    }

    if (!gnu_bucket_) {
      std::cout << fmt::format(
          "{}: warning, no DT_GNU_HASH found, symbol lookups on this module will not find anything.\n",
          name_);
    }

    // pass 2 for things that require the strtab_ to be loaded
    for (const Elf64_Dyn* d = dynamic_; d->d_tag != DT_NULL; ++d) {
      switch (d->d_tag) {
        case DT_NEEDED:
          needed_.push_back(get_string(d->d_un.d_val));
          break;
        case DT_RPATH: /* not quite correct, because this is a different order
                          than runpath,
                          but better than not processing it at all */
        case DT_RUNPATH:
          runpath_ = get_string(d->d_un.d_val);
          break;
      }
    }
  }

  at::optional<Elf64_Addr> sym(
      const char* name,
      GnuHash* precomputed_hash = nullptr) const {
    if (!gnu_bucket_) {
      return at::nullopt; // no hashtable was loaded
    }
    GnuHash hash_obj = precomputed_hash ? *precomputed_hash : GnuHash(name);
    auto hash = hash_obj.hash;
    auto name_len = hash_obj.name_len;
    constexpr uint32_t kBloomMaskBits = sizeof(Elf64_Addr) * 8;

    const uint32_t word_num = (hash / kBloomMaskBits) & gnu_maskwords_;
    const Elf64_Addr bloom_word = gnu_bloom_filter_[word_num];
    const uint32_t h1 = hash % kBloomMaskBits;
    const uint32_t h2 = (hash >> gnu_shift2_) % kBloomMaskBits;

    if ((1 & (bloom_word >> h1) & (bloom_word >> h2)) != 1) {
      return at::nullopt;
    }

    uint32_t sym_idx = gnu_bucket_[hash % gnu_nbucket_];
    if (sym_idx == 0) {
      return at::nullopt;
    }

    uint32_t chain_value = 0;
    const Elf64_Sym* sym = nullptr;

    do {
      sym = symtab_ + sym_idx;
      chain_value = gnu_chain_[sym_idx];
      if ((chain_value >> 1) == (hash >> 1)) {
        if (static_cast<size_t>(sym->st_name) + name_len + 1 <= strtab_size_ &&
            memcmp(strtab_ + sym->st_name, name, name_len + 1) == 0) {
          // found the matching entry, is it defined?
          if (sym->st_shndx != 0) {
            return sym->st_value +
                ((ELF64_ST_TYPE(sym->st_info) == STT_TLS) ? 0 : load_bias_);
          }
          // symbol isn't defined
          return at::nullopt;
        }
      }
      ++sym_idx;
    } while ((chain_value & 1) == 0);
    return at::nullopt;
  }
};

// for resolving TLS offsets we need to look through
// libc's already loaded libraries. We do not have the whole
// ELF file mapped in this case, just a pointer to the program headers and
// the load_bias (offset in memory) where the library was loaded.
struct AlreadyLoadedSymTable {
 private:
  ElfDynamicInfo dyninfo_;

 public:
  AlreadyLoadedSymTable(
      const char* name,
      Elf64_Addr load_bias,
      const Elf64_Phdr* program_headers,
      size_t n_program_headers) {
    Elf64_Dyn* dynamic = nullptr;
    for (size_t i = 0; i < n_program_headers; ++i) {
      const Elf64_Phdr* phdr = &program_headers[i];

      // Segment addresses in memory.
      Elf64_Addr seg_start = phdr->p_vaddr + load_bias;
      if (phdr->p_type == PT_DYNAMIC) {
        dynamic = reinterpret_cast<Elf64_Dyn*>(seg_start);
        break;
      }
    }
    DEPLOY_CHECK(
        dynamic, "{}: couldn't find PT_DYNAMIC in already loaded table.", name);
    dyninfo_.initialize_from_dynamic_section(name, dynamic, load_bias, true);
  }

  at::optional<Elf64_Addr> sym(const char* name) {
    return dyninfo_.sym(name);
  }
};

static int iterate_cb(struct dl_phdr_info* info, size_t size, void* data) {
  auto fn = (std::function<int(struct dl_phdr_info * info, size_t size)>*)data;
  return (*fn)(info, size);
}

// we need to find a TLS offset / module_id pair for a symbol which we cannot do
// with a normal dlsym call. Instead we iterate through all loaded libraries and
// check their symbol tables for the symbol. The value of the symbol is the TLS
// offset. When we find the library we also get the module id.
at::optional<TLSIndex> slow_find_tls_symbol_offset(const char* sym_name) {
  at::optional<TLSIndex> result = at::nullopt;
  std::function<int(struct dl_phdr_info*, size_t)> cb =
      [&](struct dl_phdr_info* info, size_t size) {
        // std::cout << "SEARCHING .. " << info->dlpi_name << "\n";
        AlreadyLoadedSymTable symtable(
            info->dlpi_name,
            info->dlpi_addr,
            info->dlpi_phdr,
            info->dlpi_phnum);
        auto sym_addr = symtable.sym(sym_name);
        if (sym_addr) {
          // std::cout << "FOUND IT IN: " << info->dlpi_name << " it has modid:
          // " << info->dlpi_tls_modid << "\n";
          result = TLSIndex{info->dlpi_tls_modid, *sym_addr};
          return 1;
        }
        return 0;
      };

  dl_iterate_phdr(iterate_cb, (void*)&cb);
  return result;
}

at::optional<TLSIndex> SystemLibraryImpl::tls_sym(const char* name) const {
  if (!sym(name)) {
    return at::nullopt; // before we do a bunch of slow lookups to find the
                        // module_id, check that this even defines the symbol
  }
  if (handle_ == RTLD_DEFAULT) {
    return slow_find_tls_symbol_offset(name);
  }

  struct link_map* lm = nullptr;
  DEPLOY_CHECK(
      0 == dlinfo(handle_, RTLD_DI_LINKMAP, &lm), "failed to query dlinfo");
  std::cout << "TLS dlinfo LOOKUP " << lm->l_name << " " << name << " "
            << "\n";

  ElfDynamicInfo info;
  info.initialize_from_dynamic_section(lm->l_name, lm->l_ld, lm->l_addr, true);
  auto r = info.sym(name);
  if (r) {
    size_t module_id = 0;
    DEPLOY_CHECK(
        0 == dlinfo(handle_, RTLD_DI_TLS_MODID, &module_id),
        "failed to query dlinfo for module_id");
    return TLSIndex{module_id, *r};
  }
  return at::nullopt;
}

// dlopen does not accept additional search paths as an argument.
// however, a normal DT_NEEDED library load inherits the runpath of its parents.
// So we need to pre-find all the libraries and call dlopen on them directly to
// get the same behavior. We can find the dependencies by reading the libraries'
// dynamic sections for recursive DT_NEEDED entries.
void resolve_needed_libraries(
    std::vector<std::shared_ptr<SymbolProvider>>& libraries,
    const std::string& origin_relative,
    std::vector<std::string>& search_path,
    const std::string& runpath_template,
    const std::vector<const char*>& needed) {
  size_t search_path_start_size = search_path.size();

  std::string origin = resolve_origin(origin_relative);
  std::vector<std::string> paths = split_path(runpath_template, ':');
  // backwards because we want paths to be searched in order but we search
  // search_path backward
  for (size_t i = paths.size(); i > 0; --i) {
    search_path.emplace_back(resolve_path(origin, paths[i - 1]));
  }

  for (const char* name : needed) {
    // std::cout << "ATTEMPTING FIND " << name << "\n";
    if (strcmp(name, "libtorch_python.so") == 0) {
      // torchvision expects it...
      continue;
    }
    // find the library, either (1) it is already loaded,
    // (2) it is an absolute path that exists,
    // (3) we find it in the search path
    // (4) we can dlopen it

    // (1) the library is already loaded
    const int base_flags = RTLD_LAZY | RTLD_LOCAL;
    void* handle = dlopen(name, base_flags | RTLD_NOLOAD);
    if (handle) {
      // std::cout << "ALREADY LOADED " << name << "\n";
      libraries.emplace_back(SystemLibrary::create(handle, true));
      continue;
    }

    std::string library_path = "";
    // (2) it is an absolute path
    if (strchr(name, '/') != nullptr) {
      library_path = name;
    } else {
      // (3) find it in the search path
      for (size_t i = search_path.size(); i > 0; --i) {
        std::stringstream ss;
        ss << search_path[i - 1] << "/" << name;
        if (access(ss.str().c_str(), F_OK) == 0) {
          library_path = ss.str();
          break;
        }
      }
    }

    std::vector<std::shared_ptr<SymbolProvider>>
        sublibraries; // these need to stay loaded until we open library_path
                      // otherwise we might dlclose a sublibrary

    if (library_path != "") {
      // std::cout << "LOOKING FOR SUBLIBRARIES FOR FILE AT PATH " <<
      // library_path << "\n";
      // we found the actual file, recursively load its deps before opening it
      // so we resolve their paths correctly
      MemFile image(library_path.c_str());
      auto search =
          load_needed_from_elf_file(library_path.c_str(), image.data());
      resolve_needed_libraries(
          sublibraries, library_path, search_path, search.first, search.second);
    } else {
      library_path = name;
    }

    // either we didn't find the file, or we have already loaded its deps
    // in both cases, we now try to call dlopen. In the case where we didn't
    // find the file, we hope that something like LD_LIBRARY_PATH knows where it
    // is. In the case where we found it, we know its deps are loaded and
    // resolved.

    // std::cout << "OPENING " << library_path << "\n";
    handle = dlopen(library_path.c_str(), base_flags);
    DEPLOY_CHECK(
        handle, "{}: could not load library, dlopen says: {}", name, dlerror());
    libraries.emplace_back(SystemLibrary::create(handle, true));
  }

  // unwind search_path stack
  search_path.erase(
      search_path.begin() + search_path_start_size, search_path.end());
}

// NOLINTNEXTLINE
extern "C" void* __dso_handle;

struct __attribute__((visibility("hidden"))) CustomLibraryImpl
    : public std::enable_shared_from_this<CustomLibraryImpl>,
      public CustomLibrary {
  CustomLibraryImpl(const char* filename, int argc, const char** argv)
      : contents_(filename),
        mapped_library_(nullptr),
        name_(filename),
        argc_(argc),
        argv_(argv) {
    pthread_key_create(&tls_key_, nullptr);
    data_ = contents_.data();
    header_ = (Elf64_Ehdr*)data_;
    program_headers_ = (Elf64_Phdr*)(data_ + header_->e_phoff);
    n_program_headers_ = header_->e_phnum;
  }
  void add_search_library(std::shared_ptr<SymbolProvider> lib) override {
    symbol_search_path_.emplace_back(std::move(lib));
  }

  void check_library_format() {
    DEPLOY_CHECK(
        0 == memcmp(header_->e_ident, ELFMAG, SELFMAG),
        "{}: not an ELF file",
        this->name_);
    DEPLOY_CHECK(
        header_->e_type == ET_DYN,
        "{}: is not shared object file",
        this->name_);
    DEPLOY_CHECK(
        header_->e_ident[EI_CLASS] == ELFCLASS64,
        "{}: is not ELF64 format",
        this->name_);
    DEPLOY_CHECK(
        header_->e_ident[EI_DATA] == ELFDATA2LSB,
        "{}: is not 2's complement, little endian",
        this->name_);
    DEPLOY_CHECK(
        header_->e_machine == EM_X86_64,
        "{}: is not in x86_64 format",
        this->name_);
  }

  void reserve_address_space() {
    Elf64_Addr min_vaddr = 0;
    Elf64_Addr max_vaddr = 0;
    mapped_size_ = phdr_table_get_load_size(
        program_headers_, n_program_headers_, &min_vaddr, &max_vaddr);
    mapped_library_ = mmap(
        nullptr, mapped_size_, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    load_bias_ =
        (const char*)mapped_library_ - reinterpret_cast<const char*>(min_vaddr);
  }

  void load_segments() {
    // from bionic
    for (size_t i = 0; i < n_program_headers_; ++i) {
      const Elf64_Phdr* phdr = &program_headers_[i];

      // Segment addresses in memory.
      Elf64_Addr seg_start = phdr->p_vaddr + load_bias_;
      Elf64_Addr seg_end = seg_start + phdr->p_memsz;

      switch (phdr->p_type) {
        case PT_DYNAMIC:
          dynamic_ = reinterpret_cast<Elf64_Dyn*>(seg_start);
          break;
        case PT_GNU_EH_FRAME:
          eh_frame_hdr_ = reinterpret_cast<EH_Frame_HDR*>(seg_start);
          DEPLOY_CHECK(
              eh_frame_hdr_->eh_frame_ptr_enc == 0x1b,
              "unsupported eh_frame_pointer_enc {}",
              eh_frame_hdr_->eh_frame_ptr_enc);
          eh_frame_ =
              (void*)((int64_t)&eh_frame_hdr_->eh_frame_ptr + eh_frame_hdr_->eh_frame_ptr);
          break;
        case PT_TLS:
          tls_file_size_ = phdr->p_filesz;
          tls_mem_size_ = phdr->p_memsz;
          tls_initalization_image_ = (void*)seg_start;
          break;
      };

      if (phdr->p_type != PT_LOAD) {
        continue;
      }

      Elf64_Addr seg_page_start = PAGE_START(seg_start);
      Elf64_Addr seg_page_end = PAGE_END(seg_end);

      Elf64_Addr seg_file_end = seg_start + phdr->p_filesz;

      // File offsets.
      Elf64_Addr file_start = phdr->p_offset;
      Elf64_Addr file_end = file_start + phdr->p_filesz;

      Elf64_Addr file_page_start = PAGE_START(file_start);
      Elf64_Addr file_length = file_end - file_page_start;

      if (contents_.size() <= 0) {
        DEPLOY_ERROR(
            "\"{}\" invalid file size: {}", name_.c_str(), contents_.size());
      }

      if (file_end > contents_.size()) {
        DEPLOY_ERROR(
            "invalid ELF file \"{}\" load segment[{}]:"
            " p_offset ({}) + p_filesz ({}) ( = {}) past end of file "
            "({})",
            name_.c_str(),
            i,
            reinterpret_cast<void*>(phdr->p_offset),
            reinterpret_cast<void*>(phdr->p_filesz),
            reinterpret_cast<void*>(file_end),
            contents_.size());
      }

      if (file_length != 0) {
        int prot = PFLAGS_TO_PROT(phdr->p_flags);

        void* seg_addr = mmap64(
            reinterpret_cast<void*>(seg_page_start),
            file_length,
            prot | PROT_WRITE, // initially everything is writable to do
                               // relocations
            MAP_FIXED | MAP_PRIVATE,
            contents_.fd(),
            file_page_start);
        fixup_prot_.emplace_back([=]() {
          mprotect(reinterpret_cast<void*>(seg_page_start), file_length, prot);
        });
        if (seg_addr == MAP_FAILED) {
          DEPLOY_ERROR(
              "couldn't map \"{}\" segment {}: {}",
              name_.c_str(),
              i,
              strerror(errno));
        }
      }

      // if the segment is writable, and does not end on a page boundary,
      // zero-fill it until the page limit.
      if ((phdr->p_flags & PF_W) != 0 && PAGE_OFFSET(seg_file_end) > 0) {
        memset(
            reinterpret_cast<void*>(seg_file_end),
            0,
            PAGE_SIZE - PAGE_OFFSET(seg_file_end));
      }

      seg_file_end = PAGE_END(seg_file_end);

      // seg_file_end is now the first page address after the file
      // content. If seg_end is larger, we need to zero anything
      // between them. This is done by using a private anonymous
      // map for all extra pages.
      if (seg_page_end > seg_file_end) {
        size_t zeromap_size = seg_page_end - seg_file_end;
        int prot = PFLAGS_TO_PROT(phdr->p_flags);
        void* zeromap = mmap(
            reinterpret_cast<void*>(seg_file_end),
            zeromap_size,
            prot | PROT_WRITE,
            MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE,
            -1,
            0);
        fixup_prot_.emplace_back([=]() {
          mprotect(reinterpret_cast<void*>(seg_file_end), zeromap_size, prot);
        });
        if (zeromap == MAP_FAILED) {
          DEPLOY_ERROR(
              "couldn't zero fill \"{}\" gap: {}",
              name_.c_str(),
              strerror(errno));
        }
      }
    }
  }
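
  // Note on the mapping strategy above: reserve_address_space() grabs one
  // contiguous PROT_NONE region big enough for every PT_LOAD segment, then
  // load_segments() maps each segment into that region with MAP_FIXED. All
  // segments are initially mapped writable so relocate() can patch them; the
  // deferred mprotect calls collected in fixup_prot_ are run later by
  // protect() to restore the intended protections.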

  size_t module_id() const {
    size_t this_as_number = (size_t)this;
    return this_as_number | TLS_LOCAL_FLAG;
  }

  void read_dynamic_section() {
    dyninfo_.initialize_from_dynamic_section(
        name_, dynamic_, load_bias_, false);
    std::vector<std::string> empty_search_path;
    resolve_needed_libraries(
        symbol_search_path_,
        name_,
        empty_search_path,
        dyninfo_.runpath_,
        dyninfo_.needed_);
  }

  at::optional<Elf64_Addr> lookup_symbol(Elf64_Xword r_info) {
    const uint32_t r_type = ELF64_R_TYPE(r_info);
    const uint32_t r_sym = ELF64_R_SYM(r_info);

    if (r_sym == 0) {
      return (Elf64_Addr)0;
    }
    auto sym_st = dyninfo_.symtab_[r_sym];
    const char* sym_name = dyninfo_.get_string(sym_st.st_name);
    if (r_type == R_X86_64_JUMP_SLOT) {
      if (strcmp(sym_name, "__tls_get_addr") == 0) {
        return (Elf64_Addr)local__tls_get_addr;
      }
      if (strcmp(sym_name, "__cxa_thread_atexit") == 0) {
        return (Elf64_Addr)__cxa_thread_atexit_impl;
      }
    }
    for (const auto& sys_lib : symbol_search_path_) {
      auto r = sys_lib->sym(sym_name);
      if (r) {
        return r;
      }
    }
    auto r = sym(sym_name);
    if (r) {
      return r;
    }
    if (ELF64_ST_BIND(sym_st.st_info) != STB_WEAK) {
      DEPLOY_ERROR(
          "{}: '{}' symbol not found in ElfFile lookup",
          name_.c_str(),
          sym_name);
    }
    return at::nullopt;
  }

  at::optional<TLSIndex> tls_lookup_symbol(Elf64_Xword r_info) {
    const uint32_t r_type = ELF64_R_TYPE(r_info);
    const uint32_t r_sym = ELF64_R_SYM(r_info);

    if (r_sym == 0) {
      return TLSIndex{
          module_id(),
          0}; // note: offset is not queried when the symbol is blank
    }

    auto sym_st = dyninfo_.symtab_[r_sym];
    const char* sym_name = dyninfo_.get_string(sym_st.st_name);
    for (const auto& sys_lib : symbol_search_path_) {
      auto r = sys_lib->tls_sym(sym_name);
      if (r) {
        return r;
      }
    }
    auto r = tls_sym(sym_name);
    if (r) {
      return r;
    }

    if (ELF64_ST_BIND(sym_st.st_info) != STB_WEAK) {
      DEPLOY_ERROR(
          "{}: '{}' symbol not found in ElfFile lookup",
          name_.c_str(),
          sym_name);
    }
    return at::nullopt;
  }

  void relocate_one(const Elf64_Rela& reloc) {
    const uint32_t r_type = ELF64_R_TYPE(reloc.r_info);

    if (r_type == 0) {
      return;
    }

    void* const rel_target =
        reinterpret_cast<void*>(reloc.r_offset + load_bias_);

    // TLS relocations need to look up symbols differently so we can get the
    // module_id
    if (r_type == R_X86_64_DTPMOD64 || r_type == R_X86_64_DTPOFF64) {
      auto tls_index = tls_lookup_symbol(reloc.r_info);
      if (!tls_index) {
        return; // skip weak relocation that wasn't found
      }
      switch (r_type) {
        case R_X86_64_DTPMOD64:
          *static_cast<size_t*>(rel_target) = tls_index->module_id;
          break;
        case R_X86_64_DTPOFF64:
          *static_cast<Elf64_Addr*>(rel_target) =
              tls_index->offset + reloc.r_addend;
          break;
      }
      return;
    }

    auto sym_addr = lookup_symbol(reloc.r_info);
    if (!sym_addr) {
      return; // skip weak relocation that wasn't found
    }

    switch (r_type) {
      case R_X86_64_JUMP_SLOT:
      case R_X86_64_64:
      case R_X86_64_GLOB_DAT: {
        const Elf64_Addr result = *sym_addr + reloc.r_addend;
        *static_cast<Elf64_Addr*>(rel_target) = result;
      } break;
      case R_X86_64_RELATIVE: {
        // In practice, r_sym is always zero, but if it weren't, the linker
        // would still look up the referenced symbol (and abort if the symbol
        // isn't found), even though it isn't used.
        const Elf64_Addr result = load_bias_ + reloc.r_addend;
        *static_cast<Elf64_Addr*>(rel_target) = result;
      } break;
      case R_X86_64_32: {
        const Elf32_Addr result = *sym_addr + reloc.r_addend;
        *static_cast<Elf32_Addr*>(rel_target) = result;
      } break;
      case R_X86_64_PC32: {
        const Elf64_Addr target = *sym_addr + reloc.r_addend;
        const Elf64_Addr base = reinterpret_cast<Elf64_Addr>(rel_target);
        const Elf32_Addr result = target - base;
        *static_cast<Elf32_Addr*>(rel_target) = result;
      } break;
      default:
        DEPLOY_ERROR("unknown reloc type {} in \"{}\"", r_type, name_.c_str());
        break;
    }
  }

  void relocate() {
    for (size_t i = 0; i < dyninfo_.n_rela_; ++i) {
      relocate_one(dyninfo_.rela_[i]);
    }
    for (size_t i = 0; i < dyninfo_.n_plt_rela_; ++i) {
      relocate_one(dyninfo_.plt_rela_[i]);
    }
  }

  void initialize() {
    call_function(dyninfo_.init_func_);
    for (size_t i = 0; i < dyninfo_.n_init_array_; ++i) {
      call_function(dyninfo_.init_array_[i]);
    }
    initialized_ = true;
  }

  void finalize() {
    for (size_t i = dyninfo_.n_fini_array_; i > 0; --i) {
      call_function(dyninfo_.fini_array_[i - 1]);
    }
    call_function(dyninfo_.fini_func_);
  }

  void register_debug_info() {
    // std::cout << "target modules add " << name_.c_str() << "\n";
    // std::cout << "target modules load -f " << name_.c_str() << " -s "
    //           << std::hex << "0x" << load_bias_ << "\n";
    __deploy_module_info.name = name_.c_str();
    __deploy_module_info.file_addr = (Elf64_Addr)contents_.data();
    __deploy_module_info.file_size = contents_.size();
    __deploy_module_info.load_bias = load_bias_;
    // debugger script sets a breakpoint on this function,
    // then reads __deploy_module_info to issue the target module commands.
    __deploy_register_code();
  }

  // remove the extra write flags from read-only sections
  void protect() {
    for (const auto& fixup : fixup_prot_) {
      fixup();
    }
  }

  void load() override {
    check_library_format();
    reserve_address_space();
    load_segments();
    read_dynamic_section();
    relocate();
    protect();
    __register_frame(eh_frame_);
    eh_frame_registered_ = true;
    register_debug_info();
    initialize();
  }

  ~CustomLibraryImpl() override {
    // std::cout << "LINKER IS UNLOADING: " << name_ << "\n";
    if (initialized_) {
      finalize();
    }
    if (eh_frame_registered_) {
      __deregister_frame(eh_frame_);
    }
    if (mapped_library_) {
      munmap(mapped_library_, mapped_size_);
    }
  }
  void call_function(linker_dtor_function_t f) {
    if (f == nullptr || (int64_t)f == -1)
      return;
    f();
  }
  void call_function(linker_ctor_function_t f) {
    if (f == nullptr || (int64_t)f == -1)
      return;
    f(argc_, argv_, environ);
  }

  at::optional<Elf64_Addr> sym(const char* name) const override {
    return dyninfo_.sym(name);
  }

  at::optional<TLSIndex> tls_sym(const char* name) const override {
    auto r = dyninfo_.sym(name);
    if (r) {
      return TLSIndex{module_id(), *r};
    }
    return at::nullopt;
  }

  void* tls_addr(size_t offset) {
    // this was a TLS entry for one of our modules, so we use pthreads to
    // emulate thread local state.
    void* start = pthread_getspecific(tls_key_);
    if (!start) {
      auto tls_mem = new TLSMemory(shared_from_this(), tls_mem_size_);
      __cxa_thread_atexit_impl(delete_TLSMemory, tls_mem, &__dso_handle);
      start = tls_mem->mem_;
      memcpy(start, tls_initalization_image_, tls_file_size_);
      memset(
          (void*)((const char*)start + tls_file_size_),
          0,
          tls_mem_size_ - tls_file_size_);
      pthread_setspecific(tls_key_, start);
    }
    return (void*)((const char*)start + offset);
  }

 private:
  MemFile contents_;
  const char* data_ = nullptr;
  const Elf64_Ehdr* header_ = nullptr;
  const Elf64_Phdr* program_headers_ = nullptr;
  const EH_Frame_HDR* eh_frame_hdr_ = nullptr;
  void* eh_frame_ = nullptr;
  size_t n_program_headers_ = 0;
  void* mapped_library_ = nullptr;
  size_t mapped_size_ = 0;
  Elf64_Addr load_bias_ = 0;
  Elf64_Dyn* dynamic_ = nullptr;
  ElfDynamicInfo dyninfo_;
  std::string name_;
  int argc_ = 0;
  const char** argv_ = nullptr;
  bool initialized_ = false;
  bool eh_frame_registered_ = false;

  pthread_key_t tls_key_ = 0;
  void* tls_initalization_image_ = nullptr;
  size_t tls_file_size_ = 0;
  size_t tls_mem_size_ = 0;

  std::vector<std::shared_ptr<SymbolProvider>> symbol_search_path_;
  std::vector<std::function<void(void)>> fixup_prot_;
};

std::shared_ptr<CustomLibrary> CustomLibrary::create(
    const char* filename,
    int argc,
    const char** argv) {
  return std::make_shared<CustomLibraryImpl>(filename, argc, argv);
}

static void* local__tls_get_addr(TLSIndex* idx) {
  if ((idx->module_id & TLS_LOCAL_FLAG) != 0) {
    return ((CustomLibraryImpl*)(idx->module_id & ~TLS_LOCAL_FLAG))
        ->tls_addr(idx->offset);
  }
  return __tls_get_addr(idx);
}

} // namespace deploy
} // namespace torch