#include "../include/hip/hcc_detail/program_state.hpp"

#include "../include/hip/hcc_detail/code_object_bundle.hpp"

#include "hip_hcc_internal.h"
#include "hsa_helpers.hpp"
#include "trace_helper.h"

#include "elfio/elfio.hpp"

// NOTE(review): the system header names and every template argument list in
// this file were missing from the reviewed text. They have been reconstructed
// from how the names are used below; confirm them against version control and
// against the declarations in program_state.hpp.
#include <link.h>

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <memory>
#include <mutex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

using namespace ELFIO;
using namespace hip_impl;
using namespace std;

namespace {
// Plain value copy of one ELF symbol table entry, as filled in by
// symbol_section_accessor::get_symbol.
struct Symbol {
    string name;
    ELFIO::Elf64_Addr value = 0;
    Elf_Xword size = 0;
    Elf_Half sect_idx = 0;
    uint8_t bind = 0;
    uint8_t type = 0;
    uint8_t other = 0;
};

// Reads the symbol at index `idx` of `section` into a Symbol.
// `idx` must be smaller than the number of symbols (asserted).
inline Symbol read_symbol(const symbol_section_accessor& section, unsigned int idx) {
    assert(idx < section.get_symbols_num());

    Symbol r;
    section.get_symbol(idx, r.name, r.value, r.size, r.bind, r.type, r.sect_idx, r.other);

    return r;
}

// Returns the first section of `reader` for which predicate `p` holds, or
// nullptr when no section matches.
template <typename P>
inline section* find_section_if(elfio& reader, P p) {
    const auto it = find_if(reader.sections.begin(), reader.sections.end(), move(p));

    return it != reader.sections.end() ? *it : nullptr;
}

// Collects the names of all named symbols in `section` whose section index is
// SHN_UNDEF, i.e. symbols the object references but does not define.
vector<string> copy_names_of_undefined_symbols(const symbol_section_accessor& section) {
    vector<string> r;

    for (auto i = 0u; i != section.get_symbols_num(); ++i) {
        // TODO: this is boyscout code, caching the temporaries
        //       may be of worth.
        auto tmp = read_symbol(section, i);
        if (tmp.sect_idx == SHN_UNDEF && !tmp.name.empty()) {
            r.push_back(std::move(tmp.name));
        }
    }

    return r;
}

// Returns a process-wide map from the name of every defined STT_OBJECT symbol
// found in the SHT_SYMTAB sections of the loaded objects to its
// (address, size) pair. The map is built on first use; passing `rebuild` as
// true clears it and scans the loaded objects again.
const std::unordered_map<std::string, std::pair<ELFIO::Elf64_Addr, ELFIO::Elf_Xword>>&
symbol_addresses(bool rebuild = false) {
    static unordered_map<string, pair<Elf64_Addr, Elf_Xword>> r;
    static once_flag f;

    auto cons = [rebuild]() {
        if (rebuild) {
            r.clear();
        }

        // Number of objects visited in THIS pass. It is handed to the callback
        // through dl_iterate_phdr's data pointer instead of living in a
        // function-local static: a static counter is never reset, so on a
        // rebuild pass the main program would be opened through its (empty)
        // dlpi_name, fail to load, and lose all of its symbols.
        unsigned int visited = 0u;

        dl_iterate_phdr(
            [](dl_phdr_info* info, size_t, void* data) {
                static constexpr const char self[] = "/proc/self/exe";

                // The first object reported is treated as the main program; it
                // is read through /proc/self/exe and its symbol values are
                // used without adding a load offset.
                auto& seen = *static_cast<unsigned int*>(data);
                const bool is_main_program = (seen++ == 0u);

                elfio reader;
                if (!reader.load(is_main_program ? self : info->dlpi_name)) return 0;

                const auto symtab_section = find_section_if(
                    reader, [](const class section* x) { return x->get_type() == SHT_SYMTAB; });
                if (!symtab_section) return 0;

                const symbol_section_accessor symtab{reader, symtab_section};
                for (auto i = 0u; i != symtab.get_symbols_num(); ++i) {
                    auto tmp = read_symbol(symtab, i);

                    if (tmp.type == STT_OBJECT && tmp.sect_idx != SHN_UNDEF) {
                        const auto addr = tmp.value + (is_main_program ? 0 : info->dlpi_addr);
                        r.emplace(move(tmp.name), make_pair(addr, tmp.size));
                    }
                }

                return 0;
            },
            &visited);
    };

    call_once(f, cons);

    if (rebuild) {
        cons();
    }

    return r;
}

// For every undefined symbol of the code object's dynamic symbol table, looks
// up the host symbol of the same name, records it in globals(), locks the host
// memory with hsa_amd_memory_lock and defines the resulting pointer as an
// agent global variable of `executable`.
//
// Does nothing when `code_object_dynsym` is null.
// Throws std::runtime_error when a referenced symbol has no host definition.
void associate_code_object_symbols_with_host_allocation(const elfio& reader,
                                                        section* code_object_dynsym,
                                                        hsa_agent_t agent,
                                                        hsa_executable_t executable) {
    if (!code_object_dynsym) return;

    const auto undefined_symbols =
        copy_names_of_undefined_symbols(symbol_section_accessor{reader, code_object_dynsym});

    for (auto&& x : undefined_symbols) {
        // NOTE(review): this returns (rather than continuing) as soon as one
        // symbol is already registered, so later symbols in the list are not
        // processed for this executable -- confirm this is intended.
        if (globals().find(x) != globals().cend()) return;

        const auto it1 = symbol_addresses().find(x);

        if (it1 == symbol_addresses().cend()) {
            throw runtime_error{"Global symbol: " + x + " is undefined."};
        }

        static mutex mtx;
        lock_guard<mutex> lck{mtx};

        // Re-check under the lock: another thread may have registered the
        // symbol between the first lookup and acquiring the mutex.
        if (globals().find(x) != globals().cend()) return;

        globals().emplace(x, reinterpret_cast<void*>(it1->second.first));
        void* p = nullptr;
        hsa_amd_memory_lock(reinterpret_cast<void*>(it1->second.first), it1->second.second,
                            nullptr,  // All agents.
                            0, &p);

        hsa_executable_agent_global_variable_define(executable, agent, x.c_str(), p);
    }
}

// Returns a copy of the bytes of the ".kernel" section of the running
// executable (read through /proc/self/exe); empty when there is no such
// section. Throws std::runtime_error when the executable cannot be loaded.
vector<char> code_object_blob_for_process() {
    static constexpr const char self[] = "/proc/self/exe";
    static constexpr const char kernel_section[] = ".kernel";

    elfio reader;

    if (!reader.load(self)) {
        throw runtime_error{"Failed to load ELF file for current process."};
    }

    auto kernels =
        find_section_if(reader, [](const section* x) { return x->get_name() == kernel_section; });

    vector<char> r;
    if (kernels) {
        r.insert(r.end(), kernels->get_data(), kernels->get_data() + kernels->get_size());
    }

    return r;
}

// Returns a process-wide map from ISA to the code object blobs extracted from
// the bundled ".kernel" sections of the executable and of the loaded shared
// objects. With `rebuild` set, the map and the pending blob list are cleared
// first; libraries whose ".kernel" section was already consumed are skipped.
const unordered_map<hsa_isa_t, vector<vector<char>>>& code_object_blobs(bool rebuild = false) {
    static unordered_map<hsa_isa_t, vector<vector<char>>> r;
    static once_flag f;

    auto cons = [rebuild]() {
        // names of shared libraries whose .kernel sections are already loaded
        static unordered_set<string> lib_names;
        static vector<vector<char>> blobs{code_object_blob_for_process()};

        if (rebuild) {
            r.clear();
            blobs.clear();
        }

        dl_iterate_phdr(
            [](dl_phdr_info* info, std::size_t, void*) {
                elfio tmp;
                if ((lib_names.find(info->dlpi_name) == lib_names.end()) &&
                    (tmp.load(info->dlpi_name))) {
                    const auto it = find_section_if(
                        tmp, [](const section* x) { return x->get_name() == ".kernel"; });

                    if (it) {
                        blobs.emplace_back(it->get_data(), it->get_data() + it->get_size());
                        // register the shared library as already loaded
                        lib_names.emplace(info->dlpi_name);
                    }
                }
                return 0;
            },
            nullptr);

        for (auto&& blob : blobs) {
            Bundled_code_header tmp{blob};
            if (valid(tmp)) {
                for (auto&& bundle : bundles(tmp)) {
                    r[triple_to_hsa_isa(bundle.triple)].push_back(bundle.blob);
                }
            }
        }
    };

    call_once(f, cons);

    if (rebuild) {
        cons();
    }

    return r;
}

// Returns (value, name) for every named, defined STT_FUNC symbol of `symtab`.
vector<pair<uintptr_t, string>> function_names_for(const elfio& reader, section* symtab) {
    vector<pair<uintptr_t, string>> r;
    symbol_section_accessor symbols{reader, symtab};

    for (auto i = 0u; i != symbols.get_symbols_num(); ++i) {
        // TODO: this is boyscout code, caching the temporaries
        //       may be of worth.
        auto tmp = read_symbol(symbols, i);

        if (tmp.type == STT_FUNC && tmp.sect_idx != SHN_UNDEF && !tmp.name.empty()) {
            // tmp is a local copy that is discarded right after, so the name
            // can be moved instead of copied.
            r.emplace_back(tmp.value, std::move(tmp.name));
        }
    }

    return r;
}

// Returns the (value, name) pairs of the functions defined in the SHT_SYMTAB
// section of the running executable. Built on first use, rebuilt when
// `rebuild` is true. Throws std::runtime_error when the executable cannot be
// loaded.
const vector<pair<uintptr_t, string>>& function_names_for_process(bool rebuild = false) {
    static constexpr const char self[] = "/proc/self/exe";

    static vector<pair<uintptr_t, string>> r;
    static once_flag f;

    auto cons = [rebuild]() {
        elfio reader;

        if (!reader.load(self)) {
            throw runtime_error{"Failed to load the ELF file for the current process."};
        }

        auto symtab =
            find_section_if(reader, [](const section* x) { return x->get_type() == SHT_SYMTAB; });

        if (symtab) r = function_names_for(reader, symtab);
    };

    call_once(f, cons);

    if (rebuild) {
        cons();
    }

    return r;
}

// Returns a process-wide map from kernel name to the kernel symbols of that
// name found in the executables returned by executables(). With `rebuild`
// set, the map is cleared and executables() is rebuilt first.
const unordered_map<string, vector<hsa_executable_symbol_t>>& kernels(bool rebuild = false) {
    static unordered_map<string, vector<hsa_executable_symbol_t>> r;
    static once_flag f;

    auto cons = [rebuild]() {
        if (rebuild) {
            r.clear();
            executables(rebuild);
        }

        // Symbol iteration callback: records every symbol of kind kernel.
        static const auto copy_kernels = [](hsa_executable_t, hsa_agent_t,
                                            hsa_executable_symbol_t s, void*) {
            if (type(s) == HSA_SYMBOL_KIND_KERNEL) r[name(s)].push_back(s);

            return HSA_STATUS_SUCCESS;
        };

        for (auto&& agent_executables : executables()) {
            for (auto&& executable : agent_executables.second) {
                hsa_executable_iterate_agent_symbols(executable, agent_executables.first,
                                                     copy_kernels, nullptr);
            }
        }
    };

    call_once(f, cons);

    if (rebuild) {
        cons();
    }

    return r;
}

// Creates a code object reader over the bytes of `file`, loads it into
// `executable` for `agent` and freezes the executable. Does nothing when
// `file` is empty. The reader is stored in a function-local static vector.
void load_code_object_and_freeze_executable(
    const string& file, hsa_agent_t agent,
    hsa_executable_t executable) {  // TODO: the following sequence is inefficient, should be
                                    // refactored
    //       into a single load of the file and subsequent ELFIO
    //       processing.
    static const auto cor_deleter = [](hsa_code_object_reader_t* p) {
        if (p) {
            hsa_code_object_reader_destroy(*p);
            delete p;
        }
    };

    using RAII_code_reader = unique_ptr<hsa_code_object_reader_t, decltype(cor_deleter)>;

    if (!file.empty()) {
        // Value-initialise the handle so that the deleter never destroys an
        // indeterminate handle should reader creation fail.
        RAII_code_reader tmp{new hsa_code_object_reader_t{}, cor_deleter};
        hsa_code_object_reader_create_from_memory(file.data(), file.size(), tmp.get());

        hsa_executable_load_agent_code_object(executable, agent, *tmp, nullptr, nullptr);

        hsa_executable_freeze(executable, nullptr);

        static vector<RAII_code_reader> code_readers;
        static mutex mtx;

        lock_guard<mutex> lck{mtx};
        code_readers.push_back(move(tmp));
    }
}
}  // namespace

namespace hip_impl {
// Returns a process-wide map from agent to the executables created for it
// from the code object blobs matching one of the agent's ISAs. With `rebuild`
// set, code_object_blobs() is rebuilt first and the resulting executables are
// appended to the existing ones.
const unordered_map<hsa_agent_t, vector<hsa_executable_t>>& executables(
    bool rebuild) {  // TODO: This leaks the hsa_executable_ts, it should use RAII.
    static unordered_map<hsa_agent_t, vector<hsa_executable_t>> r;
    static once_flag f;

    auto cons = [rebuild]() {
        static const auto accelerators = hc::accelerator::get_all();

        if (rebuild) {
            // do NOT clear r so we reuse instances of hsa_executable_t
            // created previously
            code_object_blobs(rebuild);
        }

        for (auto&& acc : accelerators) {
            auto agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());

            if (!agent || !acc.is_hsa_accelerator()) continue;

            hsa_agent_iterate_isas(*agent,
                                   [](hsa_isa_t x, void* pa) {
                                       const auto it = code_object_blobs().find(x);

                                       if (it != code_object_blobs().cend()) {
                                           hsa_agent_t a = *static_cast<hsa_agent_t*>(pa);

                                           for (auto&& blob : it->second) {
                                               hsa_executable_t tmp = {};

                                               hsa_executable_create_alt(
                                                   HSA_PROFILE_FULL,
                                                   HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
                                                   nullptr, &tmp);

                                               // TODO: this is massively inefficient and only
                                               //       meant for illustration.
                                               string blob_to_str{blob.cbegin(), blob.cend()};
                                               tmp = load_executable(blob_to_str, tmp, a);

                                               // Keep only executables that were
                                               // successfully loaded.
                                               if (tmp.handle) r[a].push_back(tmp);
                                           }
                                       }

                                       return HSA_STATUS_SUCCESS;
                                   },
                                   agent);
        }
    };

    call_once(f, cons);

    if (rebuild) {
        cons();
    }

    return r;
}

// Returns a process-wide map from function address to function name, seeded
// with the functions of the running executable and extended with the
// functions of every loaded object (symbol value offset by the object's
// dlpi_addr). With `rebuild` set, the map is cleared and repopulated.
const unordered_map<uintptr_t, string>& function_names(bool rebuild) {
    static unordered_map<uintptr_t, string> r{function_names_for_process().cbegin(),
                                              function_names_for_process().cend()};
    static once_flag f;

    auto cons = [rebuild]() {
        if (rebuild) {
            r.clear();
            function_names_for_process(rebuild);
            r.insert(function_names_for_process().cbegin(), function_names_for_process().cend());
        }

        dl_iterate_phdr(
            [](dl_phdr_info* info, size_t, void*) {
                elfio tmp;
                if (tmp.load(info->dlpi_name)) {
                    const auto it = find_section_if(
                        tmp, [](const section* x) { return x->get_type() == SHT_SYMTAB; });

                    if (it) {
                        auto n = function_names_for(tmp, it);

                        // Turn symbol values into addresses in this process.
                        for (auto&& entry : n) entry.first += info->dlpi_addr;

                        r.insert(make_move_iterator(n.begin()), make_move_iterator(n.end()));
                    }
                }

                return 0;
            },
            nullptr);
    };

    call_once(f, cons);

    if (rebuild) {
        static mutex mtx;
        lock_guard<mutex> lck{mtx};
        cons();
    }

    return r;
}

// Returns a process-wide map from host function address to the
// (agent, Kernel_descriptor) pairs of the kernels carrying the same name as
// that function. With `rebuild` set, function_names(), kernels() and
// globals() are rebuilt first and new entries are appended.
const unordered_map<uintptr_t, vector<pair<hsa_agent_t, Kernel_descriptor>>>& functions(
    bool rebuild) {
    static unordered_map<uintptr_t, vector<pair<hsa_agent_t, Kernel_descriptor>>> r;
    static once_flag f;

    auto cons = [rebuild]() {
        if (rebuild) {
            // do NOT clear r so we reuse instances of pair<hsa_agent_t, Kernel_descriptor>
            // created previously
            function_names(rebuild);
            kernels(rebuild);
            globals(rebuild);
        }

        for (auto&& function : function_names()) {
            const auto it = kernels().find(function.second);

            if (it != kernels().cend()) {
                for (auto&& kernel_symbol : it->second) {
                    r[function.first].emplace_back(
                        agent(kernel_symbol),
                        Kernel_descriptor{kernel_object(kernel_symbol), it->first});
                }
            }
        }
    };

    call_once(f, cons);

    if (rebuild) {
        static mutex mtx;
        lock_guard<mutex> lck{mtx};
        cons();
    }

    return r;
}

// Returns the process-wide, mutable map from global symbol name to host
// address. With `rebuild` set, the map is cleared and symbol_addresses() is
// rebuilt; in both cases capacity is reserved for symbol_addresses().size()
// entries.
unordered_map<string, void*>& globals(bool rebuild) {
    static unordered_map<string, void*> r;
    static once_flag f;

    auto cons = [rebuild]() {
        if (rebuild) {
            r.clear();
            symbol_addresses(rebuild);
        }

        r.reserve(symbol_addresses().size());
    };

    call_once(f, cons);

    if (rebuild) {
        cons();
    }

    return r;
}
hsa_executable_t load_executable(const string& file, hsa_executable_t executable, hsa_agent_t agent) { elfio reader; stringstream tmp{file}; if (!reader.load(tmp)) return hsa_executable_t{}; const auto code_object_dynsym = find_section_if( reader, [](const ELFIO::section* x) { return x->get_type() == SHT_DYNSYM; }); associate_code_object_symbols_with_host_allocation(reader, code_object_dynsym, agent, executable); load_code_object_and_freeze_executable(file, agent, executable); return executable; } // HIP startup kernel loader logic // When enabled HIP_STARTUP_LOADER, HIP will load the kernels and setup // the function symbol map on program startup class startup_kernel_loader { private: startup_kernel_loader() { functions(); } startup_kernel_loader(const startup_kernel_loader&) = delete; startup_kernel_loader& operator=(const startup_kernel_loader&) = delete; static startup_kernel_loader skl; }; extern "C" void __attribute__((constructor)) __startup_kernel_loader_init() { if (atoi(std::getenv("HIP_STARTUP_LOADER")) == 1) functions(); } extern "C" void __attribute__((destructor)) __startup_kernel_loader_fini() { } } // Namespace hip_impl.