diff --git a/projects/hip/include/hip/hcc_detail/program_state.hpp b/projects/hip/include/hip/hcc_detail/program_state.hpp index ac689fdb89..bdb87b3509 100644 --- a/projects/hip/include/hip/hcc_detail/program_state.hpp +++ b/projects/hip/include/hip/hcc_detail/program_state.hpp @@ -93,11 +93,12 @@ public: } }; -const std::unordered_map>& executables(); +const std::unordered_map>& executables( + bool rebuild = false); const std::unordered_map>>& -functions(); -const std::unordered_map& function_names(); -std::unordered_map& globals(); +functions(bool rebuild = false); +const std::unordered_map& function_names(bool rebuild = false); +std::unordered_map& globals(bool rebuild = false); hsa_executable_t load_executable(const std::string& file, hsa_executable_t executable, hsa_agent_t agent); diff --git a/projects/hip/src/functional_grid_launch.inl b/projects/hip/src/functional_grid_launch.inl index 704cf9ffeb..6283d1aaba 100644 --- a/projects/hip/src/functional_grid_launch.inl +++ b/projects/hip/src/functional_grid_launch.inl @@ -92,13 +92,19 @@ namespace hip_impl hipStream_t stream, void** kernarg) { - const auto it0 = functions().find(function_address); + auto it0 = functions().find(function_address); if (it0 == functions().cend()) { - throw runtime_error{ - "No device code available for function: " + - name(function_address) - }; + // Re-init device code maps once again to help locate kernels + // loaded after HIP runtime initialization via means such as + // dlopen(). + it0 = functions(true).find(function_address); + if (it0 == functions().cend()) { + throw runtime_error{ + "No device code available for function: " + + name(function_address) + }; + } } auto agent = target_agent(stream); diff --git a/projects/hip/src/program_state.cpp b/projects/hip/src/program_state.cpp index e8aff28faf..f5e56db6be 100644 --- a/projects/hip/src/program_state.cpp +++ b/projects/hip/src/program_state.cpp @@ -74,11 +74,15 @@ vector copy_names_of_undefined_symbols(const symbol_section_accessor& se } const std::unordered_map>& -symbol_addresses() { +symbol_addresses(bool rebuild = false) { static unordered_map> r; static once_flag f; - call_once(f, []() { + auto cons = [rebuild]() { + if (rebuild) { + r.clear(); + } + dl_iterate_phdr( [](dl_phdr_info* info, size_t, void*) { static constexpr const char self[] = "/proc/self/exe"; @@ -108,7 +112,12 @@ symbol_addresses() { return 0; }, nullptr); - }); + }; + + call_once(f, cons); + if (rebuild) { + cons(); + } return r; } @@ -166,13 +175,18 @@ vector code_object_blob_for_process() { return r; } -const unordered_map>>& code_object_blobs() { +const unordered_map>>& code_object_blobs(bool rebuild = false) { static unordered_map>> r; static once_flag f; - call_once(f, []() { + auto cons = [rebuild]() { static vector> blobs{code_object_blob_for_process()}; + if (rebuild) { + blobs.clear(); + blobs.push_back(code_object_blob_for_process()); + } + dl_iterate_phdr( [](dl_phdr_info* info, std::size_t, void*) { elfio tmp; @@ -194,7 +208,13 @@ const unordered_map>>& code_object_blobs() { } } } - }); + }; + + + call_once(f, cons); + if (rebuild) { + cons(); + } return r; } @@ -216,13 +236,13 @@ vector> function_names_for(const elfio& reader, section* return r; } -const vector>& function_names_for_process() { +const vector>& function_names_for_process(bool rebuild = false) { static constexpr const char self[] = "/proc/self/exe"; static vector> r; static once_flag f; - call_once(f, []() { + auto cons = [rebuild]() { elfio reader; if (!reader.load(self)) { @@ -233,16 +253,26 @@ const vector>& function_names_for_process() { find_section_if(reader, [](const section* x) { return x->get_type() == SHT_SYMTAB; }); if (symtab) r = function_names_for(reader, symtab); - }); + }; + + call_once(f, cons); + if (rebuild) { + cons(); + } return r; } -const unordered_map>& kernels() { +const unordered_map>& kernels(bool rebuild = false) { static unordered_map> r; static once_flag f; - call_once(f, []() { + auto cons = [rebuild]() { + if (rebuild) { + r.clear(); + executables(rebuild); + } + static const auto copy_kernels = [](hsa_executable_t, hsa_agent_t, hsa_executable_symbol_t s, void*) { if (type(s) == HSA_SYMBOL_KIND_KERNEL) r[name(s)].push_back(s); @@ -256,7 +286,12 @@ const unordered_map>& kernels() { copy_kernels, nullptr); } } - }); + }; + + call_once(f, cons); + if (rebuild) { + cons(); + } return r; } @@ -295,13 +330,18 @@ void load_code_object_and_freeze_executable( namespace hip_impl { const unordered_map>& -executables() { // TODO: This leaks the hsa_executable_ts, it should use RAII. +executables(bool rebuild) { // TODO: This leaks the hsa_executable_ts, it should use RAII. static unordered_map> r; static once_flag f; - call_once(f, []() { + auto cons = [rebuild]() { static const auto accelerators = hc::accelerator::get_all(); + if (rebuild) { + r.clear(); + code_object_blobs(rebuild); + } + for (auto&& acc : accelerators) { auto agent = static_cast(acc.get_hsa_agent()); @@ -335,17 +375,29 @@ executables() { // TODO: This leaks the hsa_executable_ts, it should use RAII. }, agent); } - }); + }; + + call_once(f, cons); + if (rebuild) { + cons(); + } return r; } -const unordered_map& function_names() { +const unordered_map& function_names(bool rebuild) { static unordered_map r{function_names_for_process().cbegin(), function_names_for_process().cend()}; static once_flag f; - call_once(f, []() { + auto cons = [rebuild]() { + if (rebuild) { + r.clear(); + function_names_for_process(rebuild); + r.insert(function_names_for_process().cbegin(), + function_names_for_process().cend()); + } + dl_iterate_phdr( [](dl_phdr_info* info, size_t, void*) { elfio tmp; @@ -365,16 +417,30 @@ const unordered_map& function_names() { return 0; }, nullptr); - }); + }; + + call_once(f, cons); + if (rebuild) { + static mutex mtx; + lock_guard lck{mtx}; + cons(); + } return r; } -const unordered_map>>& functions() { +const unordered_map>>& functions(bool rebuild) { static unordered_map>> r; static once_flag f; - call_once(f, []() { + auto cons = [rebuild]() { + if (rebuild) { + r.clear(); + function_names(rebuild); + kernels(rebuild); + globals(rebuild); + } + for (auto&& function : function_names()) { const auto it = kernels().find(function.second); @@ -386,15 +452,34 @@ const unordered_map>>& fu } } } - }); + }; + + call_once(f, cons); + if (rebuild) { + static mutex mtx; + lock_guard lck{mtx}; + cons(); + } return r; } -unordered_map& globals() { +unordered_map& globals(bool rebuild) { static unordered_map r; static once_flag f; - call_once(f, []() { r.reserve(symbol_addresses().size()); }); + auto cons =[rebuild]() { + if (rebuild) { + r.clear(); + symbol_addresses(rebuild); + } + + r.reserve(symbol_addresses().size()); + }; + + call_once(f, cons); + if (rebuild) { + cons(); + } return r; }