diff --git a/include/hip/hcc_detail/functional_grid_launch.hpp b/include/hip/hcc_detail/functional_grid_launch.hpp index cd90aa401a..2fbda48629 100644 --- a/include/hip/hcc_detail/functional_grid_launch.hpp +++ b/include/hip/hcc_detail/functional_grid_launch.hpp @@ -103,18 +103,23 @@ inline std::vector make_kernarg( if (sizeof...(Formals) == 0) return {}; - const auto it = function_names().find( - reinterpret_cast(kernel)); - + auto it = function_names().find(reinterpret_cast(kernel)); if (it == function_names().cend()) { - throw std::runtime_error{"Undefined __global__ function."}; + it = + function_names(true).find(reinterpret_cast(kernel)); + if (it == function_names().cend()) { + throw std::runtime_error{"Undefined __global__ function."}; + } } - const auto it1 = kernargs().find(it->second); - + auto it1 = kernargs().find(it->second); if (it1 == kernargs().end()) { - throw std::runtime_error{ - "Missing metadata for __global__ function: " + it->second}; + it1 = kernargs(true).find(it->second); + + if (it1 == kernargs().end()) { + throw std::runtime_error{ + "Missing metadata for __global__ function: " + it->second}; + } } std::tuple to_formals{std::move(actuals)}; diff --git a/include/hip/hcc_detail/program_state.hpp b/include/hip/hcc_detail/program_state.hpp index 92bef22172..da13c7c3db 100644 --- a/include/hip/hcc_detail/program_state.hpp +++ b/include/hip/hcc_detail/program_state.hpp @@ -99,8 +99,9 @@ const std::unordered_map& function_names(bool rebuild = false); std::unordered_map& globals(bool rebuild = false); -std::unordered_map< - std::string, std::vector>>& kernargs(); +const std::unordered_map< + std::string, std::vector>>& + kernargs(bool rebuild = false); hsa_executable_t load_executable(const std::string& file, hsa_executable_t executable, hsa_agent_t agent); diff --git a/src/program_state.cpp b/src/program_state.cpp index 38cae74dcc..bb906b0ad9 100644 --- a/src/program_state.cpp +++ b/src/program_state.cpp @@ -538,6 +538,7 @@ const unordered_map>>& fu // created previously function_names(rebuild); + kernargs(rebuild); kernels(rebuild); globals(rebuild); } @@ -585,12 +586,12 @@ unordered_map& globals(bool rebuild) { return r; } - -unordered_map>>& kernargs() { +const unordered_map>>& kernargs( + bool rebuild) { static unordered_map>> r; static once_flag f; - call_once(f, []() { + static const auto build_map = [](decltype(r)& x) { for (auto&& isa_blobs : code_object_blobs()) { for (auto&& blob : isa_blobs.second) { stringstream tmp{std::string{blob.cbegin(), blob.cend()}}; @@ -598,10 +599,28 @@ unordered_map>>& kernargs() { elfio reader; if (!reader.load(tmp)) continue; - read_kernarg_metadata(reader, r); + read_kernarg_metadata(reader, x); } } - }); + }; + call_once(f, []() { r.reserve(function_names().size()); build_map(r); }); + + if (rebuild) { + static mutex mtx; + thread_local static decltype(r) tmp; + + { + lock_guard lck{mtx}; + + tmp.insert(r.cbegin(), r.cend()); // Should use merge in C++17. + } + + build_map(tmp); + + lock_guard lck{mtx}; + + r.insert(tmp.cbegin(), tmp.cend()); + } return r; }