From 7bcb83a05f111b8657fb00d68b29ecd852f81250 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Tue, 18 Dec 2018 23:05:39 +0000 Subject: [PATCH 1/3] Start re-working 731 for 2.0. [ROCm/hip commit: 25c7e5d6096330075559a1fea50a58ffb1a41295] --- .../hip/hcc_detail/functional_grid_launch.hpp | 38 +++++-- .../include/hip/hcc_detail/program_state.hpp | 2 + projects/hip/src/program_state.cpp | 104 ++++++++++++++++++ 3 files changed, 137 insertions(+), 7 deletions(-) diff --git a/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp b/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp index e678f25aa2..cd90aa401a 100644 --- a/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp +++ b/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp @@ -33,6 +33,7 @@ THE SOFTWARE. #include #include +#include #include #include #include @@ -56,7 +57,9 @@ template < typename... Ts, typename std::enable_if::type* = nullptr> inline std::vector make_kernarg( - std::vector kernarg, const std::tuple&) { + const std::tuple&, + const std::vector>&, + std::vector kernarg) { return kernarg; } @@ -65,7 +68,9 @@ template < typename... Ts, typename std::enable_if::type* = nullptr> inline std::vector make_kernarg( - std::vector kernarg, const std::tuple& formals) { + const std::tuple& formals, + const std::vector>& size_align, + std::vector kernarg) { using T = typename std::tuple_element>::type; static_assert( @@ -80,24 +85,43 @@ inline std::vector make_kernarg( #endif kernarg.resize(round_up_to_next_multiple_nonnegative( - kernarg.size(), alignof(T)) + sizeof(T)); + kernarg.size(), size_align[n].second) + size_align[n].first); - new (kernarg.data() + kernarg.size() - sizeof(T)) T{std::get(formals)}; + std::memcpy( + kernarg.data() + kernarg.size() - size_align[n].first, + &std::get(formals), + size_align[n].first); - return make_kernarg(std::move(kernarg), formals); + return make_kernarg(formals, size_align, std::move(kernarg)); } template inline std::vector make_kernarg( - void (*)(Formals...), std::tuple actuals) { + void (*kernel)(Formals...), std::tuple actuals) { static_assert(sizeof...(Formals) == sizeof...(Actuals), "The count of formal arguments must match the count of actuals."); + if (sizeof...(Formals) == 0) return {}; + + const auto it = function_names().find( + reinterpret_cast(kernel)); + + if (it == function_names().cend()) { + throw std::runtime_error{"Undefined __global__ function."}; + } + + const auto it1 = kernargs().find(it->second); + + if (it1 == kernargs().end()) { + throw std::runtime_error{ + "Missing metadata for __global__ function: " + it->second}; + } + std::tuple to_formals{std::move(actuals)}; std::vector kernarg; kernarg.reserve(sizeof(to_formals)); - return make_kernarg<0>(std::move(kernarg), to_formals); + return make_kernarg<0>(to_formals, it1->second, std::move(kernarg)); } void hipLaunchKernelGGLImpl(std::uintptr_t function_address, const dim3& numBlocks, diff --git a/projects/hip/include/hip/hcc_detail/program_state.hpp b/projects/hip/include/hip/hcc_detail/program_state.hpp index bdb87b3509..92bef22172 100644 --- a/projects/hip/include/hip/hcc_detail/program_state.hpp +++ b/projects/hip/include/hip/hcc_detail/program_state.hpp @@ -99,6 +99,8 @@ const std::unordered_map& function_names(bool rebuild = false); std::unordered_map& globals(bool rebuild = false); +std::unordered_map< + std::string, std::vector>>& kernargs(); hsa_executable_t load_executable(const std::string& file, hsa_executable_t executable, hsa_agent_t agent); diff --git a/projects/hip/src/program_state.cpp b/projects/hip/src/program_state.cpp index 88cdeeb404..922d827be4 100644 --- a/projects/hip/src/program_state.cpp +++ b/projects/hip/src/program_state.cpp @@ -340,6 +340,90 @@ void load_code_object_and_freeze_executable( code_readers.push_back(move(tmp)); } } + +size_t parse_args( + const string& metadata, + size_t f, + size_t l, + vector>& size_align) { + if (f == l) return f; + if (!size_align.empty()) return l; + + do { + static constexpr size_t size_sz{5}; + f = metadata.find("Size:", f) + size_sz; + + if (l <= f) return f; + + auto size = strtoul(&metadata[f], nullptr, 10); + + static constexpr size_t align_sz{6}; + f = metadata.find("Align:", f) + align_sz; + + char* l{}; + auto align = strtoul(&metadata[f], &l, 10); + + f += (l - &metadata[f]) + 1; + + size_align.emplace_back(size, align); + } while (true); +} + +void read_kernarg_metadata( + elfio& reader, + unordered_map>>& kernargs) +{ // TODO: this is inefficient. + auto it = find_section_if( + reader, [](const section* x) { return x->get_type() == SHT_NOTE; }); + + if (!it) return; + + const note_section_accessor acc{reader, it}; + for (decltype(acc.get_notes_num()) i = 0; i != acc.get_notes_num(); ++i) { + ELFIO::Elf_Word type{}; + string name{}; + void* desc{}; + Elf_Word desc_size{}; + + acc.get_note(i, type, name, desc, desc_size); + + if (name != "AMD") continue; // TODO: switch to using NT_AMD_AMDGPU_HSA_METADATA. + + string tmp{ + static_cast(desc), static_cast(desc) + desc_size}; + + auto dx = tmp.find("Kernels:"); + + if (dx == string::npos) continue; + + static constexpr decltype(tmp.size()) kernels_sz{8}; + dx += kernels_sz; + + do { + dx = tmp.find("Name:", dx); + + if (dx == string::npos) break; + + static constexpr decltype(tmp.size()) name_sz{5}; + dx = tmp.find_first_not_of(" '", dx + name_sz); + + auto fn = tmp.substr(dx, tmp.find_first_of("'\n", dx) - dx); + dx += fn.size(); + + auto dx1 = tmp.find("CodeProps", dx); + dx = tmp.find("Args:", dx); + + if (dx1 < dx) { + dx = dx1; + continue; + } + if (dx == string::npos) break; + + static constexpr decltype(tmp.size()) args_sz{5}; + dx = parse_args(tmp, dx + args_sz, dx1, kernargs[fn]); + } while (true); + } +} } // namespace namespace hip_impl { @@ -501,6 +585,26 @@ unordered_map& globals(bool rebuild) { return r; } + +unordered_map>>& kernargs() { + static unordered_map>> r; + static once_flag f; + + call_once(f, []() { + for (auto&& blob : code_object_blobs()) { + stringstream tmp{std::string{ + blob.second.front().cbegin(), blob.second.front().cend()}}; + + elfio reader; + if (!reader.load(tmp)) continue; + + read_kernarg_metadata(reader, r); + } + }); + + return r; +} + hsa_executable_t load_executable(const string& file, hsa_executable_t executable, hsa_agent_t agent) { elfio reader; From e127990e23173d412e54e0d0291c102c9fca3b8e Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Wed, 19 Dec 2018 03:13:57 +0000 Subject: [PATCH 2/3] More blobs, more problems. [ROCm/hip commit: 340674ceb6ee471e50ed957b51d6a27847176c0f] --- projects/hip/src/program_state.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/projects/hip/src/program_state.cpp b/projects/hip/src/program_state.cpp index 922d827be4..38cae74dcc 100644 --- a/projects/hip/src/program_state.cpp +++ b/projects/hip/src/program_state.cpp @@ -591,14 +591,15 @@ unordered_map>>& kernargs() { static once_flag f; call_once(f, []() { - for (auto&& blob : code_object_blobs()) { - stringstream tmp{std::string{ - blob.second.front().cbegin(), blob.second.front().cend()}}; + for (auto&& isa_blobs : code_object_blobs()) { + for (auto&& blob : isa_blobs.second) { + stringstream tmp{std::string{blob.cbegin(), blob.cend()}}; - elfio reader; - if (!reader.load(tmp)) continue; + elfio reader; + if (!reader.load(tmp)) continue; - read_kernarg_metadata(reader, r); + read_kernarg_metadata(reader, r); + } } }); From 587745b841bd0f9726d75f271255b77f2562ef28 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Thu, 20 Dec 2018 00:26:42 +0000 Subject: [PATCH 3/3] Hook into the creaky lazy-reinit machinery. Try to minimise race-risk. [ROCm/hip commit: ec14daa7cee0ea5c1073dfc3c369c5ab1a50b37d] --- .../hip/hcc_detail/functional_grid_launch.hpp | 21 +++++++++----- .../include/hip/hcc_detail/program_state.hpp | 5 ++-- projects/hip/src/program_state.cpp | 29 +++++++++++++++---- 3 files changed, 40 insertions(+), 15 deletions(-) diff --git a/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp b/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp index cd90aa401a..2fbda48629 100644 --- a/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp +++ b/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp @@ -103,18 +103,23 @@ inline std::vector make_kernarg( if (sizeof...(Formals) == 0) return {}; - const auto it = function_names().find( - reinterpret_cast(kernel)); - + auto it = function_names().find(reinterpret_cast(kernel)); if (it == function_names().cend()) { - throw std::runtime_error{"Undefined __global__ function."}; + it = + function_names(true).find(reinterpret_cast(kernel)); + if (it == function_names().cend()) { + throw std::runtime_error{"Undefined __global__ function."}; + } } - const auto it1 = kernargs().find(it->second); - + auto it1 = kernargs().find(it->second); if (it1 == kernargs().end()) { - throw std::runtime_error{ - "Missing metadata for __global__ function: " + it->second}; + it1 = kernargs(true).find(it->second); + + if (it1 == kernargs().end()) { + throw std::runtime_error{ + "Missing metadata for __global__ function: " + it->second}; + } } std::tuple to_formals{std::move(actuals)}; diff --git a/projects/hip/include/hip/hcc_detail/program_state.hpp b/projects/hip/include/hip/hcc_detail/program_state.hpp index 92bef22172..da13c7c3db 100644 --- a/projects/hip/include/hip/hcc_detail/program_state.hpp +++ b/projects/hip/include/hip/hcc_detail/program_state.hpp @@ -99,8 +99,9 @@ const std::unordered_map& function_names(bool rebuild = false); std::unordered_map& globals(bool rebuild = false); -std::unordered_map< - std::string, std::vector>>& kernargs(); +const std::unordered_map< + std::string, std::vector>>& + kernargs(bool rebuild = false); hsa_executable_t load_executable(const std::string& file, hsa_executable_t executable, hsa_agent_t agent); diff --git a/projects/hip/src/program_state.cpp b/projects/hip/src/program_state.cpp index 38cae74dcc..bb906b0ad9 100644 --- a/projects/hip/src/program_state.cpp +++ b/projects/hip/src/program_state.cpp @@ -538,6 +538,7 @@ const unordered_map>>& fu // created previously function_names(rebuild); + kernargs(rebuild); kernels(rebuild); globals(rebuild); } @@ -585,12 +586,12 @@ unordered_map& globals(bool rebuild) { return r; } - -unordered_map>>& kernargs() { +const unordered_map>>& kernargs( + bool rebuild) { static unordered_map>> r; static once_flag f; - call_once(f, []() { + static const auto build_map = [](decltype(r)& x) { for (auto&& isa_blobs : code_object_blobs()) { for (auto&& blob : isa_blobs.second) { stringstream tmp{std::string{blob.cbegin(), blob.cend()}}; @@ -598,10 +599,28 @@ unordered_map>>& kernargs() { elfio reader; if (!reader.load(tmp)) continue; - read_kernarg_metadata(reader, r); + read_kernarg_metadata(reader, x); } } - }); + }; + call_once(f, []() { r.reserve(function_names().size()); build_map(r); }); + + if (rebuild) { + static mutex mtx; + thread_local static decltype(r) tmp; + + { + lock_guard lck{mtx}; + + tmp.insert(r.cbegin(), r.cend()); // Should use merge in C++17. + } + + build_map(tmp); + + lock_guard lck{mtx}; + + r.insert(tmp.cbegin(), tmp.cend()); + } return r; }