From 1fbf6399620aededcd558e9ec630af50ecfd03bc Mon Sep 17 00:00:00 2001 From: Siu Chi Chan Date: Thu, 29 Nov 2018 11:38:37 -0500 Subject: [PATCH] Revert "Rely on code object metadat for kernarg arguments alignof and sizeof." This reverts commit fe1e963299c1f4b63652e91a3f156d8a043d317b. --- .../hip/hcc_detail/functional_grid_launch.hpp | 37 ++----- include/hip/hcc_detail/program_state.hpp | 2 - src/program_state.cpp | 102 +----------------- 3 files changed, 9 insertions(+), 132 deletions(-) diff --git a/include/hip/hcc_detail/functional_grid_launch.hpp b/include/hip/hcc_detail/functional_grid_launch.hpp index ba9929c0a6..e678f25aa2 100644 --- a/include/hip/hcc_detail/functional_grid_launch.hpp +++ b/include/hip/hcc_detail/functional_grid_launch.hpp @@ -33,7 +33,6 @@ THE SOFTWARE. #include #include -#include #include #include #include @@ -57,9 +56,7 @@ template < typename... Ts, typename std::enable_if::type* = nullptr> inline std::vector make_kernarg( - const std::tuple&, - const std::vector>&, - std::vector kernarg) { + std::vector kernarg, const std::tuple&) { return kernarg; } @@ -68,9 +65,7 @@ template < typename... Ts, typename std::enable_if::type* = nullptr> inline std::vector make_kernarg( - const std::tuple& formals, - const std::vector>& size_align, - std::vector kernarg) { + std::vector kernarg, const std::tuple& formals) { using T = typename std::tuple_element>::type; static_assert( @@ -85,42 +80,24 @@ inline std::vector make_kernarg( #endif kernarg.resize(round_up_to_next_multiple_nonnegative( - kernarg.size(), size_align[n].second) + - size_align[n].first); + kernarg.size(), alignof(T)) + sizeof(T)); - std::memcpy( - kernarg.data() + kernarg.size() - size_align[n].first, - &std::get(formals), - size_align[n].first); + new (kernarg.data() + kernarg.size() - sizeof(T)) T{std::get(formals)}; - return make_kernarg(formals, size_align, std::move(kernarg)); + return make_kernarg(std::move(kernarg), formals); } template inline std::vector make_kernarg( - void (*kernel)(Formals...), std::tuple actuals) { + void (*)(Formals...), std::tuple actuals) { static_assert(sizeof...(Formals) == sizeof...(Actuals), "The count of formal arguments must match the count of actuals."); - const auto it = function_names().find( - reinterpret_cast(kernel)); - - if (it == function_names().cend()) { - throw std::runtime_error{"Undefined __global__ function."}; - } - - const auto it1 = kernargs().find(it->second); - - if (it1 == kernargs().end()) { - throw std::runtime_error{ - "Missing metadata for __global__ function: " + it->second}; - } - std::tuple to_formals{std::move(actuals)}; std::vector kernarg; kernarg.reserve(sizeof(to_formals)); - return make_kernarg<0>(to_formals, it1->second, std::move(kernarg)); + return make_kernarg<0>(std::move(kernarg), to_formals); } void hipLaunchKernelGGLImpl(std::uintptr_t function_address, const dim3& numBlocks, diff --git a/include/hip/hcc_detail/program_state.hpp b/include/hip/hcc_detail/program_state.hpp index 92bef22172..bdb87b3509 100644 --- a/include/hip/hcc_detail/program_state.hpp +++ b/include/hip/hcc_detail/program_state.hpp @@ -99,8 +99,6 @@ const std::unordered_map& function_names(bool rebuild = false); std::unordered_map& globals(bool rebuild = false); -std::unordered_map< - std::string, std::vector>>& kernargs(); hsa_executable_t load_executable(const std::string& file, hsa_executable_t executable, hsa_agent_t agent); diff --git a/src/program_state.cpp b/src/program_state.cpp index b490c0ee25..88cdeeb404 100644 --- a/src/program_state.cpp +++ b/src/program_state.cpp @@ -312,8 +312,8 @@ const unordered_map>& kernels(bool rebui void load_code_object_and_freeze_executable( const string& file, hsa_agent_t agent, - hsa_executable_t executable) { - // TODO: the following sequence is inefficient, should be refactored + hsa_executable_t + executable) { // TODO: the following sequence is inefficient, should be refactored // into a single load of the file and subsequent ELFIO // processing. static const auto cor_deleter = [](hsa_code_object_reader_t* p) { @@ -340,85 +340,6 @@ void load_code_object_and_freeze_executable( code_readers.push_back(move(tmp)); } } - -size_t parse_args( - const string& metadata, - size_t f, - size_t l, - vector>& size_align) { - if (f == l) return f; - - do { - static constexpr size_t size_sz{5}; - f = metadata.find("Size:", f) + size_sz; - - if (l <= f) return f; - - auto size = strtoul(&metadata[f], nullptr, 10); - - static constexpr size_t align_sz{6}; - f = metadata.find("Align:", f) + align_sz; - - char* l{}; - auto align = strtoul(&metadata[f], &l, 10); - - f += (l - &metadata[f]) + 1; - - size_align.emplace_back(size, align); - } while (true); -} - -void read_kernarg_metadata( - elfio& reader, - unordered_map>>& kernargs) -{ // TODO: this is inefficient. - auto it = find_section_if( - reader, [](const section* x) { return x->get_type() == SHT_NOTE; }); - - if (!it) return; - - const note_section_accessor acc{reader, it}; - for (decltype(acc.get_notes_num()) i = 0; i != acc.get_notes_num(); ++i) { - ELFIO::Elf_Word type{}; - string name{}; - void* desc{}; - Elf_Word desc_size{}; - - acc.get_note(i, type, name, desc, desc_size); - - if (name != "AMD") continue; // TODO: switch to using NT_AMD_AMDGPU_HSA_METADATA. - - string tmp{ - static_cast(desc), static_cast(desc) + desc_size}; - - auto dx = tmp.find("Kernels:"); - - if (dx == string::npos) continue; - - static constexpr decltype(tmp.size()) kernels_sz{8}; - dx += kernels_sz; - - do { - dx = tmp.find("Name:", dx); - - if (dx == string::npos) break; - - static constexpr decltype(tmp.size()) name_sz{5}; - dx = tmp.find_first_not_of(' ', dx + name_sz); - - auto fn = tmp.substr(dx, tmp.find('\n', dx) - dx); - dx += fn.size(); - - dx = tmp.find("Args:", dx); - - if (dx == string::npos) break; - - static constexpr decltype(tmp.size()) args_sz{5}; - dx = parse_args( - tmp, dx + args_sz, tmp.find("CodeProps", dx), kernargs[fn]); - } while (true); - } -} } // namespace namespace hip_impl { @@ -580,25 +501,6 @@ unordered_map& globals(bool rebuild) { return r; } -unordered_map>>& kernargs() { - static unordered_map>> r; - static once_flag f; - - call_once(f, []() { - for (auto&& blob : code_object_blobs()) { - stringstream tmp{std::string{ - blob.second.front().cbegin(), blob.second.front().cend()}}; - - elfio reader; - if (!reader.load(tmp)) continue; - - read_kernarg_metadata(reader, r); - } - }); - - return r; -} - hsa_executable_t load_executable(const string& file, hsa_executable_t executable, hsa_agent_t agent) { elfio reader;