// Patch summary (leading text before the first "diff --git" header is
// skipped by git-apply / patch, so these lines are commentary only).
//
// Purpose: add a "no copy" way of loading a code object, alongside the
// existing copying path.
//
// include/hip/hcc_detail/program_state.hpp
//   - Declares program_state::load_executable_no_copy with the same parameter
//     list as load_executable (const char*, const size_t, hsa_executable_t,
//     hsa_agent_t).
//
// src/hip_clang.cpp (__hipRegisterFatBinary)
//   - The call that passed image.data()/image.size() to load_executable is
//     replaced by load_executable_no_copy, given a pointer computed from
//     header + desc->offset and the length desc->size. The `image` object is
//     still used by the HIP_DUMP_CODE_OBJECT branch shown as context.
//
// src/program_state.cpp
//   - program_state::load_executable now forwards to impl->load_executable
//     with an extra third argument `true`; the new load_executable_no_copy
//     forwards with `false`.
//
// src/program_state.inl
//   - load_code_object_and_freeze_executable takes (data, data_size,
//     make_copy, agent, executable) instead of a const std::string&; the
//     early return now triggers on data_size == 0.
//   - When make_copy is true the bytes are copied into a std::string that is
//     stored in code_readers, and `data` is re-pointed at that stored copy.
//     When false, an empty string is stored and the caller's pointer is handed
//     to hsa_code_object_reader_create_from_memory unchanged.
//   - impl.load_executable gains a `bool make_copy` parameter (third position)
//     and passes data/data_size/make_copy on instead of move(ts); the
//     blob-loading call site passes `true`.
//
// NOTE(review): in the make_copy == false path the caller's buffer presumably
// has to stay valid for as long as the code object reader uses it -- confirm
// against the HSA runtime documentation and document it on the new API.
// NOTE(review): `data` is not null-checked before std::string(data, data_size)
// -- confirm that callers never pass a null pointer with a non-zero size.
// NOTE(review): in this copy of the patch, hunk line breaks are collapsed and
// angle-bracketed template arguments appear to be missing (for example
// `reinterpret_cast(header)` and `std::lock_guard lck`), so it presumably
// will not apply as-is; regenerate it from the repository before use.
diff --git a/include/hip/hcc_detail/program_state.hpp b/include/hip/hcc_detail/program_state.hpp index fca88f8094..6128a4c158 100644 --- a/include/hip/hcc_detail/program_state.hpp +++ b/include/hip/hcc_detail/program_state.hpp @@ -73,6 +73,9 @@ public: hsa_executable_t load_executable(const char*, const size_t, hsa_executable_t, hsa_agent_t); + hsa_executable_t load_executable_no_copy(const char*, const size_t, + hsa_executable_t, + hsa_agent_t); void* global_addr_by_name(const char* name); diff --git a/src/hip_clang.cpp b/src/hip_clang.cpp index 4c8ae07134..93f5a82a2f 100644 --- a/src/hip_clang.cpp +++ b/src/hip_clang.cpp @@ -89,9 +89,9 @@ __hipRegisterFatBinary(const void* data) reinterpret_cast(header) + desc->offset), desc->size}; if (HIP_DUMP_CODE_OBJECT) __hipDumpCodeObject(image); - module->executable = hip_impl::get_program_state().load_executable(image.data(), image.size(), - module->executable, - agent); + module->executable = hip_impl::get_program_state().load_executable_no_copy( + reinterpret_cast(header) + desc->offset, desc->size, + module->executable, agent); if (module->executable.handle) { modules->at(deviceId) = module; diff --git a/src/program_state.cpp b/src/program_state.cpp index 5e9f9976be..975dcda321 100644 --- a/src/program_state.cpp +++ b/src/program_state.cpp @@ -68,7 +68,14 @@ namespace hip_impl { const size_t data_size, hsa_executable_t executable, hsa_agent_t agent) { - return impl->load_executable(data, data_size, executable, agent); + return impl->load_executable(data, data_size, true, executable, agent); + } + + hsa_executable_t program_state::load_executable_no_copy(const char* data, + const size_t data_size, + hsa_executable_t executable, + hsa_agent_t agent) { + return impl->load_executable(data, data_size, false, executable, agent); } hipFunction_t program_state::kernel_descriptor(std::uintptr_t function_address, diff --git a/src/program_state.inl b/src/program_state.inl index c62b8f4061..760dafea22 100644 --- 
a/src/program_state.inl +++ b/src/program_state.inl @@ -406,11 +406,13 @@ public: } void load_code_object_and_freeze_executable( - const std::string& file, hsa_agent_t agent, hsa_executable_t executable) { + const char* data, + const size_t data_size, bool make_copy, + hsa_agent_t agent, hsa_executable_t executable) { // TODO: the following sequence is inefficient, should be refactored // into a single load of the file and subsequent ELFIO // processing. - if (file.empty()) return; + if (!data_size) return; static const auto cor_deleter = [] (hsa_code_object_reader_t* p) { if (!p) return; @@ -423,8 +425,16 @@ public: decltype(code_readers.second)::iterator it; { std::lock_guard lck{code_readers.first}; + + std::string file; + if (make_copy) + file = std::string(data, data_size); + code_readers.second.emplace_back(move(file), move(tmp)); it = std::prev(code_readers.second.end()); + + if (make_copy) + data = it->first.data(); } auto check_hsa_error = [](hsa_status_t s) { @@ -438,7 +448,7 @@ public: }; check_hsa_error(hsa_code_object_reader_create_from_memory( - it->first.data(), it->first.size(), it->second.get())); + data, data_size, it->second.get())); check_hsa_error(hsa_executable_load_agent_code_object( executable, agent, *it->second, nullptr, nullptr)); @@ -485,7 +495,7 @@ public: // TODO: this is massively inefficient and only meant for // illustration. 
- tmp = impl.load_executable(blob.data(), blob.size(), tmp, a); + tmp = impl.load_executable(blob.data(), blob.size(), true, tmp, a); if (tmp.handle) current_exes.push_back(tmp); } @@ -503,6 +513,7 @@ public: hsa_executable_t load_executable(const char* data, const size_t data_size, + bool make_copy, hsa_executable_t executable, hsa_agent_t agent) { ELFIO::elfio reader; @@ -519,7 +530,7 @@ public: code_object_dynsym, agent, executable); - load_code_object_and_freeze_executable(move(ts), agent, executable); + load_code_object_and_freeze_executable(data, data_size, make_copy, agent, executable); return executable; }