diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_hsa_loader.hpp b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_hsa_loader.hpp index 4b90f0e2c0..b592f3a41e 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_hsa_loader.hpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_hsa_loader.hpp @@ -2,24 +2,24 @@ // // The University of Illinois/NCSA // Open Source License (NCSA) -// +// // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// +// // Developed by: -// +// // AMD Research and AMD HSA Software Development -// +// // Advanced Micro Devices, Inc. -// +// // www.amd.com -// +// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to // deal with the Software without restriction, including without limitation // the rights to use, copy, modify, merge, publish, distribute, sublicense, // and/or sell copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following conditions: -// +// // - Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimers. // - Redistributions in binary form must reproduce the above copyright @@ -29,7 +29,7 @@ // nor the names of its contributors may be used to endorse or promote // products derived from this Software without specific prior written // permission. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
@@ -394,6 +394,10 @@ public:
       const char *options,
       hsa_default_float_rounding_mode_t default_float_rounding_mode = HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT) = 0;
 
+  /// @brief Freezes @p executable using @p options.
+  /// @returns HSA_STATUS_SUCCESS on success, otherwise the status of the failed freeze.
+  virtual hsa_status_t FreezeExecutable(Executable *executable, const char *options) = 0;
+
   /// @brief Destroys @p executable
   virtual void DestroyExecutable(Executable *executable) = 0;
 
diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa.cpp
index bb8e758819..6bc2919d62 100644
--- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa.cpp
+++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa.cpp
@@ -2329,7 +2329,7 @@ hsa_status_t hsa_executable_freeze(
     return HSA_STATUS_ERROR_INVALID_EXECUTABLE;
   }
 
-  return exec->Freeze(options);
+  return GetLoader()->FreezeExecutable(exec, options);
   CATCH;
 }
 
diff --git a/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.cpp b/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.cpp
index 7fb9a3f412..dc53520963 100644
--- a/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.cpp
+++ b/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.cpp
@@ -49,17 +49,38 @@
 #include
 #include
 #include
+#include
+#include <cstdlib>
 
 #include "amd_hsa_elf.h"
 #include "amd_hsa_kernel_code.h"
 #include "amd_hsa_code.hpp"
 #include "amd_hsa_code_util.hpp"
 #include "amd_options.hpp"
+#include "core/util/utils.h"
 #include "AMDHSAKernelDescriptor.h"
 
 using namespace amd::hsa;
 using namespace amd::hsa::common;
 
+// Hook a debugger can break on; its address is published as _amdgpu_r_debug.r_brk.
+// FreezeExecutable and DestroyExecutable call it before and after they change
+// the link_map list. The empty asm statement keeps calls to this otherwise
+// empty function from being optimized away.
+static void __attribute__((noinline, optimize(0))) _loader_debug_state() {
+  __asm__ volatile("");
+}
+
+// Rendezvous structure through which loaded code objects are exposed.
+r_debug _amdgpu_r_debug __attribute__((visibility("default"))) = {
+    1,                                                   // r_version
+    nullptr,                                             // r_map
+    reinterpret_cast<ElfW(Addr)>(&_loader_debug_state),  // r_brk
+    r_debug::RT_CONSISTENT,                              // r_state
+    0};                                                  // r_ldbase
+// Last entry of the list headed by _amdgpu_r_debug.r_map; nullptr when it is empty.
+static link_map* r_debug_tail = nullptr;
+
 namespace amd {
 namespace hsa {
 namespace loader {
@@ -140,6 +161,10 @@ Loader* Loader::Create(Context* context)
 
 void Loader::Destroy(Loader *loader)
 {
+  // Loader resets the link_map, but the executables and loaded code objects are not deleted.
+  _amdgpu_r_debug.r_map = nullptr;
+  _amdgpu_r_debug.r_state = r_debug::RT_CONSISTENT;
+  r_debug_tail = nullptr;
   delete loader;
 }
 
@@ -152,9 +177,84 @@ Executable* AmdHsaCodeLoader::CreateExecutable(
   return executables.back();
 }
 
-void AmdHsaCodeLoader::DestroyExecutable(Executable *executable)
-{
+/// Appends @p map to the tail of the link_map list published via _amdgpu_r_debug.
+static void AddCodeObjectInfoIntoDebugMap(link_map* map) {
+  map->l_prev = r_debug_tail;
+  map->l_next = nullptr;
+  if (r_debug_tail) {
+    r_debug_tail->l_next = map;
+  } else {
+    // Empty list: the new entry is also the head.
+    _amdgpu_r_debug.r_map = map;
+  }
+  r_debug_tail = map;
+}
+
+/// Unlinks @p map from the list published via _amdgpu_r_debug and frees its name.
+/// Harmless for a zero-initialized map that was never added to the list.
+static void RemoveCodeObjectInfoFromDebugMap(link_map* map) {
+  if (r_debug_tail == map) {
+    r_debug_tail = map->l_prev;
+  }
+  // The published head has to move as well, otherwise r_map keeps pointing at
+  // the entry that is about to be destroyed.
+  if (_amdgpu_r_debug.r_map == map) {
+    _amdgpu_r_debug.r_map = map->l_next;
+  }
+  if (map->l_prev) {
+    map->l_prev->l_next = map->l_next;
+  }
+  if (map->l_next) {
+    map->l_next->l_prev = map->l_prev;
+  }
+  map->l_prev = nullptr;
+  map->l_next = nullptr;
+
+  // l_name is created with strdup() in ExecutableImpl::Freeze, so it must be
+  // released with free() rather than delete.
+  free(map->l_name);
+  map->l_name = nullptr;
+}
+
+/// Freezes @p executable and, on success, publishes the debug info of its loaded
+/// code objects in the link_map list. A failing status from Freeze is returned as is.
+hsa_status_t AmdHsaCodeLoader::FreezeExecutable(Executable *executable, const char *options) {
+  hsa_status_t status = executable->Freeze(options);
+  if (status != HSA_STATUS_SUCCESS) {
+    return status;
+  }
+
+  // Assuming runtime atomic implements C++ std::memory_order
   WriterLockGuard writer_lock(rw_lock_);
+  // Announce RT_ADD, update the list, then announce RT_CONSISTENT; each
+  // announcement is followed by a call to the _loader_debug_state() hook.
+  atomic::Store(&_amdgpu_r_debug.r_state, r_debug::RT_ADD, std::memory_order_relaxed);
+  atomic::Fence(std::memory_order_acq_rel);
+  _loader_debug_state();
+  atomic::Fence(std::memory_order_acq_rel);
+  for (auto &lco : static_cast<ExecutableImpl*>(executable)->loaded_code_objects) {
+    AddCodeObjectInfoIntoDebugMap(&(lco->r_debug_info));
+  }
+  atomic::Store(&_amdgpu_r_debug.r_state, r_debug::RT_CONSISTENT, std::memory_order_release);
+  _loader_debug_state();
+
+  return HSA_STATUS_SUCCESS;
+}
+
+/// Removes the debug info of @p executable's loaded code objects from the link_map
+/// list, then unregisters and deletes @p executable.
+void AmdHsaCodeLoader::DestroyExecutable(Executable *executable) {
+  // Assuming runtime atomic implements C++ std::memory_order
+  WriterLockGuard writer_lock(rw_lock_);
+  atomic::Store(&_amdgpu_r_debug.r_state, r_debug::RT_DELETE, std::memory_order_relaxed);
+  atomic::Fence(std::memory_order_acq_rel);
+  _loader_debug_state();
+  atomic::Fence(std::memory_order_acq_rel);
+  for (auto &lco : static_cast<ExecutableImpl*>(executable)->loaded_code_objects) {
+    RemoveCodeObjectInfoFromDebugMap(&(lco->r_debug_info));
+  }
+  atomic::Store(&_amdgpu_r_debug.r_state, r_debug::RT_CONSISTENT, std::memory_order_release);
+  _loader_debug_state();
 
   executables[((ExecutableImpl*)executable)->id()] = nullptr;
   delete executable;
@@ -1066,8 +1166,7 @@ hsa_status_t ExecutableImpl::LoadCodeObject(
       s2 = range.substr(mi + 1);
       std::istringstream is1(s1); is1 >> n1;
       std::istringstream is2(s2); is2 >> n2;
-    }
-    else {
+    } else {
       std::istringstream is(range); is >> n1;
       n2 = n1;
     }
@@ -1775,6 +1874,17 @@ hsa_status_t ExecutableImpl::Freeze(const char *options) {
     for (auto &ls : lco->LoadedSegments()) {
       ls->Freeze();
     }
+    // Update code object debug info after it is frozen.
+    std::stringstream ss;
+    uint64_t elf_begin = lco->getElfData();
+    uint64_t elf_size = lco->getElfSize();
+    ss << "file:///proc/" << getpid() << "/mem#"
+       << "offset=" << std::hex << std::showbase << elf_begin << "&"
+       << "size=" << elf_size;
+    lco->r_debug_info.l_addr = lco->getDelta();
+    lco->r_debug_info.l_name = strdup(ss.str().c_str());
+    lco->r_debug_info.l_prev = nullptr;
+    lco->r_debug_info.l_next = nullptr;
   }
 
   state_ = HSA_EXECUTABLE_STATE_FROZEN;
diff --git a/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.hpp b/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.hpp
index d8b9c37264..368b9aefc5 100644
--- a/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.hpp
+++ b/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.hpp
@@ -48,6 +48,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -283,6 +284,7 @@ public:
 };
 
 class LoadedCodeObjectImpl : public LoadedCodeObject, public ExecutableObject {
+friend class AmdHsaCodeLoader;
 private:
   LoadedCodeObjectImpl(const LoadedCodeObjectImpl&);
   LoadedCodeObjectImpl& operator=(const LoadedCodeObjectImpl&);
@@ -319,6 +321,10 @@ public:
   uint64_t getLoadBase() const override;
   uint64_t getLoadSize() const override;
   int64_t getDelta() const override;
+
+  /// Entry for this code object in the link_map list published to debuggers. Value-initialized
+  /// so that it can be unlinked safely even if the executable was never frozen.
+  link_map r_debug_info{};
 };
 
 class Segment : public LoadedSegment, public ExecutableObject {
@@ -396,6 +402,7 @@ struct ASH {
 typedef std::unordered_map AgentSymbolMap;
 
 class ExecutableImpl final: public Executable {
+friend class AmdHsaCodeLoader;
 public:
   const hsa_profile_t& profile() const {
     return profile_;
@@ -563,6 +570,7 @@ public:
     const char *options,
     hsa_default_float_rounding_mode_t default_float_rounding_mode = HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT) override;
 
+  hsa_status_t FreezeExecutable(Executable *executable, const char *options) override;
   void DestroyExecutable(Executable *executable) override;
 
   hsa_status_t IterateExecutables(