From a331990ee4dfe75675cd3743c4c85f3848abddb3 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 22 Jul 2020 21:10:22 -0500 Subject: [PATCH] optimization mechanism fix: correct tracker handler; kernel name query on completion; Change-Id: I14da152b4ac3c7d8fd1af2f54e9d71f834071622 [ROCm/rocprofiler commit: 80747de208f4e78d7292c3144f96d981e9165947] --- projects/rocprofiler/build.sh | 2 +- projects/rocprofiler/inc/rocprofiler.h | 10 +- projects/rocprofiler/src/core/context.h | 1 + .../rocprofiler/src/core/hsa_interceptor.h | 63 +++++++++++- .../rocprofiler/src/core/intercept_queue.h | 31 ++---- projects/rocprofiler/src/core/tracker.h | 2 +- .../rocprofiler/src/util/hsa_rsrc_factory.cpp | 5 +- projects/rocprofiler/test/run.sh | 9 ++ projects/rocprofiler/test/tool/tool.cpp | 96 ++++++++++--------- .../test/util/hsa_rsrc_factory.cpp | 79 +++++++++------ .../rocprofiler/test/util/hsa_rsrc_factory.h | 94 +++++++++++++++++- 11 files changed, 285 insertions(+), 107 deletions(-) diff --git a/projects/rocprofiler/build.sh b/projects/rocprofiler/build.sh index dd7cb8b686..542d5b2df3 100755 --- a/projects/rocprofiler/build.sh +++ b/projects/rocprofiler/build.sh @@ -3,4 +3,4 @@ BIN_DIR=`dirname $0` BLD_DIR=$BIN_DIR/build export CMAKE_PREFIX_PATH=/opt/rocm/include/hsa:/opt/rocm -rm -rf $BLD_DIR && mkdir $BLD_DIR && cd $BLD_DIR && cmake .. && make -j && make mytest && ./run.sh +rm -rf $BLD_DIR && mkdir $BLD_DIR && cd $BLD_DIR && cmake .. && make -j && make mytest diff --git a/projects/rocprofiler/inc/rocprofiler.h b/projects/rocprofiler/inc/rocprofiler.h index 7fc57aca66..b176cadf4b 100644 --- a/projects/rocprofiler/inc/rocprofiler.h +++ b/projects/rocprofiler/inc/rocprofiler.h @@ -480,7 +480,8 @@ typedef enum { ROCPROFILER_HSA_CB_ID_ALLOCATE = 0, // Memory allocate callback ROCPROFILER_HSA_CB_ID_DEVICE = 1, // Device assign callback ROCPROFILER_HSA_CB_ID_MEMCOPY = 2, // Memcopy callback - ROCPROFILER_HSA_CB_ID_SUBMIT = 3 // Packet submit callback + ROCPROFILER_HSA_CB_ID_SUBMIT = 3, // Packet submit callback + ROCPROFILER_HSA_CB_ID_KSYMBOL = 4 // Loading/unloading of kernel symbol } rocprofiler_hsa_cb_id_t; // HSA callback data type @@ -511,6 +512,12 @@ typedef struct { uint32_t device_type; // type of device the packed is submitted to uint32_t device_id; // id of device the packed is submitted to } submit; + struct { + uint64_t object; // kernel symbol object + const char* name; // kernel symbol name + uint32_t name_length; // kernel symbol name length + int destroy; // symbol executable destroy + } ksymbol; }; } rocprofiler_hsa_callback_data_t; @@ -526,6 +533,7 @@ typedef struct { rocprofiler_hsa_callback_fun_t device; // agent assign callback rocprofiler_hsa_callback_fun_t memcopy; // memory copy callback rocprofiler_hsa_callback_fun_t submit; // packet submit callback + rocprofiler_hsa_callback_fun_t ksymbol; // kernel symbol callback } rocprofiler_hsa_callbacks_t; // Set callbacks. If the callback is NULL then it is disabled. diff --git a/projects/rocprofiler/src/core/context.h b/projects/rocprofiler/src/core/context.h index 02150734ed..b03906ffab 100644 --- a/projects/rocprofiler/src/core/context.h +++ b/projects/rocprofiler/src/core/context.h @@ -363,6 +363,7 @@ class Context { ~Context() { Destruct(); } void Destruct() { + hsa_signal_destroy(dispatch_signal_); for (const auto& v : info_map_) { const std::string& name = v.first; const rocprofiler_feature_t* info = v.second; diff --git a/projects/rocprofiler/src/core/hsa_interceptor.h b/projects/rocprofiler/src/core/hsa_interceptor.h index f1d8a0d814..9207730b79 100644 --- a/projects/rocprofiler/src/core/hsa_interceptor.h +++ b/projects/rocprofiler/src/core/hsa_interceptor.h @@ -25,6 +25,7 @@ SOFTWARE. #ifndef _SRC_CORE_HSA_INTERCEPTOR_H #define _SRC_CORE_HSA_INTERCEPTOR_H +#include #include #include #include @@ -49,7 +50,8 @@ SOFTWARE. (ID == ROCPROFILER_HSA_CB_ID_ALLOCATE) ? callbacks_.allocate: \ (ID == ROCPROFILER_HSA_CB_ID_DEVICE) ? callbacks_.device: \ (ID == ROCPROFILER_HSA_CB_ID_MEMCOPY) ? callbacks_.memcopy: \ - callbacks_.submit; \ + (ID == ROCPROFILER_HSA_CB_ID_SUBMIT) ? callbacks_.submit: \ + callbacks_.ksymbol; \ if ((__callback != NULL) && (recursion_ == false)) #define DO_HSA_CALLBACK \ @@ -62,6 +64,14 @@ SOFTWARE. #define ISSUE_HSA_CALLBACK(ID) \ do { IS_HSA_CALLBACK(ID) { DO_HSA_CALLBACK; } } while(0) +// Demangle C++ symbol name +static const char* cpp_demangle(const char* symname) { + size_t size = 0; + int status; + const char* ret = abi::__cxa_demangle(symname, NULL, &size, &status); + return (ret != 0) ? ret : strdup(symname); +} + namespace rocprofiler { extern decltype(hsa_memory_allocate)* hsa_memory_allocate_fn; extern decltype(hsa_memory_assign_agent)* hsa_memory_assign_agent_fn; @@ -337,6 +347,39 @@ class HsaInterceptor { return HSA_STATUS_SUCCESS; } + static hsa_status_t KernelSymbolCallback( + hsa_executable_t executable, + hsa_executable_symbol_t symbol, + void *arg) + { + const int free_flag = reinterpret_cast(arg); + hsa_symbol_kind_t kind = (hsa_symbol_kind_t)0; + HSA_RT(hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &kind)); + + if (kind == HSA_SYMBOL_KIND_KERNEL) { + const char* name = NULL; + uint32_t len = 0; + uint64_t obj = 0; + HSA_RT(hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &obj)); + if (free_flag == 0) { + HSA_RT(hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len)); + char sym_name[len + 1]; + HSA_RT(hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, sym_name)); + name = cpp_demangle(sym_name); + } + + rocprofiler_hsa_callback_data_t data{}; + data.ksymbol.object = obj; + data.ksymbol.name = name; + data.ksymbol.name_length = len; + data.ksymbol.destroy = free_flag; + + ISSUE_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_KSYMBOL); + } + + return HSA_STATUS_SUCCESS; + } + static hsa_status_t ExecutableFreeze( hsa_executable_t executable, const char *options) @@ -352,6 +395,15 @@ class HsaInterceptor { reinterpret_cast(0)); } + { + IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_KSYMBOL) { + HSA_RT(hsa_executable_iterate_symbols( + executable, + KernelSymbolCallback, + reinterpret_cast(0))); + } + } + return status; } @@ -367,6 +419,15 @@ class HsaInterceptor { reinterpret_cast(1)); } + { + IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_KSYMBOL) { + HSA_RT(hsa_executable_iterate_symbols( + executable, + KernelSymbolCallback, + reinterpret_cast(1))); + } + } + HSA_RT(hsa_executable_destroy_fn(executable)); return status; diff --git a/projects/rocprofiler/src/core/intercept_queue.h b/projects/rocprofiler/src/core/intercept_queue.h index f0bf06a4d0..5cd09b108e 100644 --- a/projects/rocprofiler/src/core/intercept_queue.h +++ b/projects/rocprofiler/src/core/intercept_queue.h @@ -24,7 +24,6 @@ THE SOFTWARE. #define _SRC_CORE_INTERCEPT_QUEUE_H #include -#include #include #include @@ -165,12 +164,7 @@ class InterceptQueue { const hsa_kernel_dispatch_packet_t* dispatch_packet = reinterpret_cast(packet); const hsa_signal_t completion_signal = dispatch_packet->completion_signal; -#if 0 - // Prepareing dispatch callback data - uint64_t kernel_object = dispatch_packet->kernel_object; - const amd_kernel_code_t* kernel_code = GetKernelCode(kernel_object); - const char* kernel_name = QueryKernelName(kernel_object, kernel_code); -#endif + rocprofiler_callback_data_t data = {obj->agent_info_->dev_id, obj->agent_info_->dev_index, obj->queue_, @@ -178,18 +172,15 @@ class InterceptQueue { obj->queue_id, completion_signal, dispatch_packet, - NULL, // kernel_name - 0, // kernel_object - NULL, // kernel_code + NULL, // kernel_name + 0, // kernel_object + NULL, // kernel_code 0, // (uint32_t)syscall(__NR_gettid), - NULL}; + NULL}; // record // Calling dispatch callback rocprofiler_group_t group = {}; hsa_status_t status = (dispatch_callback_.load())(&data, callback_data_, &group); -#if 0 - free(const_cast(kernel_name)); -#endif Context* context = reinterpret_cast(group.context); // Injecting profiling start/stop packets if ((status == HSA_STATUS_SUCCESS) && (context != NULL)) { @@ -306,7 +297,6 @@ class InterceptQueue { // Calling dispatch callback rocprofiler_group_t group = {}; hsa_status_t status = (dispatch_callback_.load())(&data, callback_data_, &group); - free(const_cast(kernel_name)); // Injecting profiling start/stop packets if ((status != HSA_STATUS_SUCCESS) || (group.context == NULL)) { if (tracker_entry != NULL) { @@ -445,7 +435,6 @@ class InterceptQueue { // Calling dispatch callback rocprofiler_group_t group = {}; hsa_status_t status = (dispatch_callback_.load())(&data, callback_data_, &group); - free(const_cast(kernel_name)); // Injecting profiling start/stop packets if ((status == HSA_STATUS_SUCCESS) && (group.context != NULL)) { @@ -539,14 +528,6 @@ class InterceptQueue { return (dbg_info != NULL) ? dbg_info->kernel_name : NULL; } - // Demangle C++ symbol name - static const char* cpp_demangle(const char* symname) { - size_t size = 0; - int status; - const char* ret = abi::__cxa_demangle(symname, NULL, &size, &status); - return (ret != 0) ? ret : strdup(symname); - } - static const char* QueryKernelName(uint64_t kernel_object, const amd_kernel_code_t* kernel_code) { const uint16_t kernel_object_flag = *((uint64_t*)kernel_code + 1); if (kernel_object_flag == 0) { @@ -557,7 +538,7 @@ class InterceptQueue { const char* kernel_symname = (util::HsaRsrcFactory::IsExecutableTracking()) ? util::HsaRsrcFactory::GetKernelNameRef(kernel_object) : GetKernelName(kernel_code->runtime_loader_kernel_symbol); - return cpp_demangle(kernel_symname); + return kernel_symname; } // method to get an intercept queue object diff --git a/projects/rocprofiler/src/core/tracker.h b/projects/rocprofiler/src/core/tracker.h index a6819c8304..d538aff720 100644 --- a/projects/rocprofiler/src/core/tracker.h +++ b/projects/rocprofiler/src/core/tracker.h @@ -167,7 +167,7 @@ class Tracker { hsa_signal_t& dispatch_signal = context->GetDispatchSignal(); util::HsaRsrcFactory::Instance().HsaApi()->hsa_signal_store_screlease(dispatch_signal, signal_value); hsa_status_t status = - util::HsaRsrcFactory::Instance().HsaApi()->hsa_amd_signal_async_handler(dispatch_signal, HSA_SIGNAL_CONDITION_LT, signal_value, Handler, group); + util::HsaRsrcFactory::Instance().HsaApi()->hsa_amd_signal_async_handler(dispatch_signal, HSA_SIGNAL_CONDITION_LT, signal_value, Handler_opt, group); if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_signal_async_handler"); } diff --git a/projects/rocprofiler/src/util/hsa_rsrc_factory.cpp b/projects/rocprofiler/src/util/hsa_rsrc_factory.cpp index 78833284fd..e2f97ce9fc 100644 --- a/projects/rocprofiler/src/util/hsa_rsrc_factory.cpp +++ b/projects/rocprofiler/src/util/hsa_rsrc_factory.cpp @@ -36,6 +36,7 @@ POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include @@ -626,6 +627,8 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br &kernelSymbol); CHECK_STATUS("Error in looking up kernel symbol", status); + close(file_handle); + // Update output parameter *code_desc = kernelSymbol; return true; @@ -705,7 +708,7 @@ const char* HsaRsrcFactory::GetKernelNameRef(uint64_t addr) { std::lock_guard lck(mutex_); const auto it = symbols_map_->find(addr); if (it == symbols_map_->end()) { - fprintf(stderr, "HsaRsrcFactory::kernel addr (0x%lx) is not found\n", addr); + fprintf(stderr, "HsaRsrcFactory::GetKernelNameRef: kernel addr (0x%lx) is not found\n", addr); abort(); } return it->second; diff --git a/projects/rocprofiler/test/run.sh b/projects/rocprofiler/test/run.sh index 4f5baf0e05..d137073852 100755 --- a/projects/rocprofiler/test/run.sh +++ b/projects/rocprofiler/test/run.sh @@ -128,6 +128,15 @@ export ROCP_THRS=10 export ROCP_INPUT=pmc_input.xml eval_test "'rocprof' libtool PMC n-thread test" ./test/ctrl +export ROCP_OPT_MODE=1 +export ROCP_KITER=20 +export ROCP_DITER=20 +export ROCP_AGENTS=1 +export ROCP_THRS=10 +export ROCP_INPUT=pmc_input.xml +eval_test "'rocprof' libtool PMC n-thread opt test" ./test/ctrl +unset ROCP_OPT_MODE + export ROCP_KITER=20 export ROCP_DITER=20 export ROCP_AGENTS=1 diff --git a/projects/rocprofiler/test/tool/tool.cpp b/projects/rocprofiler/test/tool/tool.cpp index 865710e57c..1ee5850612 100644 --- a/projects/rocprofiler/test/tool/tool.cpp +++ b/projects/rocprofiler/test/tool/tool.cpp @@ -100,7 +100,7 @@ struct context_entry_t { unsigned feature_count; rocprofiler_callback_data_t data; kernel_properties_t kernel_properties; - uint64_t kernel_object; + HsaRsrcFactory::symbols_map_it_t kernel_name_it; FILE* file_handle; }; @@ -503,7 +503,7 @@ void output_group(const context_entry_t* entry, const char* label) { } // Dump stored context entry -bool dump_context_entry(context_entry_t* entry) { +bool dump_context_entry(context_entry_t* entry, bool to_clean = true) { hsa_status_t status = HSA_STATUS_ERROR; volatile std::atomic* valid = reinterpret_cast*>(&entry->valid); @@ -548,7 +548,7 @@ bool dump_context_entry(context_entry_t* entry) { fprintf(file_handle, "\n"); fflush(file_handle); } - if (record) { + if (record && to_clean) { delete record; entry->data.record = NULL; } @@ -566,11 +566,11 @@ bool dump_context_entry(context_entry_t* entry) { std::ostringstream oss; oss << index << "__" << filtr_kernel_name(entry->data.kernel_name); output_results(entry, oss.str().substr(0, KERNEL_NAME_LEN_MAX).c_str()); - free(const_cast(entry->data.kernel_name)); + if (to_clean) free(const_cast(entry->data.kernel_name)); // Finishing cleanup // Deleting profiling context will delete all allocated resources - rocprofiler_close(group.context); + if (to_clean) rocprofiler_close(group.context); } return true; @@ -644,31 +644,6 @@ bool context_handler(rocprofiler_group_t group, void* arg) { return false; } -static const amd_kernel_code_t* GetKernelCode(uint64_t kernel_object) { - const amd_kernel_code_t* kernel_code = NULL; - hsa_status_t status = - HsaRsrcFactory::Instance().LoaderApi()->hsa_ven_amd_loader_query_host_address( - reinterpret_cast(kernel_object), - reinterpret_cast(&kernel_code)); - if (HSA_STATUS_SUCCESS != status) { - kernel_code = reinterpret_cast(kernel_object); - } - return kernel_code; -} - -// Demangle C++ symbol name -static const char* cpp_demangle(const char* symname) { - size_t size = 0; - int status; - const char* ret = abi::__cxa_demangle(symname, NULL, &size, &status); - return (ret != 0) ? ret : strdup(symname); -} - -static const char* QueryKernelName(uint64_t kernel_object, const amd_kernel_code_t* kernel_code) { - const char* kernel_symname = HsaRsrcFactory::GetKernelNameRef(kernel_object); - return cpp_demangle(kernel_symname); -} - // Profiling completion handler // Dump context entry bool context_pool_handler(const rocprofiler_pool_entry_t* entry, void* arg) { @@ -677,25 +652,22 @@ bool context_pool_handler(const rocprofiler_pool_entry_t* entry, void* arg) { handler_arg_t* handler_arg = reinterpret_cast(arg); ctx_entry->features = handler_arg->features; ctx_entry->feature_count = handler_arg->feature_count; + ctx_entry->data.kernel_name = ctx_entry->kernel_name_it->second.name; ctx_entry->file_handle = result_file_handle; - const uint64_t kernel_object = ctx_entry->kernel_object; - const amd_kernel_code_t* kernel_code = GetKernelCode(kernel_object); - ctx_entry->data.kernel_name = QueryKernelName(kernel_object, kernel_code); - if (pthread_mutex_lock(&mutex) != 0) { perror("pthread_mutex_lock"); abort(); } - dump_context_entry(ctx_entry); + dump_context_entry(ctx_entry, false); if (pthread_mutex_unlock(&mutex) != 0) { perror("pthread_mutex_unlock"); abort(); } - free((void*)(ctx_entry->data.kernel_name)); + HsaRsrcFactory::ReleaseKernelNameRef(ctx_entry->kernel_name_it); return false; } @@ -766,13 +738,36 @@ bool check_filter(const rocprofiler_callback_data_t* callback_data, const callba return found; } +static const amd_kernel_code_t* GetKernelCode(uint64_t kernel_object) { + const amd_kernel_code_t* kernel_code = NULL; + hsa_status_t status = + HsaRsrcFactory::Instance().LoaderApi()->hsa_ven_amd_loader_query_host_address( + reinterpret_cast(kernel_object), + reinterpret_cast(&kernel_code)); + if (HSA_STATUS_SUCCESS != status) { + kernel_code = reinterpret_cast(kernel_object); + } + return kernel_code; +} + // Setting kernel properties void set_kernel_properties(const rocprofiler_callback_data_t* callback_data, - kernel_properties_t* kernel_properties_ptr) + context_entry_t* entry) { const hsa_kernel_dispatch_packet_t* packet = callback_data->packet; + kernel_properties_t* kernel_properties_ptr = &(entry->kernel_properties); const amd_kernel_code_t* kernel_code = callback_data->kernel_code; + entry->data = *callback_data; + + if (kernel_code == NULL) { + const uint64_t kernel_object = callback_data->packet->kernel_object; + kernel_code = GetKernelCode(kernel_object); + entry->kernel_name_it = HsaRsrcFactory::AcquireKernelNameRef(kernel_object); + } else { + entry->data.kernel_name = strdup(callback_data->kernel_name); + } + uint64_t grid_size = packet->grid_size_x * packet->grid_size_y * packet->grid_size_z; if (grid_size > UINT32_MAX) abort(); kernel_properties_ptr->grid_size = (uint32_t)grid_size; @@ -806,7 +801,7 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, // Context entry context_entry_t* entry = alloc_context_entry(); // Setting kernel properties - set_kernel_properties(callback_data, &(entry->kernel_properties)); + set_kernel_properties(callback_data, entry); // context properties rocprofiler_properties_t properties{}; @@ -852,8 +847,6 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, entry->group = *group; entry->features = features; entry->feature_count = feature_count; - entry->data = *callback_data; - entry->data.kernel_name = strdup(callback_data->kernel_name); entry->file_handle = tool_data->file_handle; entry->active = true; reinterpret_cast*>(&entry->valid)->store(true); @@ -881,7 +874,7 @@ hsa_status_t dispatch_callback_opt(const rocprofiler_callback_data_t* callback_d rocprofiler_t* context = pool_entry.context; context_entry_t* entry = reinterpret_cast(pool_entry.payload); // Setting kernel properties - set_kernel_properties(callback_data, &(entry->kernel_properties)); + set_kernel_properties(callback_data, entry); // Get group[0] status = rocprofiler_get_group(context, 0, group); check_status(status); @@ -890,8 +883,7 @@ hsa_status_t dispatch_callback_opt(const rocprofiler_callback_data_t* callback_d entry->index = UINT32_MAX; entry->agent = agent; entry->group = *group; - entry->data = *callback_data; - entry->kernel_object = callback_data->packet->kernel_object; + reinterpret_cast*>(&entry->valid)->store(true); return status; } @@ -1120,9 +1112,19 @@ rocprofiler_hsa_callbacks_t hsa_callbacks { hsa_unified_callback, hsa_unified_callback, hsa_unified_callback, - hsa_unified_callback + hsa_unified_callback, + NULL }; +// HSA kernel symbol callback +hsa_status_t hsa_ksymbol_cb(rocprofiler_hsa_cb_id_t id, + const rocprofiler_hsa_callback_data_t* data, + void* arg) +{ + HsaRsrcFactory::SetKernelNameRef(data->ksymbol.object, data->ksymbol.name, data->ksymbol.destroy); + return HSA_STATUS_SUCCESS; +} + // Tool constructor extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings) { @@ -1467,6 +1469,12 @@ extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings) callbacks_ptrs.destroy = destroy_callback; rocprofiler_set_queue_callbacks(callbacks_ptrs, callbacks_arg); + + rocprofiler_hsa_callbacks_t cs{}; + cs.ksymbol = hsa_ksymbol_cb; + rocprofiler_set_hsa_callbacks(cs, NULL); + settings->code_obj_tracking = 0; + settings->hsa_intercepting = 1; } else { // Adding dispatch observer rocprofiler_queue_callbacks_t callbacks_ptrs{0}; diff --git a/projects/rocprofiler/test/util/hsa_rsrc_factory.cpp b/projects/rocprofiler/test/util/hsa_rsrc_factory.cpp index 10f9fbc1e6..7d3301a30e 100644 --- a/projects/rocprofiler/test/util/hsa_rsrc_factory.cpp +++ b/projects/rocprofiler/test/util/hsa_rsrc_factory.cpp @@ -24,6 +24,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "util/hsa_rsrc_factory.h" +#include #include #include #include @@ -36,6 +37,7 @@ POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include @@ -44,6 +46,14 @@ POSSIBILITY OF SUCH DAMAGE. #include #include +// Demangle C++ symbol name +static const char* cpp_demangle(const char* symname) { + size_t size = 0; + int status; + const char* ret = abi::__cxa_demangle(symname, NULL, &size, &status); + return (ret != 0) ? ret : strdup(symname); +} + // Callback function to get available in the system agents hsa_status_t HsaRsrcFactory::GetHsaAgentsCallback(hsa_agent_t agent, void* data) { hsa_status_t status = HSA_STATUS_ERROR; @@ -192,6 +202,7 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) { hsa_api_.hsa_executable_create_alt = table->core_->hsa_executable_create_alt_fn; hsa_api_.hsa_executable_load_agent_code_object = table->core_->hsa_executable_load_agent_code_object_fn; hsa_api_.hsa_executable_freeze = table->core_->hsa_executable_freeze_fn; + hsa_api_.hsa_executable_destroy = table->core_->hsa_executable_destroy_fn; hsa_api_.hsa_executable_get_symbol = table->core_->hsa_executable_get_symbol_fn; hsa_api_.hsa_executable_symbol_get_info = table->core_->hsa_executable_symbol_get_info_fn; hsa_api_.hsa_executable_iterate_symbols = table->core_->hsa_executable_iterate_symbols_fn; @@ -232,6 +243,7 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) { hsa_api_.hsa_executable_create_alt = hsa_executable_create_alt; hsa_api_.hsa_executable_load_agent_code_object = hsa_executable_load_agent_code_object; hsa_api_.hsa_executable_freeze = hsa_executable_freeze; + hsa_api_.hsa_executable_destroy = hsa_executable_destroy; hsa_api_.hsa_executable_get_symbol = hsa_executable_get_symbol; hsa_api_.hsa_executable_symbol_get_info = hsa_executable_symbol_get_info; hsa_api_.hsa_executable_iterate_symbols = hsa_executable_iterate_symbols; @@ -618,6 +630,8 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br &kernelSymbol); CHECK_STATUS("Error in looking up kernel symbol", status); + close(file_handle); + // Update output parameter *code_desc = kernelSymbol; return true; @@ -693,52 +707,57 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t s return write_idx; } -const char* HsaRsrcFactory::GetKernelNameRef(uint64_t addr) { - std::lock_guard lck(mutex_); - const auto it = symbols_map_->find(addr); - if (it == symbols_map_->end()) { - fprintf(stderr, "HsaRsrcFactory::kernel addr (0x%lx) is not found\n", addr); - abort(); - } - return it->second; -} - -void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) { - std::lock_guard lck(mutex_); - executable_tracking_on_ = true; - table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor; -} - -hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data) { +hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *arg) { hsa_symbol_kind_t value = (hsa_symbol_kind_t)0; hsa_status_t status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &value); CHECK_STATUS("Error in getting symbol info", status); + if (value == HSA_SYMBOL_KIND_KERNEL) { uint64_t addr = 0; - uint32_t len = 0; status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &addr); CHECK_STATUS("Error in getting kernel object", status); - status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len); - CHECK_STATUS("Error in getting name len", status); - char *name = new char[len + 1]; - status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, name); - CHECK_STATUS("Error in getting kernel name", status); - name[len] = 0; - auto ret = symbols_map_->insert({addr, name}); - if (ret.second == false) { - delete[] ret.first->second; - ret.first->second = name; + + const int to_free = reinterpret_cast(arg); + const char* name = NULL; + if (to_free == 0) { + uint32_t len = 0; + status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len); + CHECK_STATUS("Error in getting name len", status); + char sym_name[len + 1]; + status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, sym_name); + CHECK_STATUS("Error in getting kernel name", status); + sym_name[len] = 0; + name = cpp_demangle(sym_name); } + + SetKernelNameRef(addr, name, to_free); } + return HSA_STATUS_SUCCESS; } hsa_status_t HsaRsrcFactory::hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options) { std::lock_guard lck(mutex_); if (symbols_map_ == NULL) symbols_map_ = new symbols_map_t; - hsa_status_t status = hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, NULL); + hsa_status_t status = hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, (void*)0); CHECK_STATUS("Error in iterating executable symbols", status); - return hsa_api_.hsa_executable_freeze(executable, options);; + return hsa_api_.hsa_executable_freeze(executable, options); +} + +hsa_status_t HsaRsrcFactory::hsa_executable_destroy_interceptor(hsa_executable_t executable) { + std::lock_guard lck(mutex_); + if (symbols_map_ != NULL) { + hsa_status_t status = hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, (void*)1); + CHECK_STATUS("Error in iterating executable symbols", status); + } + return hsa_api_.hsa_executable_destroy(executable); +} + +void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) { + std::lock_guard lck(mutex_); + executable_tracking_on_ = true; + table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor; + table->core_->hsa_executable_destroy_fn = hsa_executable_destroy_interceptor; } std::atomic HsaRsrcFactory::instance_{}; diff --git a/projects/rocprofiler/test/util/hsa_rsrc_factory.h b/projects/rocprofiler/test/util/hsa_rsrc_factory.h index e857813b24..ca5a6e7a14 100644 --- a/projects/rocprofiler/test/util/hsa_rsrc_factory.h +++ b/projects/rocprofiler/test/util/hsa_rsrc_factory.h @@ -95,6 +95,7 @@ struct hsa_pfn_t { decltype(hsa_executable_create_alt)* hsa_executable_create_alt; decltype(hsa_executable_load_agent_code_object)* hsa_executable_load_agent_code_object; decltype(hsa_executable_freeze)* hsa_executable_freeze; + decltype(hsa_executable_destroy)* hsa_executable_destroy; decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol; decltype(hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info; decltype(hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols; @@ -286,6 +287,13 @@ class HsaRsrcFactory { typedef std::recursive_mutex mutex_t; typedef HsaTimer::timestamp_t timestamp_t; + // Executables loading tracking + struct symbols_map_data_t { + const char* name; + uint64_t refs_count; + }; + typedef std::map symbols_map_t; + static HsaRsrcFactory* Create(bool initialize_hsa = true) { std::lock_guard lck(mutex_); HsaRsrcFactory* obj = instance_.load(std::memory_order_relaxed); @@ -406,7 +414,88 @@ class HsaRsrcFactory { // Enable executables loading tracking static bool IsExecutableTracking() { return executable_tracking_on_; } static void EnableExecutableTracking(HsaApiTable* table); - static const char* GetKernelNameRef(uint64_t addr); + + typedef symbols_map_t::iterator symbols_map_it_t; + + static inline const char* GetKernelNameRef(const uint64_t& addr) { + if (symbols_map_ == NULL) { + fprintf(stderr, "HsaRsrcFactory::GetKernelNameRef: kernel addr (0x%lx), error\n", addr); + abort(); + } + + std::lock_guard lck(mutex_); + + const auto it = symbols_map_->find(addr); + if (it == symbols_map_->end()) { + fprintf(stderr, "HsaRsrcFactory::GetKernelNameRef: kernel addr (0x%lx) is not found\n", addr); + abort(); + } + + return it->second.name; + } + + static inline symbols_map_it_t AcquireKernelNameRef(const uint64_t& addr) { + if (symbols_map_ == NULL) { + fprintf(stderr, "HsaRsrcFactory::GetKernelNameRef: kernel addr (0x%lx), error\n", addr); + abort(); + } + + std::lock_guard lck(mutex_); + + const auto it = symbols_map_->find(addr); + if (it == symbols_map_->end()) { + fprintf(stderr, "HsaRsrcFactory::GetKernelNameRef: kernel addr (0x%lx) is not found\n", addr); + abort(); + } + + std::atomic* atomic_ptr = + reinterpret_cast*>(&(it->second.refs_count)); + atomic_ptr->fetch_add(1, std::memory_order_relaxed); + + return it; + } + + static inline void ReleaseKernelNameRef(const symbols_map_it_t& it) { + std::atomic* atomic_ptr = + reinterpret_cast*>(&(it->second.refs_count)); + atomic_ptr->fetch_sub(1, std::memory_order_relaxed); + } + + static inline void SetKernelNameRef(const uint64_t& addr, const char* name, const int& free) { + if (symbols_map_ == NULL) { + std::lock_guard lck(mutex_); + if (symbols_map_ == NULL) symbols_map_ = new symbols_map_t; + } + + auto it = symbols_map_->find(addr); + if (it != symbols_map_->end()) { + while (1) { + while(it->second.refs_count != 0) sched_yield(); + mutex_.lock(); + if (it->second.refs_count == 0) break; + mutex_.unlock(); + } + } + + if (it != symbols_map_->end()) { + delete[] it->second.name; + if (free == 1) { + symbols_map_->erase(it); + } else { + fprintf(stderr, "HsaRsrcFactory::SetKernelNameRef: to set kernel addr (0x%lx) conflict\n", addr); + abort(); + } + } else { + if (free == 0) { + symbols_map_->insert({addr, symbols_map_data_t{name, 0}}); + } else { + fprintf(stderr, "HsaRsrcFactory::SetKernelNameRef: to free kernel addr (0x%lx) not found\n", addr); + abort(); + } + } + + mutex_.unlock(); + } // Initialize HSA API table void static InitHsaApiTable(HsaApiTable* table); @@ -492,11 +581,10 @@ class HsaRsrcFactory { // System agents map std::map agent_map_; - // Executables loading tracking - typedef std::map symbols_map_t; static symbols_map_t* symbols_map_; static bool executable_tracking_on_; static hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options); + static hsa_status_t hsa_executable_destroy_interceptor(hsa_executable_t executable); static hsa_status_t executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data); // HSA runtime API table