optimization mechanism fix: correct tracker handler; kernel name query on completion;

Change-Id: I14da152b4ac3c7d8fd1af2f54e9d71f834071622


[ROCm/rocprofiler commit: 80747de208]
Этот коммит содержится в:
Evgeny
2020-07-22 21:10:22 -05:00
родитель 832ab03e46
Коммит a331990ee4
11 изменённых файлов: 285 добавлений и 107 удалений
+1 -1
Просмотреть файл
@@ -3,4 +3,4 @@ BIN_DIR=`dirname $0`
BLD_DIR=$BIN_DIR/build
export CMAKE_PREFIX_PATH=/opt/rocm/include/hsa:/opt/rocm
rm -rf $BLD_DIR && mkdir $BLD_DIR && cd $BLD_DIR && cmake .. && make -j && make mytest && ./run.sh
rm -rf $BLD_DIR && mkdir $BLD_DIR && cd $BLD_DIR && cmake .. && make -j && make mytest
+9 -1
Просмотреть файл
@@ -480,7 +480,8 @@ typedef enum {
ROCPROFILER_HSA_CB_ID_ALLOCATE = 0, // Memory allocate callback
ROCPROFILER_HSA_CB_ID_DEVICE = 1, // Device assign callback
ROCPROFILER_HSA_CB_ID_MEMCOPY = 2, // Memcopy callback
ROCPROFILER_HSA_CB_ID_SUBMIT = 3 // Packet submit callback
ROCPROFILER_HSA_CB_ID_SUBMIT = 3, // Packet submit callback
ROCPROFILER_HSA_CB_ID_KSYMBOL = 4 // Loading/unloading of kernel symbol
} rocprofiler_hsa_cb_id_t;
// HSA callback data type
@@ -511,6 +512,12 @@ typedef struct {
uint32_t device_type; // type of device the packed is submitted to
uint32_t device_id; // id of device the packed is submitted to
} submit;
struct {
uint64_t object; // kernel symbol object
const char* name; // kernel symbol name
uint32_t name_length; // kernel symbol name length
int destroy; // symbol executable destroy
} ksymbol;
};
} rocprofiler_hsa_callback_data_t;
@@ -526,6 +533,7 @@ typedef struct {
rocprofiler_hsa_callback_fun_t device; // agent assign callback
rocprofiler_hsa_callback_fun_t memcopy; // memory copy callback
rocprofiler_hsa_callback_fun_t submit; // packet submit callback
rocprofiler_hsa_callback_fun_t ksymbol; // kernel symbol callback
} rocprofiler_hsa_callbacks_t;
// Set callbacks. If the callback is NULL then it is disabled.
+1
Просмотреть файл
@@ -363,6 +363,7 @@ class Context {
~Context() { Destruct(); }
void Destruct() {
hsa_signal_destroy(dispatch_signal_);
for (const auto& v : info_map_) {
const std::string& name = v.first;
const rocprofiler_feature_t* info = v.second;
+62 -1
Просмотреть файл
@@ -25,6 +25,7 @@ SOFTWARE.
#ifndef _SRC_CORE_HSA_INTERCEPTOR_H
#define _SRC_CORE_HSA_INTERCEPTOR_H
#include <cxxabi.h>
#include <hsa.h>
#include <hsa_ext_amd.h>
#include <hsa_ven_amd_loader.h>
@@ -49,7 +50,8 @@ SOFTWARE.
(ID == ROCPROFILER_HSA_CB_ID_ALLOCATE) ? callbacks_.allocate: \
(ID == ROCPROFILER_HSA_CB_ID_DEVICE) ? callbacks_.device: \
(ID == ROCPROFILER_HSA_CB_ID_MEMCOPY) ? callbacks_.memcopy: \
callbacks_.submit; \
(ID == ROCPROFILER_HSA_CB_ID_SUBMIT) ? callbacks_.submit: \
callbacks_.ksymbol; \
if ((__callback != NULL) && (recursion_ == false))
#define DO_HSA_CALLBACK \
@@ -62,6 +64,14 @@ SOFTWARE.
#define ISSUE_HSA_CALLBACK(ID) \
do { IS_HSA_CALLBACK(ID) { DO_HSA_CALLBACK; } } while(0)
// Demangle C++ symbol name.
// Returns a malloc-allocated string the caller releases with free(): the
// demangled name, or a copy of 'symname' when it cannot be demangled.
// Returns NULL when 'symname' is NULL.
static const char* cpp_demangle(const char* symname) {
  // strdup(NULL) is undefined, so a missing name is reported back as NULL
  if (symname == NULL) return NULL;
  int status = 0;
  // With a NULL output buffer the demangler allocates the result itself
  char* demangled = abi::__cxa_demangle(symname, NULL, NULL, &status);
  return (demangled != NULL) ? demangled : strdup(symname);
}
namespace rocprofiler {
extern decltype(hsa_memory_allocate)* hsa_memory_allocate_fn;
extern decltype(hsa_memory_assign_agent)* hsa_memory_assign_agent_fn;
@@ -337,6 +347,39 @@ class HsaInterceptor {
return HSA_STATUS_SUCCESS;
}
// Symbol iteration callback reporting kernel symbols through the
// ROCPROFILER_HSA_CB_ID_KSYMBOL callback.
// 'arg' carries an integer flag: 0 - the symbol name is queried and demangled,
// otherwise the name is left NULL and only the kernel object is reported
// (the flag is passed as 1 from the executable destroy path).
// Non-kernel symbols are ignored. Always returns HSA_STATUS_SUCCESS.
static hsa_status_t KernelSymbolCallback(
  hsa_executable_t executable,
  hsa_executable_symbol_t symbol,
  void *arg)
{
  const int free_flag = reinterpret_cast<long>(arg);
  hsa_symbol_kind_t kind = (hsa_symbol_kind_t)0;
  HSA_RT(hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &kind));
  if (kind == HSA_SYMBOL_KIND_KERNEL) {
    const char* name = NULL;
    uint32_t len = 0;
    uint64_t obj = 0;
    HSA_RT(hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &obj));
    if (free_flag == 0) {
      HSA_RT(hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len));
      char sym_name[len + 1];
      HSA_RT(hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, sym_name));
      // The reported length excludes the NUL terminator, so the buffer has to
      // be terminated explicitly before it is used as a C string
      sym_name[len] = 0;
      // Heap-allocated by cpp_demangle(); not released here
      // NOTE(review): presumably the callback takes ownership - confirm
      name = cpp_demangle(sym_name);
    }

    // NOTE(review): the local is presumably picked up by name inside
    // ISSUE_HSA_CALLBACK, keep it called 'data' - confirm against the macro
    rocprofiler_hsa_callback_data_t data{};
    data.ksymbol.object = obj;
    data.ksymbol.name = name;
    // NOTE(review): this is the length of the mangled name - confirm consumers
    data.ksymbol.name_length = len;
    data.ksymbol.destroy = free_flag;
    ISSUE_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_KSYMBOL);
  }
  return HSA_STATUS_SUCCESS;
}
static hsa_status_t ExecutableFreeze(
hsa_executable_t executable,
const char *options)
@@ -352,6 +395,15 @@ class HsaInterceptor {
reinterpret_cast<void*>(0));
}
{
IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_KSYMBOL) {
HSA_RT(hsa_executable_iterate_symbols(
executable,
KernelSymbolCallback,
reinterpret_cast<void*>(0)));
}
}
return status;
}
@@ -367,6 +419,15 @@ class HsaInterceptor {
reinterpret_cast<void*>(1));
}
{
IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_KSYMBOL) {
HSA_RT(hsa_executable_iterate_symbols(
executable,
KernelSymbolCallback,
reinterpret_cast<void*>(1)));
}
}
HSA_RT(hsa_executable_destroy_fn(executable));
return status;
+6 -25
Просмотреть файл
@@ -24,7 +24,6 @@ THE SOFTWARE.
#define _SRC_CORE_INTERCEPT_QUEUE_H
#include <amd_hsa_kernel_code.h>
#include <cxxabi.h>
#include <dlfcn.h>
#include <sys/syscall.h>
@@ -165,12 +164,7 @@ class InterceptQueue {
const hsa_kernel_dispatch_packet_t* dispatch_packet =
reinterpret_cast<const hsa_kernel_dispatch_packet_t*>(packet);
const hsa_signal_t completion_signal = dispatch_packet->completion_signal;
#if 0
// Prepareing dispatch callback data
uint64_t kernel_object = dispatch_packet->kernel_object;
const amd_kernel_code_t* kernel_code = GetKernelCode(kernel_object);
const char* kernel_name = QueryKernelName(kernel_object, kernel_code);
#endif
rocprofiler_callback_data_t data = {obj->agent_info_->dev_id,
obj->agent_info_->dev_index,
obj->queue_,
@@ -178,18 +172,15 @@ class InterceptQueue {
obj->queue_id,
completion_signal,
dispatch_packet,
NULL, // kernel_name
0, // kernel_object
NULL, // kernel_code
NULL, // kernel_name
0, // kernel_object
NULL, // kernel_code
0, // (uint32_t)syscall(__NR_gettid),
NULL};
NULL}; // record
// Calling dispatch callback
rocprofiler_group_t group = {};
hsa_status_t status = (dispatch_callback_.load())(&data, callback_data_, &group);
#if 0
free(const_cast<char*>(kernel_name));
#endif
Context* context = reinterpret_cast<Context*>(group.context);
// Injecting profiling start/stop packets
if ((status == HSA_STATUS_SUCCESS) && (context != NULL)) {
@@ -306,7 +297,6 @@ class InterceptQueue {
// Calling dispatch callback
rocprofiler_group_t group = {};
hsa_status_t status = (dispatch_callback_.load())(&data, callback_data_, &group);
free(const_cast<char*>(kernel_name));
// Injecting profiling start/stop packets
if ((status != HSA_STATUS_SUCCESS) || (group.context == NULL)) {
if (tracker_entry != NULL) {
@@ -445,7 +435,6 @@ class InterceptQueue {
// Calling dispatch callback
rocprofiler_group_t group = {};
hsa_status_t status = (dispatch_callback_.load())(&data, callback_data_, &group);
free(const_cast<char*>(kernel_name));
// Injecting profiling start/stop packets
if ((status == HSA_STATUS_SUCCESS) && (group.context != NULL)) {
@@ -539,14 +528,6 @@ class InterceptQueue {
return (dbg_info != NULL) ? dbg_info->kernel_name : NULL;
}
// Translate a mangled C++ symbol into its readable form.
// The result is always heap-allocated: either the demangler output or a
// duplicate of the input when demangling does not succeed.
static const char* cpp_demangle(const char* symname) {
  int demangle_status;
  size_t buffer_length = 0;
  char* readable = abi::__cxa_demangle(symname, NULL, &buffer_length, &demangle_status);
  if (readable == NULL) {
    return strdup(symname);
  }
  return readable;
}
static const char* QueryKernelName(uint64_t kernel_object, const amd_kernel_code_t* kernel_code) {
const uint16_t kernel_object_flag = *((uint64_t*)kernel_code + 1);
if (kernel_object_flag == 0) {
@@ -557,7 +538,7 @@ class InterceptQueue {
const char* kernel_symname = (util::HsaRsrcFactory::IsExecutableTracking()) ?
util::HsaRsrcFactory::GetKernelNameRef(kernel_object) :
GetKernelName(kernel_code->runtime_loader_kernel_symbol);
return cpp_demangle(kernel_symname);
return kernel_symname;
}
// method to get an intercept queue object
+1 -1
Просмотреть файл
@@ -167,7 +167,7 @@ class Tracker {
hsa_signal_t& dispatch_signal = context->GetDispatchSignal();
util::HsaRsrcFactory::Instance().HsaApi()->hsa_signal_store_screlease(dispatch_signal, signal_value);
hsa_status_t status =
util::HsaRsrcFactory::Instance().HsaApi()->hsa_amd_signal_async_handler(dispatch_signal, HSA_SIGNAL_CONDITION_LT, signal_value, Handler, group);
util::HsaRsrcFactory::Instance().HsaApi()->hsa_amd_signal_async_handler(dispatch_signal, HSA_SIGNAL_CONDITION_LT, signal_value, Handler_opt, group);
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_signal_async_handler");
}
+4 -1
Просмотреть файл
@@ -36,6 +36,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <atomic>
#include <cassert>
@@ -626,6 +627,8 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br
&kernelSymbol);
CHECK_STATUS("Error in looking up kernel symbol", status);
close(file_handle);
// Update output parameter
*code_desc = kernelSymbol;
return true;
@@ -705,7 +708,7 @@ const char* HsaRsrcFactory::GetKernelNameRef(uint64_t addr) {
std::lock_guard<mutex_t> lck(mutex_);
const auto it = symbols_map_->find(addr);
if (it == symbols_map_->end()) {
fprintf(stderr, "HsaRsrcFactory::kernel addr (0x%lx) is not found\n", addr);
fprintf(stderr, "HsaRsrcFactory::GetKernelNameRef: kernel addr (0x%lx) is not found\n", addr);
abort();
}
return it->second;
+9
Просмотреть файл
@@ -128,6 +128,15 @@ export ROCP_THRS=10
export ROCP_INPUT=pmc_input.xml
eval_test "'rocprof' libtool PMC n-thread test" ./test/ctrl
export ROCP_OPT_MODE=1
export ROCP_KITER=20
export ROCP_DITER=20
export ROCP_AGENTS=1
export ROCP_THRS=10
export ROCP_INPUT=pmc_input.xml
eval_test "'rocprof' libtool PMC n-thread opt test" ./test/ctrl
unset ROCP_OPT_MODE
export ROCP_KITER=20
export ROCP_DITER=20
export ROCP_AGENTS=1
+52 -44
Просмотреть файл
@@ -100,7 +100,7 @@ struct context_entry_t {
unsigned feature_count;
rocprofiler_callback_data_t data;
kernel_properties_t kernel_properties;
uint64_t kernel_object;
HsaRsrcFactory::symbols_map_it_t kernel_name_it;
FILE* file_handle;
};
@@ -503,7 +503,7 @@ void output_group(const context_entry_t* entry, const char* label) {
}
// Dump stored context entry
bool dump_context_entry(context_entry_t* entry) {
bool dump_context_entry(context_entry_t* entry, bool to_clean = true) {
hsa_status_t status = HSA_STATUS_ERROR;
volatile std::atomic<bool>* valid = reinterpret_cast<std::atomic<bool>*>(&entry->valid);
@@ -548,7 +548,7 @@ bool dump_context_entry(context_entry_t* entry) {
fprintf(file_handle, "\n");
fflush(file_handle);
}
if (record) {
if (record && to_clean) {
delete record;
entry->data.record = NULL;
}
@@ -566,11 +566,11 @@ bool dump_context_entry(context_entry_t* entry) {
std::ostringstream oss;
oss << index << "__" << filtr_kernel_name(entry->data.kernel_name);
output_results(entry, oss.str().substr(0, KERNEL_NAME_LEN_MAX).c_str());
free(const_cast<char*>(entry->data.kernel_name));
if (to_clean) free(const_cast<char*>(entry->data.kernel_name));
// Finishing cleanup
// Deleting profiling context will delete all allocated resources
rocprofiler_close(group.context);
if (to_clean) rocprofiler_close(group.context);
}
return true;
@@ -644,31 +644,6 @@ bool context_handler(rocprofiler_group_t group, void* arg) {
return false;
}
// Resolve a kernel object handle to its kernel code descriptor.
// The loader API is asked for the host address matching 'kernel_object';
// when the query does not succeed the handle value itself is used as the address.
static const amd_kernel_code_t* GetKernelCode(uint64_t kernel_object) {
  const void* host_address = NULL;
  const hsa_status_t query_status =
    HsaRsrcFactory::Instance().LoaderApi()->hsa_ven_amd_loader_query_host_address(
      reinterpret_cast<const void*>(kernel_object), &host_address);
  if (query_status == HSA_STATUS_SUCCESS) {
    return static_cast<const amd_kernel_code_t*>(host_address);
  }
  return reinterpret_cast<const amd_kernel_code_t*>(kernel_object);
}
// Produce a readable name for a C++ symbol.
// Returns the demangler output when available, otherwise a duplicate of
// 'symname'; in both cases the string is heap-allocated.
static const char* cpp_demangle(const char* symname) {
  size_t out_len = 0;
  int rc;
  const char* pretty = abi::__cxa_demangle(symname, NULL, &out_len, &rc);
  if (pretty != NULL) return pretty;
  return strdup(symname);
}
// Look up the symbol name registered for 'kernel_object' and return its
// demangled form as produced by cpp_demangle().
// 'kernel_code' is not used by this implementation.
static const char* QueryKernelName(uint64_t kernel_object, const amd_kernel_code_t* kernel_code) {
  return cpp_demangle(HsaRsrcFactory::GetKernelNameRef(kernel_object));
}
// Profiling completion handler
// Dump context entry
bool context_pool_handler(const rocprofiler_pool_entry_t* entry, void* arg) {
@@ -677,25 +652,22 @@ bool context_pool_handler(const rocprofiler_pool_entry_t* entry, void* arg) {
handler_arg_t* handler_arg = reinterpret_cast<handler_arg_t*>(arg);
ctx_entry->features = handler_arg->features;
ctx_entry->feature_count = handler_arg->feature_count;
ctx_entry->data.kernel_name = ctx_entry->kernel_name_it->second.name;
ctx_entry->file_handle = result_file_handle;
const uint64_t kernel_object = ctx_entry->kernel_object;
const amd_kernel_code_t* kernel_code = GetKernelCode(kernel_object);
ctx_entry->data.kernel_name = QueryKernelName(kernel_object, kernel_code);
if (pthread_mutex_lock(&mutex) != 0) {
perror("pthread_mutex_lock");
abort();
}
dump_context_entry(ctx_entry);
dump_context_entry(ctx_entry, false);
if (pthread_mutex_unlock(&mutex) != 0) {
perror("pthread_mutex_unlock");
abort();
}
free((void*)(ctx_entry->data.kernel_name));
HsaRsrcFactory::ReleaseKernelNameRef(ctx_entry->kernel_name_it);
return false;
}
@@ -766,13 +738,36 @@ bool check_filter(const rocprofiler_callback_data_t* callback_data, const callba
return found;
}
// Map a kernel object handle to the kernel code descriptor.
// Queries the loader API for the host address of 'kernel_object' and falls
// back to interpreting the handle itself as the address if the query fails.
static const amd_kernel_code_t* GetKernelCode(uint64_t kernel_object) {
  const void* const device_address = reinterpret_cast<const void*>(kernel_object);
  const void* host_address = NULL;
  const hsa_status_t status =
    HsaRsrcFactory::Instance().LoaderApi()->hsa_ven_amd_loader_query_host_address(
      device_address, &host_address);
  return (status == HSA_STATUS_SUCCESS) ?
    static_cast<const amd_kernel_code_t*>(host_address) :
    reinterpret_cast<const amd_kernel_code_t*>(kernel_object);
}
// Setting kernel properties
void set_kernel_properties(const rocprofiler_callback_data_t* callback_data,
kernel_properties_t* kernel_properties_ptr)
context_entry_t* entry)
{
const hsa_kernel_dispatch_packet_t* packet = callback_data->packet;
kernel_properties_t* kernel_properties_ptr = &(entry->kernel_properties);
const amd_kernel_code_t* kernel_code = callback_data->kernel_code;
entry->data = *callback_data;
if (kernel_code == NULL) {
const uint64_t kernel_object = callback_data->packet->kernel_object;
kernel_code = GetKernelCode(kernel_object);
entry->kernel_name_it = HsaRsrcFactory::AcquireKernelNameRef(kernel_object);
} else {
entry->data.kernel_name = strdup(callback_data->kernel_name);
}
uint64_t grid_size = packet->grid_size_x * packet->grid_size_y * packet->grid_size_z;
if (grid_size > UINT32_MAX) abort();
kernel_properties_ptr->grid_size = (uint32_t)grid_size;
@@ -806,7 +801,7 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data,
// Context entry
context_entry_t* entry = alloc_context_entry();
// Setting kernel properties
set_kernel_properties(callback_data, &(entry->kernel_properties));
set_kernel_properties(callback_data, entry);
// context properties
rocprofiler_properties_t properties{};
@@ -852,8 +847,6 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data,
entry->group = *group;
entry->features = features;
entry->feature_count = feature_count;
entry->data = *callback_data;
entry->data.kernel_name = strdup(callback_data->kernel_name);
entry->file_handle = tool_data->file_handle;
entry->active = true;
reinterpret_cast<std::atomic<bool>*>(&entry->valid)->store(true);
@@ -881,7 +874,7 @@ hsa_status_t dispatch_callback_opt(const rocprofiler_callback_data_t* callback_d
rocprofiler_t* context = pool_entry.context;
context_entry_t* entry = reinterpret_cast<context_entry_t*>(pool_entry.payload);
// Setting kernel properties
set_kernel_properties(callback_data, &(entry->kernel_properties));
set_kernel_properties(callback_data, entry);
// Get group[0]
status = rocprofiler_get_group(context, 0, group);
check_status(status);
@@ -890,8 +883,7 @@ hsa_status_t dispatch_callback_opt(const rocprofiler_callback_data_t* callback_d
entry->index = UINT32_MAX;
entry->agent = agent;
entry->group = *group;
entry->data = *callback_data;
entry->kernel_object = callback_data->packet->kernel_object;
reinterpret_cast<std::atomic<bool>*>(&entry->valid)->store(true);
return status;
}
@@ -1120,9 +1112,19 @@ rocprofiler_hsa_callbacks_t hsa_callbacks {
hsa_unified_callback,
hsa_unified_callback,
hsa_unified_callback,
hsa_unified_callback
hsa_unified_callback,
NULL
};
// Kernel symbol callback.
// Forwards the kernel object, its name and the destroy flag from the callback
// data to HsaRsrcFactory::SetKernelNameRef(). 'id' and 'arg' are not used.
hsa_status_t hsa_ksymbol_cb(rocprofiler_hsa_cb_id_t id,
                            const rocprofiler_hsa_callback_data_t* data,
                            void* arg)
{
  const auto& ksymbol = data->ksymbol;
  HsaRsrcFactory::SetKernelNameRef(ksymbol.object, ksymbol.name, ksymbol.destroy);
  return HSA_STATUS_SUCCESS;
}
// Tool constructor
extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings)
{
@@ -1467,6 +1469,12 @@ extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings)
callbacks_ptrs.destroy = destroy_callback;
rocprofiler_set_queue_callbacks(callbacks_ptrs, callbacks_arg);
rocprofiler_hsa_callbacks_t cs{};
cs.ksymbol = hsa_ksymbol_cb;
rocprofiler_set_hsa_callbacks(cs, NULL);
settings->code_obj_tracking = 0;
settings->hsa_intercepting = 1;
} else {
// Adding dispatch observer
rocprofiler_queue_callbacks_t callbacks_ptrs{0};
+49 -30
Просмотреть файл
@@ -24,6 +24,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "util/hsa_rsrc_factory.h"
#include <cxxabi.h>
#include <dlfcn.h>
#include <fcntl.h>
#include <hsa.h>
@@ -36,6 +37,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <atomic>
#include <cassert>
@@ -44,6 +46,14 @@ POSSIBILITY OF SUCH DAMAGE.
#include <string>
#include <vector>
// Demangle C++ symbol name.
// Returns a malloc-allocated string the caller releases with free(): the
// demangled name, or a copy of 'symname' when it cannot be demangled.
// Returns NULL when 'symname' is NULL.
static const char* cpp_demangle(const char* symname) {
  // strdup(NULL) is undefined, so a missing name is reported back as NULL
  if (symname == NULL) return NULL;
  int status = 0;
  // With a NULL output buffer the demangler allocates the result itself
  char* demangled = abi::__cxa_demangle(symname, NULL, NULL, &status);
  return (demangled != NULL) ? demangled : strdup(symname);
}
// Callback function to get available in the system agents
hsa_status_t HsaRsrcFactory::GetHsaAgentsCallback(hsa_agent_t agent, void* data) {
hsa_status_t status = HSA_STATUS_ERROR;
@@ -192,6 +202,7 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) {
hsa_api_.hsa_executable_create_alt = table->core_->hsa_executable_create_alt_fn;
hsa_api_.hsa_executable_load_agent_code_object = table->core_->hsa_executable_load_agent_code_object_fn;
hsa_api_.hsa_executable_freeze = table->core_->hsa_executable_freeze_fn;
hsa_api_.hsa_executable_destroy = table->core_->hsa_executable_destroy_fn;
hsa_api_.hsa_executable_get_symbol = table->core_->hsa_executable_get_symbol_fn;
hsa_api_.hsa_executable_symbol_get_info = table->core_->hsa_executable_symbol_get_info_fn;
hsa_api_.hsa_executable_iterate_symbols = table->core_->hsa_executable_iterate_symbols_fn;
@@ -232,6 +243,7 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) {
hsa_api_.hsa_executable_create_alt = hsa_executable_create_alt;
hsa_api_.hsa_executable_load_agent_code_object = hsa_executable_load_agent_code_object;
hsa_api_.hsa_executable_freeze = hsa_executable_freeze;
hsa_api_.hsa_executable_destroy = hsa_executable_destroy;
hsa_api_.hsa_executable_get_symbol = hsa_executable_get_symbol;
hsa_api_.hsa_executable_symbol_get_info = hsa_executable_symbol_get_info;
hsa_api_.hsa_executable_iterate_symbols = hsa_executable_iterate_symbols;
@@ -618,6 +630,8 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br
&kernelSymbol);
CHECK_STATUS("Error in looking up kernel symbol", status);
close(file_handle);
// Update output parameter
*code_desc = kernelSymbol;
return true;
@@ -693,52 +707,57 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t s
return write_idx;
}
// Return the name stored in the symbols map for the kernel at 'addr'.
// The lookup runs under the factory mutex; an unknown address is reported on
// stderr and the process is aborted.
const char* HsaRsrcFactory::GetKernelNameRef(uint64_t addr) {
  std::lock_guard<mutex_t> lck(mutex_);
  const auto entry = symbols_map_->find(addr);
  if (entry != symbols_map_->end()) {
    return entry->second;
  }
  fprintf(stderr, "HsaRsrcFactory::kernel addr (0x%lx) is not found\n", addr);
  abort();
}
// Turn on executable tracking: set the tracking flag and route the
// executable-freeze entry of the core API table to the local interceptor.
// Both updates happen under the factory mutex.
void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) {
  std::lock_guard<mutex_t> lck(mutex_);
  executable_tracking_on_ = true;
  auto& core_table = *(table->core_);
  core_table.hsa_executable_freeze_fn = hsa_executable_freeze_interceptor;
}
hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data) {
hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *arg) {
hsa_symbol_kind_t value = (hsa_symbol_kind_t)0;
hsa_status_t status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &value);
CHECK_STATUS("Error in getting symbol info", status);
if (value == HSA_SYMBOL_KIND_KERNEL) {
uint64_t addr = 0;
uint32_t len = 0;
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &addr);
CHECK_STATUS("Error in getting kernel object", status);
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len);
CHECK_STATUS("Error in getting name len", status);
char *name = new char[len + 1];
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, name);
CHECK_STATUS("Error in getting kernel name", status);
name[len] = 0;
auto ret = symbols_map_->insert({addr, name});
if (ret.second == false) {
delete[] ret.first->second;
ret.first->second = name;
const int to_free = reinterpret_cast<long>(arg);
const char* name = NULL;
if (to_free == 0) {
uint32_t len = 0;
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len);
CHECK_STATUS("Error in getting name len", status);
char sym_name[len + 1];
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, sym_name);
CHECK_STATUS("Error in getting kernel name", status);
sym_name[len] = 0;
name = cpp_demangle(sym_name);
}
SetKernelNameRef(addr, name, to_free);
}
return HSA_STATUS_SUCCESS;
}
hsa_status_t HsaRsrcFactory::hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options) {
std::lock_guard<mutex_t> lck(mutex_);
if (symbols_map_ == NULL) symbols_map_ = new symbols_map_t;
hsa_status_t status = hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, NULL);
hsa_status_t status = hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, (void*)0);
CHECK_STATUS("Error in iterating executable symbols", status);
return hsa_api_.hsa_executable_freeze(executable, options);;
return hsa_api_.hsa_executable_freeze(executable, options);
}
// Interceptor installed in place of the executable-destroy entry point.
// When a symbols map exists, the symbols of 'executable' are iterated with the
// flag argument set to 1 before the saved destroy entry point is invoked.
// The whole sequence runs under the factory mutex.
hsa_status_t HsaRsrcFactory::hsa_executable_destroy_interceptor(hsa_executable_t executable) {
  std::lock_guard<mutex_t> lck(mutex_);
  if (symbols_map_ == NULL) {
    // Nothing is tracked, just forward the call
    return hsa_api_.hsa_executable_destroy(executable);
  }
  void* const free_flag = reinterpret_cast<void*>(1);
  hsa_status_t status =
    hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, free_flag);
  CHECK_STATUS("Error in iterating executable symbols", status);
  return hsa_api_.hsa_executable_destroy(executable);
}
// Turn on executable tracking: set the tracking flag and route the
// executable freeze/destroy entries of the core API table to the local
// interceptors. All updates happen under the factory mutex.
void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) {
  std::lock_guard<mutex_t> lck(mutex_);
  executable_tracking_on_ = true;
  auto& core_table = *(table->core_);
  core_table.hsa_executable_freeze_fn = hsa_executable_freeze_interceptor;
  core_table.hsa_executable_destroy_fn = hsa_executable_destroy_interceptor;
}
std::atomic<HsaRsrcFactory*> HsaRsrcFactory::instance_{};
+91 -3
Просмотреть файл
@@ -95,6 +95,7 @@ struct hsa_pfn_t {
decltype(hsa_executable_create_alt)* hsa_executable_create_alt;
decltype(hsa_executable_load_agent_code_object)* hsa_executable_load_agent_code_object;
decltype(hsa_executable_freeze)* hsa_executable_freeze;
decltype(hsa_executable_destroy)* hsa_executable_destroy;
decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol;
decltype(hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info;
decltype(hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols;
@@ -286,6 +287,13 @@ class HsaRsrcFactory {
typedef std::recursive_mutex mutex_t;
typedef HsaTimer::timestamp_t timestamp_t;
// Executables loading tracking.
// Value stored per kernel object in the symbols map.
struct symbols_map_data_t {
  // kernel name string
  const char* name;
  // number of outstanding references to the entry
  uint64_t refs_count;
};
// Symbols map: kernel object address -> name and reference counter
using symbols_map_t = std::map<uint64_t, symbols_map_data_t>;
static HsaRsrcFactory* Create(bool initialize_hsa = true) {
std::lock_guard<mutex_t> lck(mutex_);
HsaRsrcFactory* obj = instance_.load(std::memory_order_relaxed);
@@ -406,7 +414,88 @@ class HsaRsrcFactory {
// Enable executables loading tracking
static bool IsExecutableTracking() { return executable_tracking_on_; }
static void EnableExecutableTracking(HsaApiTable* table);
static const char* GetKernelNameRef(uint64_t addr);
typedef symbols_map_t::iterator symbols_map_it_t;
// Return the name stored in the symbols map for the kernel at 'addr'.
// Aborts with a message on stderr when the map was never created or the
// address is not registered.
// The returned pointer is not covered by the entry reference counter; use
// AcquireKernelNameRef()/ReleaseKernelNameRef() when the name has to stay
// alive across a SetKernelNameRef() call that frees the entry.
static inline const char* GetKernelNameRef(const uint64_t& addr) {
  // The map pointer is created under the mutex, so it is read under it as well
  std::lock_guard<mutex_t> lck(mutex_);
  if (symbols_map_ == NULL) {
    fprintf(stderr, "HsaRsrcFactory::GetKernelNameRef: kernel addr (0x%lx), error\n", addr);
    abort();
  }
  const auto it = symbols_map_->find(addr);
  if (it == symbols_map_->end()) {
    fprintf(stderr, "HsaRsrcFactory::GetKernelNameRef: kernel addr (0x%lx) is not found\n", addr);
    abort();
  }
  return it->second.name;
}
// Find the symbols map entry of the kernel at 'addr' and take a reference on it.
// Returns the map iterator of the entry; the reference is dropped by passing
// that iterator to ReleaseKernelNameRef().
// Aborts with a message on stderr when the map was never created or the
// address is not registered.
static inline symbols_map_it_t AcquireKernelNameRef(const uint64_t& addr) {
  // The map pointer is created under the mutex, so it is read under it as well
  std::lock_guard<mutex_t> lck(mutex_);
  if (symbols_map_ == NULL) {
    fprintf(stderr, "HsaRsrcFactory::AcquireKernelNameRef: kernel addr (0x%lx), error\n", addr);
    abort();
  }
  const auto it = symbols_map_->find(addr);
  if (it == symbols_map_->end()) {
    fprintf(stderr, "HsaRsrcFactory::AcquireKernelNameRef: kernel addr (0x%lx) is not found\n", addr);
    abort();
  }

  // The counter is a plain integer field that is updated atomically
  std::atomic<uint64_t>* atomic_ptr =
    reinterpret_cast<std::atomic<uint64_t>*>(&(it->second.refs_count));
  atomic_ptr->fetch_add(1, std::memory_order_relaxed);

  return it;
}
// Drop one reference from the symbols map entry designated by 'it'.
// The counter field is decremented atomically; no lock is taken.
static inline void ReleaseKernelNameRef(const symbols_map_it_t& it) {
  uint64_t* const counter = &(it->second.refs_count);
  reinterpret_cast<std::atomic<uint64_t>*>(counter)->fetch_sub(1, std::memory_order_relaxed);
}
static inline void SetKernelNameRef(const uint64_t& addr, const char* name, const int& free) {
if (symbols_map_ == NULL) {
std::lock_guard<mutex_t> lck(mutex_);
if (symbols_map_ == NULL) symbols_map_ = new symbols_map_t;
}
auto it = symbols_map_->find(addr);
if (it != symbols_map_->end()) {
while (1) {
while(it->second.refs_count != 0) sched_yield();
mutex_.lock();
if (it->second.refs_count == 0) break;
mutex_.unlock();
}
}
if (it != symbols_map_->end()) {
delete[] it->second.name;
if (free == 1) {
symbols_map_->erase(it);
} else {
fprintf(stderr, "HsaRsrcFactory::SetKernelNameRef: to set kernel addr (0x%lx) conflict\n", addr);
abort();
}
} else {
if (free == 0) {
symbols_map_->insert({addr, symbols_map_data_t{name, 0}});
} else {
fprintf(stderr, "HsaRsrcFactory::SetKernelNameRef: to free kernel addr (0x%lx) not found\n", addr);
abort();
}
}
mutex_.unlock();
}
// Initialize HSA API table
void static InitHsaApiTable(HsaApiTable* table);
@@ -492,11 +581,10 @@ class HsaRsrcFactory {
// System agents map
std::map<hsa_agent_handle_t, const AgentInfo*> agent_map_;
// Executables loading tracking
typedef std::map<uint64_t, const char*> symbols_map_t;
static symbols_map_t* symbols_map_;
static bool executable_tracking_on_;
static hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options);
static hsa_status_t hsa_executable_destroy_interceptor(hsa_executable_t executable);
static hsa_status_t executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data);
// HSA runtime API table