optimization mechanism fix: correct tracker handler; kernel name query on completion;
Change-Id: I14da152b4ac3c7d8fd1af2f54e9d71f834071622
[ROCm/rocprofiler commit: 80747de208]
Этот коммит содержится в:
@@ -3,4 +3,4 @@ BIN_DIR=`dirname $0`
|
||||
BLD_DIR=$BIN_DIR/build
|
||||
|
||||
export CMAKE_PREFIX_PATH=/opt/rocm/include/hsa:/opt/rocm
|
||||
rm -rf $BLD_DIR && mkdir $BLD_DIR && cd $BLD_DIR && cmake .. && make -j && make mytest && ./run.sh
|
||||
rm -rf $BLD_DIR && mkdir $BLD_DIR && cd $BLD_DIR && cmake .. && make -j && make mytest
|
||||
|
||||
@@ -480,7 +480,8 @@ typedef enum {
|
||||
ROCPROFILER_HSA_CB_ID_ALLOCATE = 0, // Memory allocate callback
|
||||
ROCPROFILER_HSA_CB_ID_DEVICE = 1, // Device assign callback
|
||||
ROCPROFILER_HSA_CB_ID_MEMCOPY = 2, // Memcopy callback
|
||||
ROCPROFILER_HSA_CB_ID_SUBMIT = 3 // Packet submit callback
|
||||
ROCPROFILER_HSA_CB_ID_SUBMIT = 3, // Packet submit callback
|
||||
ROCPROFILER_HSA_CB_ID_KSYMBOL = 4 // Loading/unloading of kernel symbol
|
||||
} rocprofiler_hsa_cb_id_t;
|
||||
|
||||
// HSA callback data type
|
||||
@@ -511,6 +512,12 @@ typedef struct {
|
||||
uint32_t device_type; // type of device the packed is submitted to
|
||||
uint32_t device_id; // id of device the packed is submitted to
|
||||
} submit;
|
||||
struct {
|
||||
uint64_t object; // kernel symbol object
|
||||
const char* name; // kernel symbol name
|
||||
uint32_t name_length; // kernel symbol name length
|
||||
int destroy; // symbol executable destroy
|
||||
} ksymbol;
|
||||
};
|
||||
} rocprofiler_hsa_callback_data_t;
|
||||
|
||||
@@ -526,6 +533,7 @@ typedef struct {
|
||||
rocprofiler_hsa_callback_fun_t device; // agent assign callback
|
||||
rocprofiler_hsa_callback_fun_t memcopy; // memory copy callback
|
||||
rocprofiler_hsa_callback_fun_t submit; // packet submit callback
|
||||
rocprofiler_hsa_callback_fun_t ksymbol; // kernel symbol callback
|
||||
} rocprofiler_hsa_callbacks_t;
|
||||
|
||||
// Set callbacks. If the callback is NULL then it is disabled.
|
||||
|
||||
@@ -363,6 +363,7 @@ class Context {
|
||||
~Context() { Destruct(); }
|
||||
|
||||
void Destruct() {
|
||||
hsa_signal_destroy(dispatch_signal_);
|
||||
for (const auto& v : info_map_) {
|
||||
const std::string& name = v.first;
|
||||
const rocprofiler_feature_t* info = v.second;
|
||||
|
||||
@@ -25,6 +25,7 @@ SOFTWARE.
|
||||
#ifndef _SRC_CORE_HSA_INTERCEPTOR_H
|
||||
#define _SRC_CORE_HSA_INTERCEPTOR_H
|
||||
|
||||
#include <cxxabi.h>
|
||||
#include <hsa.h>
|
||||
#include <hsa_ext_amd.h>
|
||||
#include <hsa_ven_amd_loader.h>
|
||||
@@ -49,7 +50,8 @@ SOFTWARE.
|
||||
(ID == ROCPROFILER_HSA_CB_ID_ALLOCATE) ? callbacks_.allocate: \
|
||||
(ID == ROCPROFILER_HSA_CB_ID_DEVICE) ? callbacks_.device: \
|
||||
(ID == ROCPROFILER_HSA_CB_ID_MEMCOPY) ? callbacks_.memcopy: \
|
||||
callbacks_.submit; \
|
||||
(ID == ROCPROFILER_HSA_CB_ID_SUBMIT) ? callbacks_.submit: \
|
||||
callbacks_.ksymbol; \
|
||||
if ((__callback != NULL) && (recursion_ == false))
|
||||
|
||||
#define DO_HSA_CALLBACK \
|
||||
@@ -62,6 +64,14 @@ SOFTWARE.
|
||||
#define ISSUE_HSA_CALLBACK(ID) \
|
||||
do { IS_HSA_CALLBACK(ID) { DO_HSA_CALLBACK; } } while(0)
|
||||
|
||||
// Demangle a C++ symbol name.
//
// symname: NUL-terminated, possibly mangled symbol name; may be NULL.
// Returns a malloc-allocated string that the caller releases with free():
// the demangled name when 'symname' demangles successfully, otherwise a copy
// of 'symname'. Returns NULL when 'symname' is NULL.
static const char* cpp_demangle(const char* symname) {
  // strdup(NULL) is undefined behavior, so a missing name is rejected up front.
  if (symname == NULL) return NULL;

  int status = 0;
  // With a NULL output buffer the demangler allocates the result itself;
  // the length out-parameter is optional and not needed here.
  const char* demangled = abi::__cxa_demangle(symname, NULL, NULL, &status);
  return (demangled != NULL) ? demangled : strdup(symname);
}
|
||||
|
||||
namespace rocprofiler {
|
||||
extern decltype(hsa_memory_allocate)* hsa_memory_allocate_fn;
|
||||
extern decltype(hsa_memory_assign_agent)* hsa_memory_assign_agent_fn;
|
||||
@@ -337,6 +347,39 @@ class HsaInterceptor {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Per-symbol visitor passed to hsa_executable_iterate_symbols().
//
// For every symbol of kind HSA_SYMBOL_KIND_KERNEL it issues the
// ROCPROFILER_HSA_CB_ID_KSYMBOL callback with the kernel object handle.
//
// executable: executable being iterated (not used here).
// symbol:     symbol currently visited.
// arg:        integer flag smuggled through the pointer: 0 - the symbol is
//             being loaded (the name is queried and demangled), non-zero -
//             the executable is being destroyed (name stays NULL, length 0).
// Returns HSA_STATUS_SUCCESS.
static hsa_status_t KernelSymbolCallback(
  hsa_executable_t executable,
  hsa_executable_symbol_t symbol,
  void *arg)
{
  const int free_flag = reinterpret_cast<long>(arg);
  hsa_symbol_kind_t kind = (hsa_symbol_kind_t)0;
  HSA_RT(hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &kind));

  if (kind == HSA_SYMBOL_KIND_KERNEL) {
    const char* name = NULL;
    uint32_t len = 0;
    uint64_t obj = 0;
    HSA_RT(hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &obj));
    if (free_flag == 0) {
      HSA_RT(hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len));
      char sym_name[len + 1];
      HSA_RT(hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, sym_name));
      // The name attribute is a character array of exactly 'len' characters;
      // terminate it explicitly before handing it to string routines.
      sym_name[len] = 0;
      // cpp_demangle() returns a heap copy, so the name outlives this frame.
      name = cpp_demangle(sym_name);
    }

    // NOTE: the variable must be called 'data' - presumably the callback
    // macro picks it up by name; confirm against DO_HSA_CALLBACK.
    rocprofiler_hsa_callback_data_t data{};
    data.ksymbol.object = obj;
    data.ksymbol.name = name;
    data.ksymbol.name_length = len;  // length of the raw (mangled) symbol name
    data.ksymbol.destroy = free_flag;

    ISSUE_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_KSYMBOL);
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
static hsa_status_t ExecutableFreeze(
|
||||
hsa_executable_t executable,
|
||||
const char *options)
|
||||
@@ -352,6 +395,15 @@ class HsaInterceptor {
|
||||
reinterpret_cast<void*>(0));
|
||||
}
|
||||
|
||||
{
|
||||
IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_KSYMBOL) {
|
||||
HSA_RT(hsa_executable_iterate_symbols(
|
||||
executable,
|
||||
KernelSymbolCallback,
|
||||
reinterpret_cast<void*>(0)));
|
||||
}
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -367,6 +419,15 @@ class HsaInterceptor {
|
||||
reinterpret_cast<void*>(1));
|
||||
}
|
||||
|
||||
{
|
||||
IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_KSYMBOL) {
|
||||
HSA_RT(hsa_executable_iterate_symbols(
|
||||
executable,
|
||||
KernelSymbolCallback,
|
||||
reinterpret_cast<void*>(1)));
|
||||
}
|
||||
}
|
||||
|
||||
HSA_RT(hsa_executable_destroy_fn(executable));
|
||||
|
||||
return status;
|
||||
|
||||
@@ -24,7 +24,6 @@ THE SOFTWARE.
|
||||
#define _SRC_CORE_INTERCEPT_QUEUE_H
|
||||
|
||||
#include <amd_hsa_kernel_code.h>
|
||||
#include <cxxabi.h>
|
||||
#include <dlfcn.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
@@ -165,12 +164,7 @@ class InterceptQueue {
|
||||
const hsa_kernel_dispatch_packet_t* dispatch_packet =
|
||||
reinterpret_cast<const hsa_kernel_dispatch_packet_t*>(packet);
|
||||
const hsa_signal_t completion_signal = dispatch_packet->completion_signal;
|
||||
#if 0
|
||||
// Preparing dispatch callback data
|
||||
uint64_t kernel_object = dispatch_packet->kernel_object;
|
||||
const amd_kernel_code_t* kernel_code = GetKernelCode(kernel_object);
|
||||
const char* kernel_name = QueryKernelName(kernel_object, kernel_code);
|
||||
#endif
|
||||
|
||||
rocprofiler_callback_data_t data = {obj->agent_info_->dev_id,
|
||||
obj->agent_info_->dev_index,
|
||||
obj->queue_,
|
||||
@@ -178,18 +172,15 @@ class InterceptQueue {
|
||||
obj->queue_id,
|
||||
completion_signal,
|
||||
dispatch_packet,
|
||||
NULL, // kernel_name
|
||||
0, // kernel_object
|
||||
NULL, // kernel_code
|
||||
NULL, // kernel_name
|
||||
0, // kernel_object
|
||||
NULL, // kernel_code
|
||||
0, // (uint32_t)syscall(__NR_gettid),
|
||||
NULL};
|
||||
NULL}; // record
|
||||
|
||||
// Calling dispatch callback
|
||||
rocprofiler_group_t group = {};
|
||||
hsa_status_t status = (dispatch_callback_.load())(&data, callback_data_, &group);
|
||||
#if 0
|
||||
free(const_cast<char*>(kernel_name));
|
||||
#endif
|
||||
Context* context = reinterpret_cast<Context*>(group.context);
|
||||
// Injecting profiling start/stop packets
|
||||
if ((status == HSA_STATUS_SUCCESS) && (context != NULL)) {
|
||||
@@ -306,7 +297,6 @@ class InterceptQueue {
|
||||
// Calling dispatch callback
|
||||
rocprofiler_group_t group = {};
|
||||
hsa_status_t status = (dispatch_callback_.load())(&data, callback_data_, &group);
|
||||
free(const_cast<char*>(kernel_name));
|
||||
// Injecting profiling start/stop packets
|
||||
if ((status != HSA_STATUS_SUCCESS) || (group.context == NULL)) {
|
||||
if (tracker_entry != NULL) {
|
||||
@@ -445,7 +435,6 @@ class InterceptQueue {
|
||||
// Calling dispatch callback
|
||||
rocprofiler_group_t group = {};
|
||||
hsa_status_t status = (dispatch_callback_.load())(&data, callback_data_, &group);
|
||||
free(const_cast<char*>(kernel_name));
|
||||
|
||||
// Injecting profiling start/stop packets
|
||||
if ((status == HSA_STATUS_SUCCESS) && (group.context != NULL)) {
|
||||
@@ -539,14 +528,6 @@ class InterceptQueue {
|
||||
return (dbg_info != NULL) ? dbg_info->kernel_name : NULL;
|
||||
}
|
||||
|
||||
// Demangle a C++ symbol name.
//
// symname: NUL-terminated, possibly mangled symbol name; may be NULL.
// Returns a malloc-allocated string that the caller releases with free():
// the demangled name when 'symname' demangles successfully, otherwise a copy
// of 'symname'. Returns NULL when 'symname' is NULL.
static const char* cpp_demangle(const char* symname) {
  // strdup(NULL) is undefined behavior, so a missing name is rejected up front.
  if (symname == NULL) return NULL;

  int status = 0;
  // With a NULL output buffer the demangler allocates the result itself;
  // the length out-parameter is optional and not needed here.
  const char* demangled = abi::__cxa_demangle(symname, NULL, NULL, &status);
  return (demangled != NULL) ? demangled : strdup(symname);
}
|
||||
|
||||
static const char* QueryKernelName(uint64_t kernel_object, const amd_kernel_code_t* kernel_code) {
|
||||
const uint16_t kernel_object_flag = *((uint64_t*)kernel_code + 1);
|
||||
if (kernel_object_flag == 0) {
|
||||
@@ -557,7 +538,7 @@ class InterceptQueue {
|
||||
const char* kernel_symname = (util::HsaRsrcFactory::IsExecutableTracking()) ?
|
||||
util::HsaRsrcFactory::GetKernelNameRef(kernel_object) :
|
||||
GetKernelName(kernel_code->runtime_loader_kernel_symbol);
|
||||
return cpp_demangle(kernel_symname);
|
||||
return kernel_symname;
|
||||
}
|
||||
|
||||
// method to get an intercept queue object
|
||||
|
||||
@@ -167,7 +167,7 @@ class Tracker {
|
||||
hsa_signal_t& dispatch_signal = context->GetDispatchSignal();
|
||||
util::HsaRsrcFactory::Instance().HsaApi()->hsa_signal_store_screlease(dispatch_signal, signal_value);
|
||||
hsa_status_t status =
|
||||
util::HsaRsrcFactory::Instance().HsaApi()->hsa_amd_signal_async_handler(dispatch_signal, HSA_SIGNAL_CONDITION_LT, signal_value, Handler, group);
|
||||
util::HsaRsrcFactory::Instance().HsaApi()->hsa_amd_signal_async_handler(dispatch_signal, HSA_SIGNAL_CONDITION_LT, signal_value, Handler_opt, group);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_signal_async_handler");
|
||||
}
|
||||
|
||||
|
||||
@@ -36,6 +36,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
@@ -626,6 +627,8 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br
|
||||
&kernelSymbol);
|
||||
CHECK_STATUS("Error in looking up kernel symbol", status);
|
||||
|
||||
close(file_handle);
|
||||
|
||||
// Update output parameter
|
||||
*code_desc = kernelSymbol;
|
||||
return true;
|
||||
@@ -705,7 +708,7 @@ const char* HsaRsrcFactory::GetKernelNameRef(uint64_t addr) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
const auto it = symbols_map_->find(addr);
|
||||
if (it == symbols_map_->end()) {
|
||||
fprintf(stderr, "HsaRsrcFactory::kernel addr (0x%lx) is not found\n", addr);
|
||||
fprintf(stderr, "HsaRsrcFactory::GetKernelNameRef: kernel addr (0x%lx) is not found\n", addr);
|
||||
abort();
|
||||
}
|
||||
return it->second;
|
||||
|
||||
@@ -128,6 +128,15 @@ export ROCP_THRS=10
|
||||
export ROCP_INPUT=pmc_input.xml
|
||||
eval_test "'rocprof' libtool PMC n-thread test" ./test/ctrl
|
||||
|
||||
export ROCP_OPT_MODE=1
|
||||
export ROCP_KITER=20
|
||||
export ROCP_DITER=20
|
||||
export ROCP_AGENTS=1
|
||||
export ROCP_THRS=10
|
||||
export ROCP_INPUT=pmc_input.xml
|
||||
eval_test "'rocprof' libtool PMC n-thread opt test" ./test/ctrl
|
||||
unset ROCP_OPT_MODE
|
||||
|
||||
export ROCP_KITER=20
|
||||
export ROCP_DITER=20
|
||||
export ROCP_AGENTS=1
|
||||
|
||||
@@ -100,7 +100,7 @@ struct context_entry_t {
|
||||
unsigned feature_count;
|
||||
rocprofiler_callback_data_t data;
|
||||
kernel_properties_t kernel_properties;
|
||||
uint64_t kernel_object;
|
||||
HsaRsrcFactory::symbols_map_it_t kernel_name_it;
|
||||
FILE* file_handle;
|
||||
};
|
||||
|
||||
@@ -503,7 +503,7 @@ void output_group(const context_entry_t* entry, const char* label) {
|
||||
}
|
||||
|
||||
// Dump stored context entry
|
||||
bool dump_context_entry(context_entry_t* entry) {
|
||||
bool dump_context_entry(context_entry_t* entry, bool to_clean = true) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
|
||||
volatile std::atomic<bool>* valid = reinterpret_cast<std::atomic<bool>*>(&entry->valid);
|
||||
@@ -548,7 +548,7 @@ bool dump_context_entry(context_entry_t* entry) {
|
||||
fprintf(file_handle, "\n");
|
||||
fflush(file_handle);
|
||||
}
|
||||
if (record) {
|
||||
if (record && to_clean) {
|
||||
delete record;
|
||||
entry->data.record = NULL;
|
||||
}
|
||||
@@ -566,11 +566,11 @@ bool dump_context_entry(context_entry_t* entry) {
|
||||
std::ostringstream oss;
|
||||
oss << index << "__" << filtr_kernel_name(entry->data.kernel_name);
|
||||
output_results(entry, oss.str().substr(0, KERNEL_NAME_LEN_MAX).c_str());
|
||||
free(const_cast<char*>(entry->data.kernel_name));
|
||||
if (to_clean) free(const_cast<char*>(entry->data.kernel_name));
|
||||
|
||||
// Finishing cleanup
|
||||
// Deleting profiling context will delete all allocated resources
|
||||
rocprofiler_close(group.context);
|
||||
if (to_clean) rocprofiler_close(group.context);
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -644,31 +644,6 @@ bool context_handler(rocprofiler_group_t group, void* arg) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Translate a kernel object handle into a pointer to its kernel code descriptor.
// The loader API is asked for the host address behind 'kernel_object'; when
// that query does not succeed the handle value itself is used as the address.
static const amd_kernel_code_t* GetKernelCode(uint64_t kernel_object) {
  const void* host_address = NULL;
  const hsa_status_t query_status =
      HsaRsrcFactory::Instance().LoaderApi()->hsa_ven_amd_loader_query_host_address(
          reinterpret_cast<const void*>(kernel_object), &host_address);

  if (query_status != HSA_STATUS_SUCCESS) {
    return reinterpret_cast<amd_kernel_code_t*>(kernel_object);
  }
  return reinterpret_cast<const amd_kernel_code_t*>(host_address);
}
|
||||
|
||||
// Demangle a C++ symbol name.
//
// symname: NUL-terminated, possibly mangled symbol name; may be NULL.
// Returns a malloc-allocated string that the caller releases with free():
// the demangled name when 'symname' demangles successfully, otherwise a copy
// of 'symname'. Returns NULL when 'symname' is NULL.
static const char* cpp_demangle(const char* symname) {
  // strdup(NULL) is undefined behavior, so a missing name is rejected up front.
  if (symname == NULL) return NULL;

  int status = 0;
  // With a NULL output buffer the demangler allocates the result itself;
  // the length out-parameter is optional and not needed here.
  const char* demangled = abi::__cxa_demangle(symname, NULL, NULL, &status);
  return (demangled != NULL) ? demangled : strdup(symname);
}
|
||||
|
||||
static const char* QueryKernelName(uint64_t kernel_object, const amd_kernel_code_t* kernel_code) {
|
||||
const char* kernel_symname = HsaRsrcFactory::GetKernelNameRef(kernel_object);
|
||||
return cpp_demangle(kernel_symname);
|
||||
}
|
||||
|
||||
// Profiling completion handler
|
||||
// Dump context entry
|
||||
bool context_pool_handler(const rocprofiler_pool_entry_t* entry, void* arg) {
|
||||
@@ -677,25 +652,22 @@ bool context_pool_handler(const rocprofiler_pool_entry_t* entry, void* arg) {
|
||||
handler_arg_t* handler_arg = reinterpret_cast<handler_arg_t*>(arg);
|
||||
ctx_entry->features = handler_arg->features;
|
||||
ctx_entry->feature_count = handler_arg->feature_count;
|
||||
ctx_entry->data.kernel_name = ctx_entry->kernel_name_it->second.name;
|
||||
ctx_entry->file_handle = result_file_handle;
|
||||
|
||||
const uint64_t kernel_object = ctx_entry->kernel_object;
|
||||
const amd_kernel_code_t* kernel_code = GetKernelCode(kernel_object);
|
||||
ctx_entry->data.kernel_name = QueryKernelName(kernel_object, kernel_code);
|
||||
|
||||
if (pthread_mutex_lock(&mutex) != 0) {
|
||||
perror("pthread_mutex_lock");
|
||||
abort();
|
||||
}
|
||||
|
||||
dump_context_entry(ctx_entry);
|
||||
dump_context_entry(ctx_entry, false);
|
||||
|
||||
if (pthread_mutex_unlock(&mutex) != 0) {
|
||||
perror("pthread_mutex_unlock");
|
||||
abort();
|
||||
}
|
||||
|
||||
free((void*)(ctx_entry->data.kernel_name));
|
||||
HsaRsrcFactory::ReleaseKernelNameRef(ctx_entry->kernel_name_it);
|
||||
|
||||
return false;
|
||||
}
|
||||
@@ -766,13 +738,36 @@ bool check_filter(const rocprofiler_callback_data_t* callback_data, const callba
|
||||
return found;
|
||||
}
|
||||
|
||||
// Translate a kernel object handle into a pointer to its kernel code descriptor.
// The loader API is asked for the host address behind 'kernel_object'; when
// that query does not succeed the handle value itself is used as the address.
static const amd_kernel_code_t* GetKernelCode(uint64_t kernel_object) {
  const void* host_address = NULL;
  const hsa_status_t query_status =
      HsaRsrcFactory::Instance().LoaderApi()->hsa_ven_amd_loader_query_host_address(
          reinterpret_cast<const void*>(kernel_object), &host_address);

  if (query_status != HSA_STATUS_SUCCESS) {
    return reinterpret_cast<amd_kernel_code_t*>(kernel_object);
  }
  return reinterpret_cast<const amd_kernel_code_t*>(host_address);
}
|
||||
|
||||
// Setting kernel properties
|
||||
void set_kernel_properties(const rocprofiler_callback_data_t* callback_data,
|
||||
kernel_properties_t* kernel_properties_ptr)
|
||||
context_entry_t* entry)
|
||||
{
|
||||
const hsa_kernel_dispatch_packet_t* packet = callback_data->packet;
|
||||
kernel_properties_t* kernel_properties_ptr = &(entry->kernel_properties);
|
||||
const amd_kernel_code_t* kernel_code = callback_data->kernel_code;
|
||||
|
||||
entry->data = *callback_data;
|
||||
|
||||
if (kernel_code == NULL) {
|
||||
const uint64_t kernel_object = callback_data->packet->kernel_object;
|
||||
kernel_code = GetKernelCode(kernel_object);
|
||||
entry->kernel_name_it = HsaRsrcFactory::AcquireKernelNameRef(kernel_object);
|
||||
} else {
|
||||
entry->data.kernel_name = strdup(callback_data->kernel_name);
|
||||
}
|
||||
|
||||
uint64_t grid_size = packet->grid_size_x * packet->grid_size_y * packet->grid_size_z;
|
||||
if (grid_size > UINT32_MAX) abort();
|
||||
kernel_properties_ptr->grid_size = (uint32_t)grid_size;
|
||||
@@ -806,7 +801,7 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data,
|
||||
// Context entry
|
||||
context_entry_t* entry = alloc_context_entry();
|
||||
// Setting kernel properties
|
||||
set_kernel_properties(callback_data, &(entry->kernel_properties));
|
||||
set_kernel_properties(callback_data, entry);
|
||||
|
||||
// context properties
|
||||
rocprofiler_properties_t properties{};
|
||||
@@ -852,8 +847,6 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data,
|
||||
entry->group = *group;
|
||||
entry->features = features;
|
||||
entry->feature_count = feature_count;
|
||||
entry->data = *callback_data;
|
||||
entry->data.kernel_name = strdup(callback_data->kernel_name);
|
||||
entry->file_handle = tool_data->file_handle;
|
||||
entry->active = true;
|
||||
reinterpret_cast<std::atomic<bool>*>(&entry->valid)->store(true);
|
||||
@@ -881,7 +874,7 @@ hsa_status_t dispatch_callback_opt(const rocprofiler_callback_data_t* callback_d
|
||||
rocprofiler_t* context = pool_entry.context;
|
||||
context_entry_t* entry = reinterpret_cast<context_entry_t*>(pool_entry.payload);
|
||||
// Setting kernel properties
|
||||
set_kernel_properties(callback_data, &(entry->kernel_properties));
|
||||
set_kernel_properties(callback_data, entry);
|
||||
// Get group[0]
|
||||
status = rocprofiler_get_group(context, 0, group);
|
||||
check_status(status);
|
||||
@@ -890,8 +883,7 @@ hsa_status_t dispatch_callback_opt(const rocprofiler_callback_data_t* callback_d
|
||||
entry->index = UINT32_MAX;
|
||||
entry->agent = agent;
|
||||
entry->group = *group;
|
||||
entry->data = *callback_data;
|
||||
entry->kernel_object = callback_data->packet->kernel_object;
|
||||
|
||||
reinterpret_cast<std::atomic<bool>*>(&entry->valid)->store(true);
|
||||
return status;
|
||||
}
|
||||
@@ -1120,9 +1112,19 @@ rocprofiler_hsa_callbacks_t hsa_callbacks {
|
||||
hsa_unified_callback,
|
||||
hsa_unified_callback,
|
||||
hsa_unified_callback,
|
||||
hsa_unified_callback
|
||||
hsa_unified_callback,
|
||||
NULL
|
||||
};
|
||||
|
||||
// HSA kernel symbol callback
|
||||
hsa_status_t hsa_ksymbol_cb(rocprofiler_hsa_cb_id_t id,
|
||||
const rocprofiler_hsa_callback_data_t* data,
|
||||
void* arg)
|
||||
{
|
||||
HsaRsrcFactory::SetKernelNameRef(data->ksymbol.object, data->ksymbol.name, data->ksymbol.destroy);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Tool constructor
|
||||
extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings)
|
||||
{
|
||||
@@ -1467,6 +1469,12 @@ extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings)
|
||||
callbacks_ptrs.destroy = destroy_callback;
|
||||
|
||||
rocprofiler_set_queue_callbacks(callbacks_ptrs, callbacks_arg);
|
||||
|
||||
rocprofiler_hsa_callbacks_t cs{};
|
||||
cs.ksymbol = hsa_ksymbol_cb;
|
||||
rocprofiler_set_hsa_callbacks(cs, NULL);
|
||||
settings->code_obj_tracking = 0;
|
||||
settings->hsa_intercepting = 1;
|
||||
} else {
|
||||
// Adding dispatch observer
|
||||
rocprofiler_queue_callbacks_t callbacks_ptrs{0};
|
||||
|
||||
@@ -24,6 +24,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
#include <cxxabi.h>
|
||||
#include <dlfcn.h>
|
||||
#include <fcntl.h>
|
||||
#include <hsa.h>
|
||||
@@ -36,6 +37,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
@@ -44,6 +46,14 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// Demangle a C++ symbol name.
//
// symname: NUL-terminated, possibly mangled symbol name; may be NULL.
// Returns a malloc-allocated string that the caller releases with free():
// the demangled name when 'symname' demangles successfully, otherwise a copy
// of 'symname'. Returns NULL when 'symname' is NULL.
static const char* cpp_demangle(const char* symname) {
  // strdup(NULL) is undefined behavior, so a missing name is rejected up front.
  if (symname == NULL) return NULL;

  int status = 0;
  // With a NULL output buffer the demangler allocates the result itself;
  // the length out-parameter is optional and not needed here.
  const char* demangled = abi::__cxa_demangle(symname, NULL, NULL, &status);
  return (demangled != NULL) ? demangled : strdup(symname);
}
|
||||
|
||||
// Callback function to get available in the system agents
|
||||
hsa_status_t HsaRsrcFactory::GetHsaAgentsCallback(hsa_agent_t agent, void* data) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
@@ -192,6 +202,7 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) {
|
||||
hsa_api_.hsa_executable_create_alt = table->core_->hsa_executable_create_alt_fn;
|
||||
hsa_api_.hsa_executable_load_agent_code_object = table->core_->hsa_executable_load_agent_code_object_fn;
|
||||
hsa_api_.hsa_executable_freeze = table->core_->hsa_executable_freeze_fn;
|
||||
hsa_api_.hsa_executable_destroy = table->core_->hsa_executable_destroy_fn;
|
||||
hsa_api_.hsa_executable_get_symbol = table->core_->hsa_executable_get_symbol_fn;
|
||||
hsa_api_.hsa_executable_symbol_get_info = table->core_->hsa_executable_symbol_get_info_fn;
|
||||
hsa_api_.hsa_executable_iterate_symbols = table->core_->hsa_executable_iterate_symbols_fn;
|
||||
@@ -232,6 +243,7 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) {
|
||||
hsa_api_.hsa_executable_create_alt = hsa_executable_create_alt;
|
||||
hsa_api_.hsa_executable_load_agent_code_object = hsa_executable_load_agent_code_object;
|
||||
hsa_api_.hsa_executable_freeze = hsa_executable_freeze;
|
||||
hsa_api_.hsa_executable_destroy = hsa_executable_destroy;
|
||||
hsa_api_.hsa_executable_get_symbol = hsa_executable_get_symbol;
|
||||
hsa_api_.hsa_executable_symbol_get_info = hsa_executable_symbol_get_info;
|
||||
hsa_api_.hsa_executable_iterate_symbols = hsa_executable_iterate_symbols;
|
||||
@@ -618,6 +630,8 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br
|
||||
&kernelSymbol);
|
||||
CHECK_STATUS("Error in looking up kernel symbol", status);
|
||||
|
||||
close(file_handle);
|
||||
|
||||
// Update output parameter
|
||||
*code_desc = kernelSymbol;
|
||||
return true;
|
||||
@@ -693,52 +707,57 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t s
|
||||
return write_idx;
|
||||
}
|
||||
|
||||
const char* HsaRsrcFactory::GetKernelNameRef(uint64_t addr) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
const auto it = symbols_map_->find(addr);
|
||||
if (it == symbols_map_->end()) {
|
||||
fprintf(stderr, "HsaRsrcFactory::kernel addr (0x%lx) is not found\n", addr);
|
||||
abort();
|
||||
}
|
||||
return it->second;
|
||||
}
|
||||
|
||||
// Switch executable tracking on: raise the tracking flag and redirect the
// executable-freeze entry of the core API table to the interceptor.
// Both updates happen with mutex_ held.
void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) {
  std::lock_guard<mutex_t> guard(mutex_);
  executable_tracking_on_ = true;
  auto* const core_api = table->core_;
  core_api->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor;
}
|
||||
|
||||
hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data) {
|
||||
hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *arg) {
|
||||
hsa_symbol_kind_t value = (hsa_symbol_kind_t)0;
|
||||
hsa_status_t status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &value);
|
||||
CHECK_STATUS("Error in getting symbol info", status);
|
||||
|
||||
if (value == HSA_SYMBOL_KIND_KERNEL) {
|
||||
uint64_t addr = 0;
|
||||
uint32_t len = 0;
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &addr);
|
||||
CHECK_STATUS("Error in getting kernel object", status);
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len);
|
||||
CHECK_STATUS("Error in getting name len", status);
|
||||
char *name = new char[len + 1];
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, name);
|
||||
CHECK_STATUS("Error in getting kernel name", status);
|
||||
name[len] = 0;
|
||||
auto ret = symbols_map_->insert({addr, name});
|
||||
if (ret.second == false) {
|
||||
delete[] ret.first->second;
|
||||
ret.first->second = name;
|
||||
|
||||
const int to_free = reinterpret_cast<long>(arg);
|
||||
const char* name = NULL;
|
||||
if (to_free == 0) {
|
||||
uint32_t len = 0;
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len);
|
||||
CHECK_STATUS("Error in getting name len", status);
|
||||
char sym_name[len + 1];
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, sym_name);
|
||||
CHECK_STATUS("Error in getting kernel name", status);
|
||||
sym_name[len] = 0;
|
||||
name = cpp_demangle(sym_name);
|
||||
}
|
||||
|
||||
SetKernelNameRef(addr, name, to_free);
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t HsaRsrcFactory::hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (symbols_map_ == NULL) symbols_map_ = new symbols_map_t;
|
||||
hsa_status_t status = hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, NULL);
|
||||
hsa_status_t status = hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, (void*)0);
|
||||
CHECK_STATUS("Error in iterating executable symbols", status);
|
||||
return hsa_api_.hsa_executable_freeze(executable, options);;
|
||||
return hsa_api_.hsa_executable_freeze(executable, options);
|
||||
}
|
||||
|
||||
// Interceptor installed in place of hsa_executable_destroy.
// While holding mutex_, and only if the symbols map exists, it walks the
// executable's symbols with executable_symbols_cb using the flag value 1,
// then forwards to the saved hsa_executable_destroy entry and returns its status.
hsa_status_t HsaRsrcFactory::hsa_executable_destroy_interceptor(hsa_executable_t executable) {
  std::lock_guard<mutex_t> guard(mutex_);

  if (symbols_map_ != NULL) {
    void* const destroy_flag = (void*)1;
    hsa_status_t status =
        hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, destroy_flag);
    CHECK_STATUS("Error in iterating executable symbols", status);
  }

  return hsa_api_.hsa_executable_destroy(executable);
}
|
||||
|
||||
// Switch executable tracking on: raise the tracking flag and redirect the
// executable freeze and destroy entries of the core API table to the
// interceptors. All updates happen with mutex_ held.
void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) {
  std::lock_guard<mutex_t> guard(mutex_);
  executable_tracking_on_ = true;
  auto* const core_api = table->core_;
  core_api->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor;
  core_api->hsa_executable_destroy_fn = hsa_executable_destroy_interceptor;
}
|
||||
|
||||
std::atomic<HsaRsrcFactory*> HsaRsrcFactory::instance_{};
|
||||
|
||||
@@ -95,6 +95,7 @@ struct hsa_pfn_t {
|
||||
decltype(hsa_executable_create_alt)* hsa_executable_create_alt;
|
||||
decltype(hsa_executable_load_agent_code_object)* hsa_executable_load_agent_code_object;
|
||||
decltype(hsa_executable_freeze)* hsa_executable_freeze;
|
||||
decltype(hsa_executable_destroy)* hsa_executable_destroy;
|
||||
decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol;
|
||||
decltype(hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info;
|
||||
decltype(hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols;
|
||||
@@ -286,6 +287,13 @@ class HsaRsrcFactory {
|
||||
typedef std::recursive_mutex mutex_t;
|
||||
typedef HsaTimer::timestamp_t timestamp_t;
|
||||
|
||||
// Executables loading tracking
|
||||
struct symbols_map_data_t {  // value type of symbols_map_t; one record per uint64_t key
|
||||
const char* name;  // kernel symbol name stored by SetKernelNameRef() and returned by GetKernelNameRef()
|
||||
uint64_t refs_count;  // reference counter: +1 in AcquireKernelNameRef(), -1 in ReleaseKernelNameRef(); SetKernelNameRef() waits for 0
|
||||
};
|
||||
typedef std::map<uint64_t, symbols_map_data_t> symbols_map_t;
|
||||
|
||||
static HsaRsrcFactory* Create(bool initialize_hsa = true) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
HsaRsrcFactory* obj = instance_.load(std::memory_order_relaxed);
|
||||
@@ -406,7 +414,88 @@ class HsaRsrcFactory {
|
||||
// Enable executables loading tracking
|
||||
static bool IsExecutableTracking() { return executable_tracking_on_; }
|
||||
static void EnableExecutableTracking(HsaApiTable* table);
|
||||
static const char* GetKernelNameRef(uint64_t addr);
|
||||
|
||||
typedef symbols_map_t::iterator symbols_map_it_t;
|
||||
|
||||
static inline const char* GetKernelNameRef(const uint64_t& addr) {
|
||||
if (symbols_map_ == NULL) {
|
||||
fprintf(stderr, "HsaRsrcFactory::GetKernelNameRef: kernel addr (0x%lx), error\n", addr);
|
||||
abort();
|
||||
}
|
||||
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
|
||||
const auto it = symbols_map_->find(addr);
|
||||
if (it == symbols_map_->end()) {
|
||||
fprintf(stderr, "HsaRsrcFactory::GetKernelNameRef: kernel addr (0x%lx) is not found\n", addr);
|
||||
abort();
|
||||
}
|
||||
|
||||
return it->second.name;
|
||||
}
|
||||
|
||||
// Look up the record for kernel address 'addr' and take a reference on it.
//
// addr: key into the symbols map.
// Returns the map iterator of the record with its refs_count incremented by
// one; hand the iterator to ReleaseKernelNameRef() to drop the reference.
// Terminates the process with a diagnostic on stderr when the symbols map has
// not been created or holds no record for 'addr'.
static inline symbols_map_it_t AcquireKernelNameRef(const uint64_t& addr) {
  if (symbols_map_ == NULL) {
    // Diagnostics name this function (they previously said GetKernelNameRef).
    fprintf(stderr, "HsaRsrcFactory::AcquireKernelNameRef: kernel addr (0x%lx), error\n", addr);
    abort();
  }

  std::lock_guard<mutex_t> lck(mutex_);

  const auto it = symbols_map_->find(addr);
  if (it == symbols_map_->end()) {
    fprintf(stderr, "HsaRsrcFactory::AcquireKernelNameRef: kernel addr (0x%lx) is not found\n", addr);
    abort();
  }

  // refs_count is declared as a plain uint64_t; it is updated through an
  // atomic view so that ReleaseKernelNameRef() can decrement it without the lock.
  std::atomic<uint64_t>* atomic_ptr =
      reinterpret_cast<std::atomic<uint64_t>*>(&(it->second.refs_count));
  atomic_ptr->fetch_add(1, std::memory_order_relaxed);

  return it;
}
|
||||
|
||||
// Drop one reference from the record designated by 'it'.
// The refs_count field is decremented by one through an atomic view of the
// field, using relaxed ordering; no lock is taken here.
static inline void ReleaseKernelNameRef(const symbols_map_it_t& it) {
  auto* const refs = reinterpret_cast<std::atomic<uint64_t>*>(&(it->second.refs_count));
  refs->fetch_sub(1, std::memory_order_relaxed);
}
|
||||
|
||||
static inline void SetKernelNameRef(const uint64_t& addr, const char* name, const int& free) {
|
||||
if (symbols_map_ == NULL) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (symbols_map_ == NULL) symbols_map_ = new symbols_map_t;
|
||||
}
|
||||
|
||||
auto it = symbols_map_->find(addr);
|
||||
if (it != symbols_map_->end()) {
|
||||
while (1) {
|
||||
while(it->second.refs_count != 0) sched_yield();
|
||||
mutex_.lock();
|
||||
if (it->second.refs_count == 0) break;
|
||||
mutex_.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
if (it != symbols_map_->end()) {
|
||||
delete[] it->second.name;
|
||||
if (free == 1) {
|
||||
symbols_map_->erase(it);
|
||||
} else {
|
||||
fprintf(stderr, "HsaRsrcFactory::SetKernelNameRef: to set kernel addr (0x%lx) conflict\n", addr);
|
||||
abort();
|
||||
}
|
||||
} else {
|
||||
if (free == 0) {
|
||||
symbols_map_->insert({addr, symbols_map_data_t{name, 0}});
|
||||
} else {
|
||||
fprintf(stderr, "HsaRsrcFactory::SetKernelNameRef: to free kernel addr (0x%lx) not found\n", addr);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
mutex_.unlock();
|
||||
}
|
||||
|
||||
// Initialize HSA API table
|
||||
void static InitHsaApiTable(HsaApiTable* table);
|
||||
@@ -492,11 +581,10 @@ class HsaRsrcFactory {
|
||||
// System agents map
|
||||
std::map<hsa_agent_handle_t, const AgentInfo*> agent_map_;
|
||||
|
||||
// Executables loading tracking
|
||||
typedef std::map<uint64_t, const char*> symbols_map_t;
|
||||
static symbols_map_t* symbols_map_;
|
||||
static bool executable_tracking_on_;
|
||||
static hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options);
|
||||
static hsa_status_t hsa_executable_destroy_interceptor(hsa_executable_t executable);
|
||||
static hsa_status_t executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data);
|
||||
|
||||
// HSA runtime API table
|
||||
|
||||
Ссылка в новой задаче
Block a user