Fix HSA intercept

Move the HSA intercept to the OnLoad function, so that it is available
as soon as the ROCR is loaded.

Layer the HSA API wrappers on top of the basic HSA activity intercept.

Change-Id: Ie636d59755543cda181e76ec29f0b55081136b63
This commit is contained in:
Laurent Morichetti
2022-06-22 15:19:47 -07:00
zatwierdzone przez Laurent Morichetti
rodzic 054456bcf8
commit e282a82e29
5 zmienionych plików z 178 dodań i 178 usunięć
+11 -11
Wyświetl plik
@@ -29,10 +29,10 @@ H_OUT='hsa_prof_str.h'
CPP_OUT='hsa_prof_str.inline.h'
API_TABLES_H = 'hsa_api_trace.h'
API_HEADERS_H = (
('CoreApiTable', 'hsa.h'),
('AmdExtTable', 'hsa_ext_amd.h'),
('ImageExtTable', 'hsa_ext_image.h'),
('AmdExtTable', API_TABLES_H),
('CoreApi', 'hsa.h'),
('AmdExt', 'hsa_ext_amd.h'),
('ImageExt', 'hsa_ext_image.h'),
('AmdExt', API_TABLES_H),
)
LICENSE = \
@@ -106,7 +106,7 @@ class API_TableParser:
self.inp = open(header, 'r')
self.beg_pattern = re.compile('^\s*struct\s+' + name + '\s*{\s*$')
self.beg_pattern = re.compile('^\s*struct\s+' + name + 'Table\s*{\s*$')
self.end_pattern = re.compile('^\s*};\s*$')
self.array = []
self.parse()
@@ -330,9 +330,9 @@ class API_DescrParser:
self.cpp_content += 'namespace roctracer {\n'
self.cpp_content += 'namespace hsa_support {\n\n'
self.cpp_content += 'static CoreApiTable CoreApiTable_saved;\n'
self.cpp_content += 'static AmdExtTable AmdExtTable_saved;\n'
self.cpp_content += 'static ImageExtTable ImageExtTable_saved;\n\n'
self.cpp_content += 'static CoreApiTable CoreApi_saved_before_cb;\n'
self.cpp_content += 'static AmdExtTable AmdExt_saved_before_cb;\n'
self.cpp_content += 'static ImageExtTable ImageExt_saved_before_cb;\n\n'
self.cpp_content += 'std::atomic<uint64_t> hsa_counter_{1};\n'
self.cpp_content += 'static thread_local uint64_t hsa_correlation_id_tls = 0;\n'
@@ -428,7 +428,7 @@ class API_DescrParser:
content += ' if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_HSA_API, ' + call_id + ', &api_data, api_callback_arg);\n'
if ret_type != 'void':
content += ' ' + ret_type + ' ret ='
content += ' ' + name + '_saved.' + call + '_fn(' + ', '.join(struct['alst']) + ');\n'
content += ' ' + name + '_saved_before_cb.' + call + '_fn(' + ', '.join(struct['alst']) + ');\n'
if ret_type != 'void':
content += ' api_data.' + ret_type + '_retval = ret;\n'
content += ' api_data.phase = 1;\n'
@@ -444,8 +444,8 @@ class API_DescrParser:
if n > 0 and call == '-':
content += '};\n'
if n == 0 or (call == '-' and name != '-'):
content += 'static void intercept_' + name + '(' + name + '* table) {\n'
content += ' ' + name + '_saved = *table;\n'
content += 'static void Install' + name + 'Wrappers(' + name + 'Table* table) {\n'
content += ' ' + name + '_saved_before_cb = *table;\n'
if call != '-':
if call != 'hsa_shut_down':
content += ' table->' + call + '_fn = ' + call + '_callback;\n'
+83 -72
Wyświetl plik
@@ -106,28 +106,12 @@ mark_api_callback_t* mark_api_callback_ptr = nullptr;
namespace roctracer {
namespace hsa_support {
decltype(hsa_system_get_info)* hsa_system_get_info_fn = hsa_system_get_info;
decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn = hsa_amd_memory_async_copy;
decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn =
hsa_amd_memory_async_copy_rect;
::HsaApiTable* kHsaApiTable;
void SaveHsaApi(::HsaApiTable* table) {
kHsaApiTable = table;
hsa_system_get_info_fn = table->core_->hsa_system_get_info_fn;
hsa_amd_memory_async_copy_fn = table->amd_ext_->hsa_amd_memory_async_copy_fn;
hsa_amd_memory_async_copy_rect_fn = table->amd_ext_->hsa_amd_memory_async_copy_rect_fn;
}
void RestoreHsaApi() {
::HsaApiTable* table = kHsaApiTable;
table->core_->hsa_system_get_info_fn = hsa_system_get_info_fn;
table->amd_ext_->hsa_amd_memory_async_copy_fn = hsa_amd_memory_async_copy_fn;
table->amd_ext_->hsa_amd_memory_async_copy_rect_fn = hsa_amd_memory_async_copy_rect_fn;
}
static CoreApiTable saved_core_api;
static AmdExtTable saved_amd_ext_api;
// async copy activity callback
std::mutex init_mutex;
bool async_copy_callback_enabled = false;
MemoryPool* async_copy_callback_memory_pool = nullptr;
@@ -143,14 +127,15 @@ namespace util {
roctracer_timestamp_t timestamp_ns() {
uint64_t sysclock;
hsa_status_t status = hsa_support::hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock);
hsa_status_t status =
hsa_support::saved_core_api.hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock);
if (status == HSA_STATUS_ERROR_NOT_INITIALIZED) return 0;
CHECK_HSA_STATUS("hsa_system_get_info()", status);
static uint64_t sysclock_period = []() {
uint64_t sysclock_hz = 0;
hsa_status_t status =
hsa_support::hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
hsa_status_t status = hsa_support::saved_core_api.hsa_system_get_info_fn(
HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
CHECK_HSA_STATUS("hsa_system_get_info()", status);
return (uint64_t)1000000000 / sysclock_hz;
}();
@@ -447,20 +432,21 @@ hsa_status_t hsa_amd_memory_async_copy_interceptor(void* dst, hsa_agent_t dst_ag
size_t size, uint32_t num_dep_signals,
const hsa_signal_t* dep_signals,
hsa_signal_t completion_signal) {
hsa_status_t status = HSA_STATUS_SUCCESS;
if (hsa_support::async_copy_callback_enabled) {
Tracker::entry_t* entry = new Tracker::entry_t();
entry->handler = hsa_async_copy_handler;
entry->pool = hsa_support::async_copy_callback_memory_pool;
entry->correlation_id = hsa_correlation_id_tls;
Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
status = hsa_amd_memory_async_copy_fn(dst, dst_agent, src, src_agent, size, num_dep_signals,
dep_signals, entry->signal);
if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry);
} else {
status = hsa_amd_memory_async_copy_fn(dst, dst_agent, src, src_agent, size, num_dep_signals,
dep_signals, completion_signal);
if (!async_copy_callback_enabled) {
return saved_amd_ext_api.hsa_amd_memory_async_copy_fn(
dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, completion_signal);
}
Tracker::entry_t* entry = new Tracker::entry_t();
entry->handler = hsa_async_copy_handler;
entry->pool = async_copy_callback_memory_pool;
entry->correlation_id = hsa_correlation_id_tls;
Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
hsa_status_t status = saved_amd_ext_api.hsa_amd_memory_async_copy_fn(
dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, entry->signal);
if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry);
return status;
}
@@ -469,21 +455,23 @@ hsa_status_t hsa_amd_memory_async_copy_rect_interceptor(
const hsa_dim3_t* src_offset, const hsa_dim3_t* range, hsa_agent_t copy_agent,
hsa_amd_copy_direction_t dir, uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
hsa_signal_t completion_signal) {
hsa_status_t status = HSA_STATUS_SUCCESS;
if (hsa_support::async_copy_callback_enabled) {
Tracker::entry_t* entry = new Tracker::entry_t();
entry->handler = hsa_async_copy_handler;
entry->pool = hsa_support::async_copy_callback_memory_pool;
entry->correlation_id = hsa_correlation_id_tls;
Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
status = hsa_amd_memory_async_copy_rect_fn(dst, dst_offset, src, src_offset, range, copy_agent,
dir, num_dep_signals, dep_signals, entry->signal);
if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry);
} else {
status =
hsa_amd_memory_async_copy_rect_fn(dst, dst_offset, src, src_offset, range, copy_agent, dir,
num_dep_signals, dep_signals, completion_signal);
if (!async_copy_callback_enabled) {
return saved_amd_ext_api.hsa_amd_memory_async_copy_rect_fn(
dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals,
completion_signal);
}
Tracker::entry_t* entry = new Tracker::entry_t();
entry->handler = hsa_async_copy_handler;
entry->pool = async_copy_callback_memory_pool;
entry->correlation_id = hsa_correlation_id_tls;
Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
hsa_status_t status = saved_amd_ext_api.hsa_amd_memory_async_copy_rect_fn(
dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals,
entry->signal);
if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry);
return status;
}
@@ -855,6 +843,13 @@ static void roctracer_enable_activity_fun(roctracer_domain_t domain, uint32_t op
switch (domain) {
case ACTIVITY_DOMAIN_HSA_OPS: {
if (op == HSA_OP_ID_COPY) {
std::scoped_lock lock(hsa_support::init_mutex);
if (hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn != nullptr) {
[[maybe_unused]] hsa_status_t status =
hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(true);
assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");
}
RocpLoader::Instance();
hsa_support::async_copy_callback_enabled = true;
hsa_support::async_copy_callback_memory_pool = reinterpret_cast<MemoryPool*>(pool);
@@ -976,8 +971,17 @@ static void roctracer_disable_activity_fun(roctracer_domain_t domain, uint32_t o
switch (domain) {
case ACTIVITY_DOMAIN_HSA_OPS: {
if (op == HSA_OP_ID_COPY) {
std::scoped_lock lock(hsa_support::init_mutex);
hsa_support::async_copy_callback_enabled = false;
hsa_support::async_copy_callback_memory_pool = nullptr;
if (hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn != nullptr) {
[[maybe_unused]] hsa_status_t status =
hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(false);
assert(status == HSA_STATUS_SUCCESS || status == HSA_STATUS_ERROR_NOT_INITIALIZED ||
!"hsa_amd_profiling_async_copy_enable failed");
}
} else {
if (RocpLoader::GetRef() == nullptr) break;
if (!RocpLoader::Instance().EnableActivityCallback(op, false))
@@ -1178,33 +1182,12 @@ ROCTRACER_API roctracer_status_t roctracer_set_properties(roctracer_domain_t dom
API_METHOD_PREFIX
switch (domain) {
case ACTIVITY_DOMAIN_HSA_OPS: {
// HSA OPS properties
hsa_ops_properties_t* ops_properties = reinterpret_cast<hsa_ops_properties_t*>(properties);
HsaApiTable* table = reinterpret_cast<HsaApiTable*>(ops_properties->table);
// HSA async-copy tracing
[[maybe_unused]] hsa_status_t status = hsa_amd_profiling_async_copy_enable(true);
assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");
hsa_support::hsa_amd_memory_async_copy_fn = table->amd_ext_->hsa_amd_memory_async_copy_fn;
hsa_support::hsa_amd_memory_async_copy_rect_fn =
table->amd_ext_->hsa_amd_memory_async_copy_rect_fn;
table->amd_ext_->hsa_amd_memory_async_copy_fn =
hsa_support::hsa_amd_memory_async_copy_interceptor;
table->amd_ext_->hsa_amd_memory_async_copy_rect_fn =
hsa_support::hsa_amd_memory_async_copy_rect_interceptor;
break;
}
case ACTIVITY_DOMAIN_HSA_EVT: {
break;
}
case ACTIVITY_DOMAIN_HSA_API: {
// HSA API properties
HsaApiTable* table = reinterpret_cast<HsaApiTable*>(properties);
hsa_support::intercept_CoreApiTable(table->core_);
hsa_support::intercept_AmdExtTable(table->amd_ext_);
hsa_support::intercept_ImageExtTable(table->image_ext_);
break;
}
case ACTIVITY_DOMAIN_HIP_OPS:
@@ -1246,10 +1229,38 @@ ROCTRACER_EXPORT extern const uint32_t HSA_AMD_TOOL_PRIORITY = 50;
// HSA-runtime tool on-load method
ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
uint64_t failed_tool_count, const char* const* failed_tool_names) {
hsa_support::SaveHsaApi(table);
std::scoped_lock lock(hsa_support::init_mutex);
// Save the HSA core api and amd_ext api.
hsa_support::saved_core_api = *table->core_;
hsa_support::saved_amd_ext_api = *table->amd_ext_;
// Install the HSA_OPS intercept
table->amd_ext_->hsa_amd_memory_async_copy_fn =
hsa_support::hsa_amd_memory_async_copy_interceptor;
table->amd_ext_->hsa_amd_memory_async_copy_rect_fn =
hsa_support::hsa_amd_memory_async_copy_rect_interceptor;
// Install the HSA_API wrappers
hsa_support::InstallCoreApiWrappers(table->core_);
hsa_support::InstallAmdExtWrappers(table->amd_ext_);
hsa_support::InstallImageExtWrappers(table->image_ext_);
if (hsa_support::async_copy_callback_enabled) {
[[maybe_unused]] hsa_status_t status =
hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(true);
assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");
}
return true;
}
ROCTRACER_EXPORT void OnUnload() {}
// HSA-runtime tool on-unload method.
//
// If async-copy activity tracing is still enabled when the tool is unloaded,
// switch async-copy profiling back off through the saved (un-intercepted) AMD
// extension table.
ROCTRACER_EXPORT void OnUnload() {
  if (!hsa_support::async_copy_callback_enabled) return;

  // The saved table is filled in by OnLoad; guard against an empty entry the
  // same way the enable/disable activity paths do before calling through it.
  auto* async_copy_enable_fn =
      hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn;
  if (async_copy_enable_fn == nullptr) return;

  [[maybe_unused]] hsa_status_t status = async_copy_enable_fn(false);
  // Accept HSA_STATUS_ERROR_NOT_INITIALIZED as well as success, mirroring the
  // disable-activity path, so a debug build does not abort during teardown.
  assert((status == HSA_STATUS_SUCCESS || status == HSA_STATUS_ERROR_NOT_INITIALIZED) &&
         "hsa_amd_profiling_async_copy_enable failed");
}
} // extern "C"
-8
Wyświetl plik
@@ -931,9 +931,6 @@ ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
if (trace_hsa_api) {
hsa_api_file_handle = open_output_file(output_prefix, "hsa_api_trace.txt");
// initialize HSA tracing
roctracer_set_properties(ACTIVITY_DOMAIN_HSA_API, (void*)table);
fprintf(stdout, " HSA-trace(");
fflush(stdout);
if (hsa_api_vec.size() != 0) {
@@ -956,11 +953,6 @@ ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
if (trace_hsa_activity) {
hsa_async_copy_file_handle = open_output_file(output_prefix, "async_copy_trace.txt");
// initialize HSA tracing
roctracer::hsa_ops_properties_t ops_properties{};
ops_properties.table = table;
roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties);
// Allocating tracing pool
open_tracing_pool();
+66 -67
Wyświetl plik
@@ -1,69 +1,68 @@
ROCTracer (pid=882613):
0x555e40292a90 agent cpu
0x555e40295470 agent gpu
0x555e402cd500 agent gpu
538166319764062
ROCTracer (pid=566828):
0x55e1b9d507c0 agent cpu
0x55e1b9d4eeb0 agent gpu
0x55e1b9d8b540 agent gpu
975779239309496
HSA-trace()
HSA-activity-trace()
538166319915287:538166319920086 882613:882613 hsa_amd_profiling_async_copy_enable(1) = 0
538166320173874:538166320174335 882613:882613 hsa_agent_get_info({handle=93863291726480}, 0, 0x7ffddcd49dd8) = 0
538166320175287:538166320175548 882613:882613 hsa_agent_get_info({handle=93863291726480}, 17, 0x7ffddcd49dc0) = 0
538166320177511:538166320178523 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291726976}, 0, 0x7ffddcd49bdc) = 0
538166320179144:538166320179415 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291726976}, 1, 0x7ffddcd49be0) = 0
538166320180337:538166320180587 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291726976}, 2, 0x7ffddcd49c00) = 0
538166320181198:538166320181449 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291726976}, 6, 0x7ffddcd49c08) = 0
538166320183613:538166320183873 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291727280}, 0, 0x7ffddcd49bdc) = 0
538166320184484:538166320184745 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291727280}, 1, 0x7ffddcd49be0) = 0
538166320185336:538166320185587 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291727280}, 2, 0x7ffddcd49c00) = 0
538166320186178:538166320186428 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291727280}, 6, 0x7ffddcd49c08) = 0
538166320187390:538166320188031 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291728112}, 0, 0x7ffddcd49bdc) = 0
538166320188632:538166320188883 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291728112}, 1, 0x7ffddcd49be0) = 0
538166320189474:538166320189724 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291728112}, 2, 0x7ffddcd49c00) = 0
538166320190205:538166320190456 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291728112}, 6, 0x7ffddcd49c08) = 0
538166320190205:538166320191347 882613:882613 hsa_amd_agent_iterate_memory_pools({handle=93863291726480}, 1, 0x7ffddcd49e18) = 0
538166320194203:538166320196167 882613:882613 hsa_agent_get_info({handle=93863291737200}, 0, 0x7ffddcd49dd8) = 0
538166320196698:538166320196958 882613:882613 hsa_agent_get_info({handle=93863291737200}, 17, 0x7ffddcd49dc0) = 0
538166320198170:538166320198421 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291720576}, 0, 0x7ffddcd49bcc) = 0
538166320198902:538166320199162 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291720576}, 1, 0x7ffddcd49bd0) = 0
538166320199643:538166320199894 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291720576}, 2, 0x7ffddcd49bf0) = 0
538166320200364:538166320200615 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291720576}, 6, 0x7ffddcd49bf8) = 0
538166320201386:538166320201727 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291721456}, 0, 0x7ffddcd49bcc) = 0
538166320201386:538166320202288 882613:882613 hsa_amd_agent_iterate_memory_pools({handle=93863291737200}, 1, 0x7ffddcd49e18) = 0
538166320203881:538166320204332 882613:882613 hsa_agent_get_info({handle=93863291966720}, 0, 0x7ffddcd49dd8) = 0
538166320204843:538166320205103 882613:882613 hsa_agent_get_info({handle=93863291966720}, 17, 0x7ffddcd49dc0) = 0
538166320206366:538166320206606 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291968448}, 0, 0x7ffddcd49bcc) = 0
538166320207087:538166320207348 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291968448}, 1, 0x7ffddcd49bd0) = 0
538166320207818:538166320208069 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291968448}, 2, 0x7ffddcd49bf0) = 0
538166320209782:538166320210043 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291968448}, 6, 0x7ffddcd49bf8) = 0
538166320210724:538166320210984 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291969440}, 0, 0x7ffddcd49bcc) = 0
538166320210724:538166320211566 882613:882613 hsa_amd_agent_iterate_memory_pools({handle=93863291966720}, 1, 0x7ffddcd49e18) = 0
538166320210724:538166320213028 882613:882613 hsa_iterate_agents(1, 0) = 0
538166320294151:538166336777646 882613:882616 hsa_queue_create({handle=93863291737200}, 1024, 1, 0, 0, 0, 0, 0x7f91ed49ac88) = 0
538166336809796:538166336869439 882613:882616 hsa_code_object_reader_create_from_file(5, 0x7f91ed49acb8) = 0
538166336870721:538166336883686 882613:882616 hsa_executable_create_alt(1, 0, 0x0, 0x7f91ed49acc0) = 0
538166336884868:538166337268842 882613:882616 hsa_executable_load_agent_code_object({handle=140259270599920}, {handle=93863291737200}, {handle=140259270599248}, 0x0, 0) = 0
538166337270696:538166337974663 882613:882616 hsa_executable_freeze({handle=140259270599920}, 0x0) = 0
538166337978250:538166337979312 882613:882616 hsa_executable_get_symbol_by_name({handle=140259270599920}, 0x7f90a80029b0, 0x7f91ed49abe8, 0x7f91ed49ac08) = 1013
538166337982077:538166337982708 882613:882616 hsa_executable_get_symbol_by_name({handle=140259270599920}, 0x7f90a80038b0, 0x7f91ed49abe8, 0x7f91ed49ac08) = 0
538166337984311:538166337984983 882613:882616 hsa_executable_symbol_get_info({handle=140259270612544}, 22, 0x7f91ed49acd0) = 0
538166337985904:538166337986295 882613:882616 hsa_executable_symbol_get_info({handle=140259270612544}, 14, 0x7f91ed49acd8) = 0
538166337987207:538166337987597 882613:882616 hsa_executable_symbol_get_info({handle=140259270612544}, 13, 0x7f91ed49acdc) = 0
538166337988499:538166337988880 882613:882616 hsa_executable_symbol_get_info({handle=140259270612544}, 11, 0x7f91ed49ace0) = 0
538166337989792:538166337990182 882613:882616 hsa_executable_symbol_get_info({handle=140259270612544}, 12, 0x7f91ed49ace4) = 0
538166337991855:538166338004199 882613:882616 hsa_amd_memory_pool_allocate({handle=93863291727280}, 72, 0, 0x7f91ed49ac30) = 0
538166338005371:538166338705110 882613:882616 hsa_amd_agents_allow_access(3, 0x555e40337c20, 0, 0x7f91ecc74000) = 0
538166338706453:538166338719107 882613:882616 hsa_amd_memory_pool_allocate({handle=93863291727280}, 256, 0, 0x7f91ed49ac30) = 0
538166338720059:538166338850665 882613:882616 hsa_amd_agents_allow_access(3, 0x555e40337c20, 0, 0x7f91ecc72000) = 0
538166338851607:538166338864782 882613:882616 hsa_amd_memory_pool_allocate({handle=93863291727280}, 256, 0, 0x7f91ed49ac30) = 0
538166338865723:538166338978927 882613:882616 hsa_amd_agents_allow_access(3, 0x555e40337c20, 0, 0x7f91ecc70000) = 0
538166338980309:538166338982524 882613:882616 hsa_amd_signal_create(1, 0, 0, 0, 0x7f91ed49ac90) = 0
538166338983816:538166338984628 882613:882616 hsa_queue_load_write_index_relaxed(0x7f91edff6000) = 0
538166338985549:538166338986160 882613:882616 hsa_queue_load_read_index_relaxed(0x7f91edff6000) = 0
538166338987032:538166338987453 882613:882616 hsa_queue_store_write_index_screlease(0x7f91edff6000, 1) = void
538166338988515:538166338989397 882613:882616 hsa_signal_store_screlease({handle=140264755457280}, 0) = void
538166338990278:538166339006328 882613:882616 hsa_signal_wait_scacquire({handle=140264755456768}, 0, 0, 18446744073709551615, 0) = 0
538166339009224:538166339010346 882613:882616 hsa_signal_destroy({handle=140264755456768}) = 0
538166339011689:538166339037938 882613:882616 hsa_memory_free(0x7f91ecc72000) = 0
538166339038930:538166339052135 882613:882616 hsa_memory_free(0x7f91ecc70000) = 0
538166339053307:538166339082843 882613:882616 hsa_executable_destroy({handle=140259270599920}) = 0
538166339083895:538166339087181 882613:882616 hsa_code_object_reader_destroy({handle=140259270599248}) = 0
975779240024464:975779240024815 566828:566828 hsa_agent_get_info(, 0, 0x7ffdd25cc9a8) = 0 :6
975779240029173:975779240029274 566828:566828 hsa_agent_get_info(, 17, 0x7ffdd25cc990) = 0 :7
975779240035816:975779240036187 566828:566828 hsa_amd_memory_pool_get_info(, 0, 0x7ffdd25cc6bc) = 0 :9
975779240036667:975779240036768 566828:566828 hsa_amd_memory_pool_get_info(, 1, 0x7ffdd25cc6c0) = 0 :10
975779240037219:975779240037319 566828:566828 hsa_amd_memory_pool_get_info(, 2, 0x7ffdd25cc6e0) = 0 :11
975779240037760:975779240037860 566828:566828 hsa_amd_memory_pool_get_info(, 6, 0x7ffdd25cc6e8) = 0 :12
975779240039823:975779240039914 566828:566828 hsa_amd_memory_pool_get_info(, 0, 0x7ffdd25cc6bc) = 0 :13
975779240040455:975779240040555 566828:566828 hsa_amd_memory_pool_get_info(, 1, 0x7ffdd25cc6c0) = 0 :14
975779240041076:975779240041156 566828:566828 hsa_amd_memory_pool_get_info(, 2, 0x7ffdd25cc6e0) = 0 :15
975779240041697:975779240041777 566828:566828 hsa_amd_memory_pool_get_info(, 6, 0x7ffdd25cc6e8) = 0 :16
975779240042619:975779240042709 566828:566828 hsa_amd_memory_pool_get_info(, 0, 0x7ffdd25cc6bc) = 0 :17
975779240043250:975779240043350 566828:566828 hsa_amd_memory_pool_get_info(, 1, 0x7ffdd25cc6c0) = 0 :18
975779240043871:975779240043961 566828:566828 hsa_amd_memory_pool_get_info(, 2, 0x7ffdd25cc6e0) = 0 :19
975779240044482:975779240044562 566828:566828 hsa_amd_memory_pool_get_info(, 6, 0x7ffdd25cc6e8) = 0 :20
975779240044482:975779240045264 566828:566828 hsa_amd_agent_iterate_memory_pools(, 1, 0x7ffdd25cc9e8) = 0 :8
975779240048430:975779240049341 566828:566828 hsa_agent_get_info(, 0, 0x7ffdd25cc9a8) = 0 :21
975779240049822:975779240049932 566828:566828 hsa_agent_get_info(, 17, 0x7ffdd25cc990) = 0 :22
975779240050654:975779240050744 566828:566828 hsa_amd_memory_pool_get_info(, 0, 0x7ffdd25cc62c) = 0 :24
975779240051275:975779240051365 566828:566828 hsa_amd_memory_pool_get_info(, 1, 0x7ffdd25cc630) = 0 :25
975779240051786:975779240051866 566828:566828 hsa_amd_memory_pool_get_info(, 2, 0x7ffdd25cc650) = 0 :26
975779240052287:975779240052377 566828:566828 hsa_amd_memory_pool_get_info(, 6, 0x7ffdd25cc658) = 0 :27
975779240053048:975779240053159 566828:566828 hsa_amd_memory_pool_get_info(, 0, 0x7ffdd25cc62c) = 0 :28
975779240053048:975779240053609 566828:566828 hsa_amd_agent_iterate_memory_pools(, 1, 0x7ffdd25cc9e8) = 0 :23
975779240055373:975779240055663 566828:566828 hsa_agent_get_info(, 0, 0x7ffdd25cc9a8) = 0 :29
975779240056144:975779240056234 566828:566828 hsa_agent_get_info(, 17, 0x7ffdd25cc990) = 0 :30
975779240056986:975779240057076 566828:566828 hsa_amd_memory_pool_get_info(, 0, 0x7ffdd25cc62c) = 0 :32
975779240057507:975779240057587 566828:566828 hsa_amd_memory_pool_get_info(, 1, 0x7ffdd25cc630) = 0 :33
975779240058008:975779240058088 566828:566828 hsa_amd_memory_pool_get_info(, 2, 0x7ffdd25cc650) = 0 :34
975779240058509:975779240058589 566828:566828 hsa_amd_memory_pool_get_info(, 6, 0x7ffdd25cc658) = 0 :35
975779240061504:975779240061605 566828:566828 hsa_amd_memory_pool_get_info(, 0, 0x7ffdd25cc62c) = 0 :36
975779240061504:975779240062035 566828:566828 hsa_amd_agent_iterate_memory_pools(, 1, 0x7ffdd25cc9e8) = 0 :31
975779240061504:975779240063528 566828:566828 hsa_iterate_agents(1, 0) = 0 :5
975779240167184:975779249865797 566828:566834 hsa_queue_create(, 1024, 1, 0, 0, 0, 0, 0x7f70535fdbc8) = 0 :37
975779249901595:975779249990022 566828:566834 hsa_code_object_reader_create_from_file(8, 0x7f70535fdbf8) = 0 :38
975779249990984:975779250001383 566828:566834 hsa_executable_create_alt(1, 0, 0x0, 0x7f70535fdc00) = 0 :27
975779250002345:975779250430202 566828:566834 hsa_executable_load_agent_code_object(, , , 0x0, 0) = 0 :28
975779250432296:975779250926909 566828:566834 hsa_executable_freeze(, 0x0) = 0 :29
975779250929755:975779250931207 566828:566834 hsa_executable_get_symbol_by_name(, 0x7f7054002950, 0x7f70535fdb28, 0x7f70535fdb48) = 1013 :2a
975779250931979:975779250932690 566828:566834 hsa_executable_get_symbol_by_name(, 0x7f7054003850, 0x7f70535fdb28, 0x7f70535fdb48) = 0 :2b
975779250934403:975779250934854 566828:566834 hsa_executable_symbol_get_info(, 22, 0x7f70535fdc10) = 0 :44
975779250935425:975779250935536 566828:566834 hsa_executable_symbol_get_info(, 14, 0x7f70535fdc18) = 0 :45
975779250936097:975779250936177 566828:566834 hsa_executable_symbol_get_info(, 13, 0x7f70535fdc1c) = 0 :46
975779250936728:975779250936798 566828:566834 hsa_executable_symbol_get_info(, 11, 0x7f70535fdc20) = 0 :47
975779250937349:975779250937419 566828:566834 hsa_executable_symbol_get_info(, 12, 0x7f70535fdc24) = 0 :48
975779250938321:975779250956876 566828:566834 hsa_amd_memory_pool_allocate(, 72, 0, 0x7f70535fdb70) = 0 :49
975779250958098:975779251048298 566828:566834 hsa_amd_agents_allow_access(3, 0x55e1b9df4c30, 0, 0x7f7261070000) = 0 :50
975779251049150:975779251065531 566828:566834 hsa_amd_memory_pool_allocate(, 256, 0, 0x7f70535fdb70) = 0 :51
975779251066232:975779251149319 566828:566834 hsa_amd_agents_allow_access(3, 0x55e1b9df4c30, 0, 0x7f726106e000) = 0 :52
975779251150000:975779251165960 566828:566834 hsa_amd_memory_pool_allocate(, 256, 0, 0x7f70535fdb70) = 0 :53
975779251166531:975779251256912 566828:566834 hsa_amd_agents_allow_access(3, 0x55e1b9df4c30, 0, 0x7f726106c000) = 0 :54
975779251258114:975779251261601 566828:566834 hsa_amd_signal_create(1, 0, 0, 0, 0x7f70535fdbd0) = 0 :55
975779251262923:975779251263204 566828:566834 hsa_queue_load_write_index_relaxed(0x7f726109e000) = 0 :56
975779251264065:975779251264276 566828:566834 hsa_queue_load_read_index_relaxed(0x7f726109e000) = 0 :57
975779251264937:975779251265178 566828:566834 hsa_queue_store_write_index_screlease(0x7f726109e000, 1) = void :58
975779251265969:975779251266951 566828:566834 hsa_signal_store_screlease(, 0) = void :59
975779251267472:975779251283773 566828:566834 hsa_signal_wait_scacquire(, 0, 0, 18446744073709551615, 0) = 0 :60
975779251284654:975779251286848 566828:566834 hsa_signal_destroy() = 0 :61
975779251290806:975779251322035 566828:566834 hsa_memory_free(0x7f726106e000) = 0 :62
975779251322646:975779251341261 566828:566834 hsa_memory_free(0x7f726106c000) = 0 :63
975779251342043:975779251389061 566828:566834 hsa_executable_destroy() = 0 :64
975779251389843:975779251392488 566828:566834 hsa_code_object_reader_destroy() = 0 :65
@@ -1,24 +1,22 @@
ROCTracer (pid=202688):
0x55a6e1bfa280 agent cpu
0x55a6e1bf9470 agent gpu
0x55a6e1c34d10 agent gpu
169339419628779
ROCTracer (pid=566858):
0x55ae2fa607c0 agent cpu
0x55ae2fa5eeb0 agent gpu
0x55ae2fa9b540 agent gpu
975785718853775
HSA-trace()
HSA-activity-trace()
169339419776888:169339419781697 202688:202688 hsa_amd_profiling_async_copy_enable(1) = 0
169339420102142:169339420102703 202688:202688 hsa_agent_get_info({handle=94175240364672}, 17, 0x7ffe818aff34) = 0
169339420104075:169339420104606 202688:202688 hsa_agent_get_info({handle=94175240361072}, 17, 0x7ffe818aff34) = 0
169339420105568:169339420106049 202688:202688 hsa_agent_get_info({handle=94175240604944}, 17, 0x7ffe818aff34) = 0
169339420105568:169339420106941 202688:202688 hsa_iterate_agents(1, 0) = 0
ROCTracer (pid=202688):
0x55a6e1bfa280 agent cpu
0x55a6e1c99260 agent gpu
0x55a6e1c95d50 agent gpu
169339843452619
975785719398623:975785719398824 566858:566858 hsa_agent_get_info(, 17, 0x7ffe30b97814) = 0 :6
975785719403482:975785719403643 566858:566858 hsa_agent_get_info(, 17, 0x7ffe30b97814) = 0 :7
975785719404274:975785719404364 566858:566858 hsa_agent_get_info(, 17, 0x7ffe30b97814) = 0 :8
975785719404274:975785719404885 566858:566858 hsa_iterate_agents(1, 0) = 0 :5
ROCTracer (pid=566858):
0x55ae2fa607c0 agent cpu
0x55ae2fb02cc0 agent gpu
0x55ae2fa62970 agent gpu
975785742239830
HSA-trace()
HSA-activity-trace()
169339843601029:169339843605668 202688:202688 hsa_amd_profiling_async_copy_enable(1) = 0
169339843768255:169339843768816 202688:202688 hsa_agent_get_info({handle=94175240364672}, 17, 0x7ffe818aff34) = 0
169339843770028:169339843770549 202688:202688 hsa_agent_get_info({handle=94175241015904}, 17, 0x7ffe818aff34) = 0
169339843771491:169339843771962 202688:202688 hsa_agent_get_info({handle=94175241002320}, 17, 0x7ffe818aff34) = 0
169339843771491:169339843772854 202688:202688 hsa_iterate_agents(1, 0) = 0
975785742436120:975785742436310 566858:566858 hsa_agent_get_info(, 17, 0x7ffe30b97814) = 0 :14
975785742437352:975785742437472 566858:566858 hsa_agent_get_info(, 17, 0x7ffe30b97814) = 0 :15
975785742437963:975785742438053 566858:566858 hsa_agent_get_info(, 17, 0x7ffe30b97814) = 0 :16
975785742437963:975785742438464 566858:566858 hsa_iterate_agents(1, 0) = 0 :13