Bring the HSA_EVT callbacks back to the roctracer
Change-Id: I26080b264d7989880ba7e9f00502cc680b2256d7
This commit is contained in:
@@ -21,6 +21,7 @@
|
||||
#ifndef EXT_PROF_PROTOCOL_H_
|
||||
#define EXT_PROF_PROTOCOL_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Traced API domains */
|
||||
|
||||
+67
-5
@@ -27,8 +27,6 @@
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
#include <hsa_prof_str.h>
|
||||
|
||||
#include <rocprofiler/activity.h>
|
||||
|
||||
// HSA OP ID enumeration
|
||||
enum hsa_op_id_t {
|
||||
HSA_OP_ID_DISPATCH = 0,
|
||||
@@ -38,9 +36,73 @@ enum hsa_op_id_t {
|
||||
HSA_OP_ID_NUMBER
|
||||
};
|
||||
|
||||
struct hsa_ops_properties_t {
|
||||
void* table;
|
||||
void* reserved1[3];
|
||||
// HSA EVT ID enumeration
|
||||
// Identifiers of the HSA event (ACTIVITY_DOMAIN_HSA_EVT) callbacks.
// The numeric values are explicit and contiguous starting at 0.
enum hsa_evt_id_t {
|
||||
HSA_EVT_ID_ALLOCATE = 0, // Memory allocate callback
|
||||
HSA_EVT_ID_DEVICE = 1, // Device assign callback
|
||||
HSA_EVT_ID_MEMCOPY = 2, // Memcopy callback
|
||||
HSA_EVT_ID_SUBMIT = 3, // Packet submission callback
|
||||
HSA_EVT_ID_KSYMBOL = 4, // Loading/unloading of kernel symbol
|
||||
HSA_EVT_ID_CODEOBJ = 5, // Loading/unloading of device code object
|
||||
// Count of the event ids above; not an event itself.
HSA_EVT_ID_NUMBER
|
||||
};
|
||||
|
||||
// Properties structure for the HSA ops domain. It carries no named fields
// here, only four reserved pointer-sized slots.
struct hsa_ops_properties_t {
|
||||
void* reserved1[4];
|
||||
};
|
||||
|
||||
// HSA EVT data type
|
||||
// Payload passed to HSA event callbacks. Only one member of the union is
// meaningful for a given callback; which one depends on the event id.
typedef struct {
|
||||
union {
|
||||
// Memory allocation / free event data.
struct {
|
||||
const void* ptr; // allocated area ptr
|
||||
size_t size; // allocated area size, zero size means 'free' callback
|
||||
hsa_amd_segment_t segment; // allocated area's memory segment type
|
||||
hsa_amd_memory_pool_global_flag_t global_flag; // allocated area's memory global flag
|
||||
int is_code; // equal to 1 if code is allocated
|
||||
} allocate;
|
||||
|
||||
// Device assignment event data.
struct {
|
||||
hsa_device_type_t type; // type of assigned device
|
||||
uint32_t id; // id of assigned device
|
||||
hsa_agent_t agent; // device HSA agent handle
|
||||
const void* ptr; // ptr the device is assigned to
|
||||
} device;
|
||||
|
||||
// Memory copy event data.
struct {
|
||||
const void* dst; // memcopy dst ptr
|
||||
const void* src; // memcopy src ptr
|
||||
size_t size; // memcopy size bytes
|
||||
} memcopy;
|
||||
|
||||
// Packet submission event data.
struct {
|
||||
const void* packet; // submitted to GPU packet
|
||||
const char* kernel_name; // kernel name, NULL if not a kernel dispatch packet
|
||||
hsa_queue_t* queue; // HSA queue the packet was submitted to
|
||||
uint32_t device_type; // type of device the packet is submitted to
|
||||
uint32_t device_id; // id of device the packet is submitted to
|
||||
} submit;
|
||||
|
||||
// Kernel symbol event data.
struct {
|
||||
uint64_t object; // kernel symbol object
|
||||
const char* name; // kernel symbol name
|
||||
uint32_t name_length; // kernel symbol name length
|
||||
int unload; // symbol executable destroy
|
||||
} ksymbol;
|
||||
|
||||
// Code object load/unload event data.
struct {
|
||||
uint32_t storage_type; // code object storage type
|
||||
int storage_file; // origin file descriptor
|
||||
uint64_t memory_base; // origin memory base
|
||||
uint64_t memory_size; // origin memory size
|
||||
uint64_t load_base; // code object load base
|
||||
uint64_t load_size; // code object load size
|
||||
uint64_t load_delta; // code object load delta
|
||||
uint32_t uri_length; // URI string length (not including the terminating NUL character)
|
||||
const char* uri; // URI string
|
||||
int unload; // unload flag
|
||||
} codeobj;
|
||||
};
|
||||
} hsa_evt_data_t;
|
||||
|
||||
#endif // INC_ROCTRACER_HSA_H_
|
||||
|
||||
@@ -52,6 +52,7 @@ LICENSE = \
|
||||
|
||||
|
||||
header_basic = \
|
||||
'namespace detail {\n' + \
|
||||
'template <typename T>\n' + \
|
||||
' inline static std::ostream& operator<<(std::ostream& out, const T& v) {\n' + \
|
||||
' using std::operator<<;\n' + \
|
||||
@@ -111,9 +112,9 @@ def process_struct(file_handle, cppHeader_struct, cppHeader, parent_hier_name, a
|
||||
indent = ""
|
||||
str += " if (std::string(\"" + cppHeader_struct + "::" + name + "\").find(" + apiname.upper() + "_structs_regex" + ") != std::string::npos) {\n"
|
||||
indent = " "
|
||||
str += indent + " roctracer::" + apiname.lower() + "_support::operator<<(out, \"" + name + "=\");\n"
|
||||
str += indent + " roctracer::" + apiname.lower() + "_support::operator<<(out, v." + name + ");\n"
|
||||
str += indent + " roctracer::" + apiname.lower() + "_support::operator<<(out, \", \");\n"
|
||||
str += indent + " roctracer::" + apiname.lower() + "_support::detail::operator<<(out, \"" + name + "=\");\n"
|
||||
str += indent + " roctracer::" + apiname.lower() + "_support::detail::operator<<(out, v." + name + ");\n"
|
||||
str += indent + " roctracer::" + apiname.lower() + "_support::detail::operator<<(out, \", \");\n"
|
||||
str += " }\n"
|
||||
if "void" not in mtype:
|
||||
global_str += str
|
||||
@@ -181,7 +182,7 @@ def gen_cppheader(infilepath, outfilepath, rank):
|
||||
if len(cppHeader.classes[c]["properties"]["public"]) != 0:
|
||||
output_filename_h.write("inline static std::ostream& operator<<(std::ostream& out, const " + c + "& v)\n")
|
||||
output_filename_h.write("{\n")
|
||||
output_filename_h.write(" roctracer::" + apiname.lower() + "_support::operator<<(out, '{');\n")
|
||||
output_filename_h.write(" roctracer::" + apiname.lower() + "_support::detail::operator<<(out, '{');\n")
|
||||
output_filename_h.write(" " + apiname.upper() + "_depth_max_cnt++;\n")
|
||||
output_filename_h.write(" if (" + apiname.upper() + "_depth_max == -1 || " + apiname.upper() + "_depth_max_cnt <= " + apiname.upper() + "_depth_max" + ") {\n" )
|
||||
process_struct(output_filename_h, c, cppHeader, "", apiname)
|
||||
@@ -190,15 +191,15 @@ def gen_cppheader(infilepath, outfilepath, rank):
|
||||
output_filename_h.write(global_str)
|
||||
output_filename_h.write(" };\n")
|
||||
output_filename_h.write(" " + apiname.upper() + "_depth_max_cnt--;\n")
|
||||
output_filename_h.write(" roctracer::" + apiname.lower() + "_support::operator<<(out, '}');\n")
|
||||
output_filename_h.write(" roctracer::" + apiname.lower() + "_support::detail::operator<<(out, '}');\n")
|
||||
output_filename_h.write(" return out;\n")
|
||||
output_filename_h.write("}\n")
|
||||
global_str = ''
|
||||
global_ops += "inline static std::ostream& operator<<(std::ostream& out, const " + c + "& v)\n" + "{\n" + " roctracer::" + apiname.lower() + "_support::operator<<(out, v);\n" + " return out;\n" + "}\n\n"
|
||||
global_ops += "inline static std::ostream& operator<<(std::ostream& out, const " + c + "& v)\n" + "{\n" + " roctracer::" + apiname.lower() + "_support::detail::operator<<(out, v);\n" + " return out;\n" + "}\n\n"
|
||||
|
||||
if rank == 1 or rank == 2:
|
||||
footer = '// end ostream ops for '+ apiname + ' \n'
|
||||
footer += '};};\n\n'
|
||||
footer += '};};};\n\n'
|
||||
output_filename_h.write(footer)
|
||||
output_filename_h.write(global_ops)
|
||||
footer = '#endif //__cplusplus\n' + \
|
||||
|
||||
+3
-4
@@ -334,8 +334,7 @@ class API_DescrParser:
|
||||
self.cpp_content += '#include <hsa/hsa_api_trace.h>\n'
|
||||
self.cpp_content += '#include \"util/callback_table.h\"\n\n'
|
||||
self.cpp_content += '#include <atomic>\n'
|
||||
self.cpp_content += 'namespace roctracer {\n'
|
||||
self.cpp_content += 'namespace hsa_support {\n\n'
|
||||
self.cpp_content += 'namespace roctracer::hsa_support::detail {\n'
|
||||
|
||||
self.cpp_content += 'static CoreApiTable CoreApi_saved_before_cb;\n'
|
||||
self.cpp_content += 'static AmdExtTable AmdExt_saved_before_cb;\n'
|
||||
@@ -345,7 +344,7 @@ class API_DescrParser:
|
||||
self.cpp_content += self.add_section('API intercepting code', '', self.gen_intercept)
|
||||
self.cpp_content += self.add_section('API get_name function', ' ', self.gen_get_name)
|
||||
self.cpp_content += self.add_section('API get_code function', ' ', self.gen_get_code)
|
||||
self.cpp_content += '\n};};\n'
|
||||
self.cpp_content += '\n};\n'
|
||||
|
||||
# add code section
|
||||
def add_section(self, title, gap, fun):
|
||||
@@ -465,7 +464,7 @@ class API_DescrParser:
|
||||
def gen_get_name(self, n, name, call, struct):
|
||||
content = ''
|
||||
if n == -1:
|
||||
content += 'static const char* GetApiName(const uint32_t& id) {\n'
|
||||
content += 'static const char* GetApiName(uint32_t id) {\n'
|
||||
content += ' switch (id) {\n'
|
||||
return content
|
||||
if call != '-':
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
#define EXC_RAISING(error, stream) \
|
||||
do { \
|
||||
|
||||
@@ -0,0 +1,602 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "hsa_support.h"
|
||||
|
||||
#include "correlation_id.h"
|
||||
#include "exception.h"
|
||||
#include "loader.h"
|
||||
#include "memory_pool.h"
|
||||
#include "roctracer.h"
|
||||
#include "roctracer_hsa.h"
|
||||
#include "tracker.h"
|
||||
#include "util/callback_table.h"
|
||||
#include "util/logger.h"
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_ven_amd_loader.h>
|
||||
#include <unordered_map>
|
||||
#include <optional>
|
||||
#include <mutex>
|
||||
|
||||
#include "hsa_prof_str.inline.h"
|
||||
|
||||
namespace roctracer::hsa_support {
|
||||
|
||||
namespace {
|
||||
|
||||
// Registered HSA_EVT callbacks, one slot per hsa_evt_id_t value.
util::CallbackTable<ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_NUMBER> hsa_evt_cb_table;
|
||||
|
||||
// Copies of the HSA core / AMD extension tables taken in Initialize() before
// the intercepts are installed; the intercepts forward to these entries.
CoreApiTable saved_core_api{};
|
||||
AmdExtTable saved_amd_ext_api{};
|
||||
// Loader extension table, filled in by Initialize().
hsa_ven_amd_loader_1_01_pfn_t hsa_loader_api{};
|
||||
|
||||
// async copy activity callback
|
||||
// Held by Initialize() and by the HSA_OP_ID_COPY paths of
// EnableActivity()/DisableActivity().
std::mutex init_mutex;
|
||||
bool async_copy_callback_enabled = false;
|
||||
// Pool that async copy records are written to while the activity is enabled.
MemoryPool* async_copy_callback_memory_pool = nullptr;
|
||||
|
||||
// Per-agent information cached by Initialize().
struct AgentInfo {
|
||||
int index;  // running index among agents of the same device type
|
||||
hsa_device_type_t type;  // value of HSA_AGENT_INFO_DEVICE for the agent
|
||||
};
|
||||
// Agent handle -> cached AgentInfo.
std::unordered_map<decltype(hsa_agent_t::handle), AgentInfo> agent_info_map;
|
||||
|
||||
// Intercept for hsa_memory_allocate.
//
// Forwards the request to the saved core API. When the allocation succeeds and
// an HSA_EVT_ID_ALLOCATE callback is registered, the callback receives the new
// pointer, its size and the region's segment / global flags.
// Returns the runtime's status on failure, HSA_STATUS_SUCCESS otherwise.
hsa_status_t HSA_API MemoryAllocateIntercept(hsa_region_t region, size_t size, void** ptr) {
  const hsa_status_t result = saved_core_api.hsa_memory_allocate_fn(region, size, ptr);
  if (result != HSA_STATUS_SUCCESS) return result;

  auto [evt_callback, evt_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_ALLOCATE);
  if (!evt_callback) return HSA_STATUS_SUCCESS;

  hsa_evt_data_t evt{};
  evt.allocate.size = size;
  evt.allocate.ptr = *ptr;

  // The global flags are only queried once the segment query has succeeded.
  const bool segment_ok =
      saved_core_api.hsa_region_get_info_fn(region, HSA_REGION_INFO_SEGMENT,
                                            &evt.allocate.segment) == HSA_STATUS_SUCCESS;
  const bool region_info_ok =
      segment_ok &&
      saved_core_api.hsa_region_get_info_fn(region, HSA_REGION_INFO_GLOBAL_FLAGS,
                                            &evt.allocate.global_flag) == HSA_STATUS_SUCCESS;
  if (!region_info_ok) FATAL_LOGGING("hsa_region_get_info failed");

  evt_callback(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE, &evt, evt_arg);
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Intercept for hsa_memory_assign_agent.
//
// Forwards the call to the saved core API. On success, if an
// HSA_EVT_ID_DEVICE callback is registered, it is invoked with the pointer,
// the agent handle, the agent's device type and (when the agent is present in
// agent_info_map) its per-type index.
// Returns the runtime's status on failure, HSA_STATUS_SUCCESS otherwise.
hsa_status_t MemoryAssignAgentIntercept(void* ptr, hsa_agent_t agent,
                                        hsa_access_permission_t access) {
  hsa_status_t status = saved_core_api.hsa_memory_assign_agent_fn(ptr, agent, access);
  if (status != HSA_STATUS_SUCCESS) return status;

  if (auto [callback_fun, callback_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_DEVICE); callback_fun) {
    hsa_evt_data_t data{};
    data.device.ptr = ptr;
    // Report which agent the memory was assigned to, as the other
    // HSA_EVT_ID_DEVICE emitters in this file do.
    data.device.agent = agent;
    if (saved_core_api.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_DEVICE, &data.device.type) !=
        HSA_STATUS_SUCCESS)
      FATAL_LOGGING("hsa_agent_get_info failed");

    // The id is best effort here: an agent missing from the map keeps id 0
    // rather than turning a previously working call into a fatal error.
    if (auto it = agent_info_map.find(agent.handle); it != agent_info_map.end())
      data.device.id = it->second.index;

    callback_fun(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE, &data, callback_arg);
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Intercept for hsa_memory_copy.
//
// The copy is performed first through the saved core API; only a successful
// copy is reported, as an HSA_EVT_ID_MEMCOPY event carrying dst, src and size.
// Returns the runtime's status on failure, HSA_STATUS_SUCCESS otherwise.
hsa_status_t MemoryCopyIntercept(void* dst, const void* src, size_t size) {
  const hsa_status_t result = saved_core_api.hsa_memory_copy_fn(dst, src, size);
  if (result != HSA_STATUS_SUCCESS) return result;

  auto [evt_callback, evt_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_MEMCOPY);
  if (evt_callback) {
    hsa_evt_data_t evt{};
    evt.memcopy.size = size;
    evt.memcopy.src = src;
    evt.memcopy.dst = dst;
    evt_callback(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_MEMCOPY, &evt, evt_arg);
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Intercept for hsa_amd_memory_pool_allocate.
//
// Forwards the allocation to the saved AMD extension API. Zero-sized or failed
// allocations are returned to the caller without any event. Otherwise, when an
// HSA_EVT_ID_ALLOCATE callback is registered:
//   1. an ALLOCATE event is emitted with the pool's segment and global flags;
//   2. if an HSA_EVT_ID_DEVICE callback is also registered, one DEVICE event is
//      emitted for every agent whose access to the pool is
//      HSA_AMD_MEMORY_POOL_ACCESS_ALLOWED_BY_DEFAULT.
hsa_status_t MemoryPoolAllocateIntercept(hsa_amd_memory_pool_t pool, size_t size, uint32_t flags,
                                         void** ptr) {
  hsa_status_t status = saved_amd_ext_api.hsa_amd_memory_pool_allocate_fn(pool, size, flags, ptr);
  if (size == 0 || status != HSA_STATUS_SUCCESS) return status;

  if (auto [callback_fun, callback_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_ALLOCATE); callback_fun) {
    hsa_evt_data_t data{};
    data.allocate.ptr = *ptr;
    data.allocate.size = size;

    if (saved_amd_ext_api.hsa_amd_memory_pool_get_info_fn(
            pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &data.allocate.segment) != HSA_STATUS_SUCCESS ||
        saved_amd_ext_api.hsa_amd_memory_pool_get_info_fn(
            pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &data.allocate.global_flag) !=
            HSA_STATUS_SUCCESS)
      // Name the call that actually failed (was "hsa_region_get_info").
      FATAL_LOGGING("hsa_amd_memory_pool_get_info failed");

    callback_fun(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE, &data, callback_arg);

    if (std::tie(callback_fun, callback_arg) = hsa_evt_cb_table.Get(HSA_EVT_ID_DEVICE);
        !callback_fun)
      return HSA_STATUS_SUCCESS;

    // FIXME: Why is this only reported if HSA_EVT_ID_ALLOCATE is also set?
    // Pass the allocated address (*ptr), not the address of the caller's
    // out-parameter, so the DEVICE event refers to the same memory as the
    // ALLOCATE event above.
    auto callback_data = std::make_tuple(callback_fun, callback_arg, pool, *ptr);
    auto agent_callback = [](hsa_agent_t agent, void* iterate_agent_callback_data) {
      auto [device_callback, device_arg, memory_pool, allocated_ptr] =
          *reinterpret_cast<decltype(callback_data)*>(iterate_agent_callback_data);

      // Skip agents that cannot access the pool by default (or whose access
      // could not be queried).
      if (hsa_amd_memory_pool_access_t value;
          saved_amd_ext_api.hsa_amd_agent_memory_pool_get_info_fn(
              agent, memory_pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &value) !=
              HSA_STATUS_SUCCESS ||
          value != HSA_AMD_MEMORY_POOL_ACCESS_ALLOWED_BY_DEFAULT)
        return HSA_STATUS_SUCCESS;

      auto it = agent_info_map.find(agent.handle);
      if (it == agent_info_map.end()) FATAL_LOGGING("agent was not found in the agent_info map");

      hsa_evt_data_t data{};
      data.device.type = it->second.type;
      data.device.id = it->second.index;
      data.device.agent = agent;
      data.device.ptr = allocated_ptr;

      device_callback(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE, &data, device_arg);
      return HSA_STATUS_SUCCESS;
    };
    saved_core_api.hsa_iterate_agents_fn(agent_callback, &callback_data);
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Intercept for hsa_amd_memory_pool_free.
//
// A free is reported as an HSA_EVT_ID_ALLOCATE event whose size is zero. The
// event is emitted before the memory is released through the saved AMD
// extension API, whose status is returned unchanged.
hsa_status_t MemoryPoolFreeIntercept(void* ptr) {
  auto [evt_callback, evt_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_ALLOCATE);
  if (evt_callback) {
    hsa_evt_data_t evt{};
    evt.allocate.size = 0;
    evt.allocate.ptr = ptr;
    evt_callback(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE, &evt, evt_arg);
  }

  return saved_amd_ext_api.hsa_amd_memory_pool_free_fn(ptr);
}
|
||||
|
||||
// Agent allow access callback 'hsa_amd_agents_allow_access'
|
||||
hsa_status_t AgentsAllowAccessIntercept(uint32_t num_agents, const hsa_agent_t* agents,
|
||||
const uint32_t* flags, const void* ptr) {
|
||||
hsa_status_t status =
|
||||
saved_amd_ext_api.hsa_amd_agents_allow_access_fn(num_agents, agents, flags, ptr);
|
||||
if (status != HSA_STATUS_SUCCESS) return status;
|
||||
|
||||
if (auto [callback_fun, callback_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_DEVICE); callback_fun) {
|
||||
while (num_agents--) {
|
||||
hsa_agent_t agent = *agents++;
|
||||
auto it = agent_info_map.find(agent.handle);
|
||||
if (it == agent_info_map.end()) FATAL_LOGGING("agent was not found in the agent_info map");
|
||||
|
||||
hsa_evt_data_t data{};
|
||||
data.device.type = it->second.type;
|
||||
data.device.id = it->second.index;
|
||||
data.device.agent = agent;
|
||||
data.device.ptr = ptr;
|
||||
|
||||
callback_fun(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE, &data, callback_arg);
|
||||
}
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Context handed to CodeObjectCallback through the loader's iteration API.
// It is brace-initialized positionally by the executable intercepts, so the
// member order matters.
struct CodeObjectCallbackArg {
|
||||
activity_rtapi_callback_t callback_fun;  // HSA_EVT_ID_CODEOBJ callback to invoke
|
||||
void* callback_arg;  // argument passed through to callback_fun
|
||||
bool unload;  // true when reporting an executable destroy, false for a freeze
|
||||
};
|
||||
|
||||
// Per-code-object callback for
// hsa_ven_amd_loader_executable_iterate_loaded_code_objects.
//
// Queries the loader for the code object's storage, load range and URI, then
// invokes the HSA_EVT_ID_CODEOBJ callback carried in `arg` (a
// CodeObjectCallbackArg*). Code objects whose storage type is NONE are skipped
// without an event. Always returns HSA_STATUS_SUCCESS; failed loader queries
// are reported through FATAL_LOGGING.
hsa_status_t CodeObjectCallback(hsa_executable_t executable,
                                hsa_loaded_code_object_t loaded_code_object, void* arg) {
  auto* request = static_cast<CodeObjectCallbackArg*>(arg);
  hsa_evt_data_t evt{};
  auto& info = evt.codeobj;

  // True when the loader stored the requested attribute in `out`.
  const auto query = [loaded_code_object](auto attribute, void* out) {
    return hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info(
               loaded_code_object, attribute, out) == HSA_STATUS_SUCCESS;
  };

  if (!query(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE,
             &info.storage_type))
    FATAL_LOGGING("hsa_ven_amd_loader_loaded_code_object_get_info failed");

  switch (info.storage_type) {
    case HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE:
      // File-backed: a valid descriptor is required, the memory range is zeroed.
      if (!query(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_FILE,
                 &info.storage_file) ||
          info.storage_file == -1)
        FATAL_LOGGING("hsa_ven_amd_loader_loaded_code_object_get_info failed");
      info.memory_base = info.memory_size = 0;
      break;

    case HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY:
      // Memory-backed: record the origin range, mark the descriptor as absent.
      if (!query(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_BASE,
                 &info.memory_base) ||
          !query(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_SIZE,
                 &info.memory_size))
        FATAL_LOGGING("hsa_ven_amd_loader_loaded_code_object_get_info failed");
      info.storage_file = -1;
      break;

    case HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE:
      return HSA_STATUS_SUCCESS;  // FIXME: do we really not care about these code objects?

    default:
      FATAL_LOGGING("Unknown code object storage type: " << info.storage_type);
      break;
  }

  if (!query(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE, &info.load_base) ||
      !query(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE, &info.load_size) ||
      !query(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA, &info.load_delta))
    FATAL_LOGGING("hsa_ven_amd_loader_loaded_code_object_get_info failed");

  if (!query(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH, &info.uri_length))
    FATAL_LOGGING("hsa_ven_amd_loader_loaded_code_object_get_info failed");

  // The URI buffer lives on this stack frame, so info.uri is only valid for
  // the duration of the callback below.
  std::string uri_buffer(info.uri_length, '\0');
  if (!query(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI, uri_buffer.data()))
    FATAL_LOGGING("hsa_ven_amd_loader_loaded_code_object_get_info failed");

  info.uri = uri_buffer.c_str();
  info.unload = request->unload ? 1 : 0;
  request->callback_fun(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ, &evt,
                        request->callback_arg);

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Intercept for hsa_executable_freeze.
//
// Freezes the executable through the saved core API. On success, if an
// HSA_EVT_ID_CODEOBJ callback is registered, every loaded code object of the
// executable is reported as a load (unload == false).
// Returns the runtime's status on failure, HSA_STATUS_SUCCESS otherwise.
hsa_status_t ExecutableFreezeIntercept(hsa_executable_t executable, const char* options) {
  const hsa_status_t result = saved_core_api.hsa_executable_freeze_fn(executable, options);
  if (result != HSA_STATUS_SUCCESS) return result;

  auto [evt_callback, evt_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_CODEOBJ);
  if (evt_callback) {
    CodeObjectCallbackArg load_request = {evt_callback, evt_arg, false};
    hsa_loader_api.hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
        executable, CodeObjectCallback, &load_request);
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Intercept for hsa_executable_destroy.
//
// While the executable is still alive, reports each of its loaded code objects
// as an unload (unload == true) to the HSA_EVT_ID_CODEOBJ callback, if one is
// registered. The executable is then destroyed through the saved core API and
// that call's status is returned.
hsa_status_t ExecutableDestroyIntercept(hsa_executable_t executable) {
  auto [evt_callback, evt_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_CODEOBJ);
  if (evt_callback) {
    CodeObjectCallbackArg unload_request = {evt_callback, evt_arg, true};
    hsa_loader_api.hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
        executable, CodeObjectCallback, &unload_request);
  }

  return saved_core_api.hsa_executable_destroy_fn(executable);
}
|
||||
|
||||
// Tracker handler for async copies: converts a tracker entry into an
// HSA_OPS / HSA_OP_ID_COPY activity record and writes it to the entry's pool.
// The record's device_id is always 0.
void MemoryASyncCopyHandler(const Tracker::entry_t* entry) {
  activity_record_t copy_record{};
  copy_record.domain = ACTIVITY_DOMAIN_HSA_OPS;
  copy_record.op = HSA_OP_ID_COPY;
  copy_record.correlation_id = entry->correlation_id;
  copy_record.device_id = 0;
  copy_record.begin_ns = entry->begin;
  copy_record.end_ns = entry->end;

  entry->pool->Write(copy_record);
}
|
||||
|
||||
// Intercept for hsa_amd_memory_async_copy.
//
// When async copy activity is disabled the call is forwarded untouched.
// Otherwise a tracker entry is created and enabled for the caller's completion
// signal, and the copy is submitted with the entry's own signal instead. If the
// submission fails, the entry is handed to Tracker::Disable.
// Returns the status of the underlying copy call.
hsa_status_t MemoryASyncCopyIntercept(void* dst, hsa_agent_t dst_agent, const void* src,
                                      hsa_agent_t src_agent, size_t size, uint32_t num_dep_signals,
                                      const hsa_signal_t* dep_signals,
                                      hsa_signal_t completion_signal) {
  if (async_copy_callback_enabled) {
    auto* tracked = new Tracker::entry_t();
    tracked->handler = MemoryASyncCopyHandler;
    tracked->pool = async_copy_callback_memory_pool;
    tracked->correlation_id = CorrelationId();
    Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, tracked);

    const hsa_status_t result = saved_amd_ext_api.hsa_amd_memory_async_copy_fn(
        dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, tracked->signal);
    if (result != HSA_STATUS_SUCCESS) Tracker::Disable(tracked);
    return result;
  }

  return saved_amd_ext_api.hsa_amd_memory_async_copy_fn(
      dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, completion_signal);
}
|
||||
|
||||
// Intercept for hsa_amd_memory_async_copy_rect.
//
// Same scheme as MemoryASyncCopyIntercept: forwarded untouched while async copy
// activity is disabled; otherwise the copy is submitted with a tracker entry's
// signal in place of the caller's completion signal, and the entry is handed to
// Tracker::Disable if the submission fails.
// Returns the status of the underlying copy call.
hsa_status_t MemoryASyncCopyRectIntercept(const hsa_pitched_ptr_t* dst,
                                          const hsa_dim3_t* dst_offset,
                                          const hsa_pitched_ptr_t* src,
                                          const hsa_dim3_t* src_offset, const hsa_dim3_t* range,
                                          hsa_agent_t copy_agent, hsa_amd_copy_direction_t dir,
                                          uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
                                          hsa_signal_t completion_signal) {
  if (async_copy_callback_enabled) {
    auto* tracked = new Tracker::entry_t();
    tracked->handler = MemoryASyncCopyHandler;
    tracked->pool = async_copy_callback_memory_pool;
    tracked->correlation_id = CorrelationId();
    Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, tracked);

    const hsa_status_t result = saved_amd_ext_api.hsa_amd_memory_async_copy_rect_fn(
        dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals,
        tracked->signal);
    if (result != HSA_STATUS_SUCCESS) Tracker::Disable(tracked);
    return result;
  }

  return saved_amd_ext_api.hsa_amd_memory_async_copy_rect_fn(
      dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals,
      completion_signal);
}
|
||||
|
||||
// Activity callback registered with RocpLoader (see EnableActivity).
// `arg` is the MemoryPool to write to and `record` a roctracer_record_t; the
// record's domain is overwritten with ACTIVITY_DOMAIN_HSA_OPS before it is
// written. `op_id` is not used.
void AsyncActivityCallback(uint32_t op_id, void* record, void* arg) {
  auto* activity = static_cast<roctracer_record_t*>(record);
  auto* target_pool = static_cast<MemoryPool*>(arg);

  activity->domain = ACTIVITY_DOMAIN_HSA_OPS;
  target_pool->Write(*activity);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Returns the current HSA system timestamp converted to nanoseconds.
//
// The saved (un-intercepted) hsa_system_get_info entry is used. Returns 0 when
// the HSA runtime reports HSA_STATUS_ERROR_NOT_INITIALIZED; any other failure,
// or a missing saved API table, is reported through FATAL_LOGGING.
//
// The tick-to-nanosecond conversion is done as ticks * 1e9 / frequency with
// integer arithmetic, rather than multiplying by a truncated integer period
// (1e9 / frequency). The truncated period is inexact whenever the frequency
// does not divide 1e9 and becomes 0 for frequencies above 1 GHz.
roctracer_timestamp_t timestamp_ns() {
  uint64_t sysclock = 0;

  if (saved_core_api.hsa_system_get_info_fn == nullptr)
    FATAL_LOGGING("HSA intercept is not active");

  if (hsa_status_t status =
          saved_core_api.hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock);
      status == HSA_STATUS_ERROR_NOT_INITIALIZED)
    return 0;
  else if (status != HSA_STATUS_SUCCESS)
    FATAL_LOGGING("hsa_system_get_info failed");

  // The timestamp frequency is queried once and cached.
  static const uint64_t sysclock_hz = []() {
    uint64_t hz = 0;
    if (hsa_status_t status = saved_core_api.hsa_system_get_info_fn(
            HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &hz);
        status != HSA_STATUS_SUCCESS)
      FATAL_LOGGING("hsa_system_get_info failed");
    // A zero frequency would otherwise surface as a division by zero below.
    if (hz == 0) FATAL_LOGGING("hsa_system_get_info returned a zero timestamp frequency");
    return hz;
  }();

  constexpr uint64_t kNsPerSecond = 1000000000;
  // Split into whole seconds and the sub-second remainder so the intermediate
  // product cannot overflow 64 bits for any frequency below ~18 GHz.
  const uint64_t whole_seconds = sysclock / sysclock_hz;
  const uint64_t leftover_ticks = sysclock % sysclock_hz;
  return whole_seconds * kNsPerSecond + leftover_ticks * kNsPerSecond / sysclock_hz;
}
|
||||
|
||||
// Installs the tracer into the HSA API table.
//
// Under init_mutex this:
//   1. saves copies of the core and AMD extension tables;
//   2. enumerates the agents and caches each one's device type and per-type
//      index in agent_info_map;
//   3. fetches the AMD loader extension table;
//   4. replaces the async copy, memory, access and executable entries of
//      `table` with the intercepts defined in this file;
//   5. installs the generated HSA_API wrappers;
//   6. enables async copy profiling if the copy activity was already enabled.
void Initialize(HsaApiTable* table) {
  std::scoped_lock lock(init_mutex);

  // Save the HSA core api and amd_ext api.
  saved_core_api = *table->core_;
  saved_amd_ext_api = *table->amd_ext_;

  // Enumerate the agents. The counters are static, so indices keep increasing
  // if this function runs more than once.
  const auto register_agent = [](hsa_agent_t agent, void* /*data*/) -> hsa_status_t {
    static int cpu_agent_count = 0;
    static int gpu_agent_count = 0;
    static int other_agent_count = 0;

    AgentInfo info;
    if (saved_core_api.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_DEVICE, &info.type) !=
        HSA_STATUS_SUCCESS)
      FATAL_LOGGING("hsa_agent_get_info failed");

    if (info.type == HSA_DEVICE_TYPE_CPU) {
      info.index = cpu_agent_count++;
    } else if (info.type == HSA_DEVICE_TYPE_GPU) {
      info.index = gpu_agent_count++;
    } else {
      info.index = other_agent_count++;
    }

    agent_info_map.emplace(agent.handle, info);
    return HSA_STATUS_SUCCESS;
  };
  if (saved_core_api.hsa_iterate_agents_fn(register_agent, nullptr) != HSA_STATUS_SUCCESS)
    FATAL_LOGGING("hsa_iterate_agents failed");

  // Install the code object intercept.
  const hsa_status_t loader_status = table->core_->hsa_system_get_major_extension_table_fn(
      HSA_EXTENSION_AMD_LOADER, 1, sizeof(hsa_ven_amd_loader_1_01_pfn_t), &hsa_loader_api);
  if (loader_status != HSA_STATUS_SUCCESS)
    FATAL_LOGGING("hsa_system_get_major_extension_table failed");

  CoreApiTable* core = table->core_;
  AmdExtTable* amd_ext = table->amd_ext_;

  // Install the HSA_OPS intercept
  amd_ext->hsa_amd_memory_async_copy_fn = MemoryASyncCopyIntercept;
  amd_ext->hsa_amd_memory_async_copy_rect_fn = MemoryASyncCopyRectIntercept;

  // Install the HSA_EVT intercept
  core->hsa_memory_allocate_fn = MemoryAllocateIntercept;
  core->hsa_memory_assign_agent_fn = MemoryAssignAgentIntercept;
  core->hsa_memory_copy_fn = MemoryCopyIntercept;
  amd_ext->hsa_amd_memory_pool_allocate_fn = MemoryPoolAllocateIntercept;
  amd_ext->hsa_amd_memory_pool_free_fn = MemoryPoolFreeIntercept;
  amd_ext->hsa_amd_agents_allow_access_fn = AgentsAllowAccessIntercept;
  core->hsa_executable_freeze_fn = ExecutableFreezeIntercept;
  core->hsa_executable_destroy_fn = ExecutableDestroyIntercept;

  // Install the HSA_API wrappers
  detail::InstallCoreApiWrappers(table->core_);
  detail::InstallAmdExtWrappers(table->amd_ext_);
  detail::InstallImageExtWrappers(table->image_ext_);

  if (async_copy_callback_enabled) {
    [[maybe_unused]] const hsa_status_t enable_status =
        saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(true);
    assert(enable_status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");
  }
}
|
||||
|
||||
void Finalize() {
|
||||
if (hsa_support::async_copy_callback_enabled) {
|
||||
[[maybe_unused]] hsa_status_t status =
|
||||
hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(false);
|
||||
assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the name of the HSA API operation `id`; delegates to the generated
// detail::GetApiName.
const char* GetApiName(uint32_t id) {
  const char* api_name = detail::GetApiName(id);
  return api_name;
}
|
||||
|
||||
const char* GetEvtName(uint32_t id) {
|
||||
switch (id) {
|
||||
case HSA_EVT_ID_ALLOCATE:
|
||||
return "ALLOCATE";
|
||||
case HSA_EVT_ID_DEVICE:
|
||||
return "DEVICE";
|
||||
case HSA_EVT_ID_MEMCOPY:
|
||||
return "MEMCOPY";
|
||||
case HSA_EVT_ID_SUBMIT:
|
||||
return "SUBMIT";
|
||||
case HSA_EVT_ID_KSYMBOL:
|
||||
return "KSYMBOL";
|
||||
case HSA_EVT_ID_CODEOBJ:
|
||||
return "CODEOBJ";
|
||||
case HSA_EVT_ID_NUMBER:
|
||||
break;
|
||||
};
|
||||
throw ApiError(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, "invalid HSA EVT callback id");
|
||||
}
|
||||
|
||||
// Returns the name of the HSA op `id` as reported by RocpLoader's GetOpName.
const char* GetOpsName(uint32_t id) {
  const char* op_name = RocpLoader::Instance().GetOpName(id);
  return op_name;
}
|
||||
|
||||
// Returns the HSA API operation id for the name `str`; delegates to the
// generated detail::GetApiCode.
uint32_t GetApiCode(const char* str) {
  const uint32_t api_code = detail::GetApiCode(str);
  return api_code;
}
|
||||
|
||||
// Enables activity records for `op` in `domain`, written to `pool`.
//
// Only ACTIVITY_DOMAIN_HSA_OPS does anything; every other domain is a no-op.
//   - HSA_OP_ID_COPY: under init_mutex, turns on async copy profiling in the
//     runtime (if the saved API table is populated) and records `pool` as the
//     destination for copy records.
//   - any other op: forwarded to RocpLoader, but only when a loader instance
//     already exists.
void EnableActivity(roctracer_domain_t domain, uint32_t op, roctracer_pool_t* pool) {
  if (domain != ACTIVITY_DOMAIN_HSA_OPS) return;

  if (op == HSA_OP_ID_COPY) {
    std::scoped_lock lock(init_mutex);

    if (saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn != nullptr) {
      [[maybe_unused]] const hsa_status_t enable_status =
          saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(true);
      assert(enable_status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");
    }
    async_copy_callback_enabled = true;
    async_copy_callback_memory_pool = reinterpret_cast<MemoryPool*>(pool);
    return;
  }

  // NOTE(review): init_phase is true exactly when GetRef() is null, which is
  // also the condition for the early return below, so the InitActivityCallback
  // branch looks unreachable unless GetRef() changes between the two reads.
  // Kept as-is; confirm the intended behavior.
  const bool init_phase = (RocpLoader::GetRef() == nullptr);
  if (RocpLoader::GetRef() == nullptr) return;

  if (init_phase) {
    RocpLoader::Instance().InitActivityCallback(reinterpret_cast<void*>(AsyncActivityCallback),
                                                pool);
  }

  const bool enabled = RocpLoader::Instance().EnableActivityCallback(op, true);
  if (!enabled) FATAL_LOGGING("HSA::EnableActivityCallback error");
}
|
||||
|
||||
// Register a user callback for one operation of an HSA domain.
//
//   domain    - ACTIVITY_DOMAIN_HSA_API or ACTIVITY_DOMAIN_HSA_EVT; the
//               HSA_OPS domain and unknown domains are ignored.
//   cid       - operation id; must be below HSA_API_ID_NUMBER (HSA_API) or
//               HSA_EVT_ID_NUMBER (HSA_EVT).
//   callback  - function stored in the domain's callback table for cid.
//   user_data - opaque pointer stored alongside the callback.
//
// An out-of-range cid is reported through EXC_RAISING with
// ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT.
void EnableCallback(roctracer_domain_t domain, uint32_t cid, roctracer_rtapi_callback_t callback,
                    void* user_data) {
  switch (domain) {
    case ACTIVITY_DOMAIN_HSA_OPS:
      break;
    case ACTIVITY_DOMAIN_HSA_API:
      if (cid >= HSA_API_ID_NUMBER)
        EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT,
                    "invalid HSA API operation ID(" << cid << ")");

      detail::cb_table.Set(cid, callback, user_data);
      break;
    case ACTIVITY_DOMAIN_HSA_EVT:
      // The message names the EVT domain (it used to say "HSA API"), matching
      // the wording DisableCallback uses for the same check.
      if (cid >= HSA_EVT_ID_NUMBER)
        EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT,
                    "invalid HSA EVT operation ID(" << cid << ")");

      hsa_evt_cb_table.Set(cid, callback, user_data);
      break;
    default:
      break;
  }
}
|
||||
|
||||
// Disable activity-record collection for one operation of an HSA domain.
//
//   domain - activity domain. Only ACTIVITY_DOMAIN_HSA_OPS does anything here;
//            HSA_API, HSA_EVT and any other domain are silently ignored.
//   op     - operation id inside the domain.
void DisableActivity(roctracer_domain_t domain, uint32_t op) {
  // FIXME: Add HSA api activities.
  if (domain != ACTIVITY_DOMAIN_HSA_OPS) return;

  if (op != HSA_OP_ID_COPY) {
    // Non-copy ops are owned by the rocprofiler loader; nothing to do when it
    // was never created.
    if (RocpLoader::GetRef() == nullptr) return;
    if (!RocpLoader::Instance().EnableActivityCallback(op, false)) {
      FATAL_LOGGING("HSA::EnableActivityCallback(false) error, op(" << op << ")");
    }
    return;
  }

  // Async copy: clear the flag and pool first, then ask the runtime to stop
  // profiling copies, all under init_mutex.
  std::scoped_lock lock(init_mutex);

  async_copy_callback_enabled = false;
  async_copy_callback_memory_pool = nullptr;

  auto enable_fn = saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn;
  if (enable_fn == nullptr) return;

  [[maybe_unused]] hsa_status_t status = enable_fn(false);
  // HSA_STATUS_ERROR_NOT_INITIALIZED is accepted in addition to success.
  assert(status == HSA_STATUS_SUCCESS || status == HSA_STATUS_ERROR_NOT_INITIALIZED ||
         !"hsa_amd_profiling_async_copy_enable failed");
}
|
||||
|
||||
// Remove the user callback registered for one operation of an HSA domain by
// storing a null callback and null user data in the domain's callback table.
//
//   domain - ACTIVITY_DOMAIN_HSA_API or ACTIVITY_DOMAIN_HSA_EVT; the HSA_OPS
//            domain and unknown domains are ignored.
//   cid    - operation id; an out-of-range value is reported through
//            EXC_RAISING with ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT.
void DisableCallback(roctracer_domain_t domain, uint32_t cid) {
  if (domain == ACTIVITY_DOMAIN_HSA_API) {
    if (cid >= HSA_API_ID_NUMBER) {
      EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT,
                  "invalid HSA API operation ID(" << cid << ")");
    }
    detail::cb_table.Set(cid, nullptr, nullptr);
  } else if (domain == ACTIVITY_DOMAIN_HSA_EVT) {
    if (cid >= HSA_EVT_ID_NUMBER) {
      EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT,
                  "invalid HSA EVT operation ID(" << cid << ")");
    }
    hsa_evt_cb_table.Set(cid, nullptr, nullptr);
  }
  // ACTIVITY_DOMAIN_HSA_OPS and every other domain: nothing to remove.
}
|
||||
|
||||
} // namespace roctracer::hsa_support
|
||||
@@ -0,0 +1,50 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef HSA_SUPPORT_H_
|
||||
#define HSA_SUPPORT_H_
|
||||
|
||||
#include "roctracer.h"
|
||||
#include "roctracer_hsa.h"
|
||||
|
||||
#include <hsa/hsa_api_trace.h>
|
||||
|
||||
// Entry points of the HSA support layer used by the roctracer core.
namespace roctracer::hsa_support {
|
||||
|
||||
// Invoked from the tool's OnLoad entry point with the HSA runtime's API table.
void Initialize(HsaApiTable* table);
|
||||
// Invoked from the tool's OnUnload entry point.
void Finalize();
|
||||
|
||||
// Name of an HSA_API operation id (used by roctracer_op_string).
const char* GetApiName(uint32_t id);
|
||||
// Name of an HSA_EVT id (used by roctracer_op_string).
// NOTE(review): the implementation appears to throw ApiError for ids that are
// not valid event ids - confirm.
const char* GetEvtName(uint32_t id);
|
||||
// Name of an HSA_OPS operation id; forwards to RocpLoader's GetOpName().
const char* GetOpsName(uint32_t id);
|
||||
// Numeric code for an HSA API name; forwards to detail::GetApiCode().
uint32_t GetApiCode(const char* str);
|
||||
|
||||
// Start writing activity records for `op` of `domain` into `pool`. Only the
// ACTIVITY_DOMAIN_HSA_OPS domain currently has an effect.
void EnableActivity(roctracer_domain_t domain, uint32_t op, roctracer_pool_t* pool);
|
||||
// Store `callback`/`user_data` for id `cid` of the HSA_API or HSA_EVT domain.
// An out-of-range `cid` is reported as ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT.
void EnableCallback(roctracer_domain_t domain, uint32_t cid, roctracer_rtapi_callback_t callback,
|
||||
void* user_data);
|
||||
|
||||
// Clear the callback stored for id `cid` of the HSA_API or HSA_EVT domain.
void DisableCallback(roctracer_domain_t domain, uint32_t cid);
|
||||
// Stop activity records for `op` of `domain`; only ACTIVITY_DOMAIN_HSA_OPS
// currently has an effect.
void DisableActivity(roctracer_domain_t domain, uint32_t op);
|
||||
|
||||
// Timestamp source used for record begin_ns/end_ns and for
// roctracer_get_timestamp (nanoseconds, going by the name).
uint64_t timestamp_ns();
|
||||
|
||||
}  // namespace roctracer::hsa_support
|
||||
|
||||
#endif // HSA_SUPPORT_H_
|
||||
@@ -24,6 +24,8 @@
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
#include <dlfcn.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#define ONLD_TRACE(str) \
|
||||
if (getenv("ROCP_ONLOAD_TRACE")) do { \
|
||||
@@ -36,7 +38,7 @@ namespace roctracer {
|
||||
|
||||
// Base runtime loader class
|
||||
template <class T> class BaseLoader : public T {
|
||||
static uint32_t GetPid() { return syscall(__NR_getpid); }
|
||||
static uint32_t GetPid() { return ::syscall(__NR_getpid); }
|
||||
|
||||
public:
|
||||
typedef std::mutex mutex_t;
|
||||
@@ -132,8 +134,12 @@ class RocpApi {
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace roctracer
|
||||
|
||||
// HIP runtime library loader class
|
||||
#include "roctracer_hip.h"
|
||||
|
||||
namespace roctracer {
|
||||
#if STATIC_BUILD
|
||||
__attribute__((weak)) hipError_t hipRegisterApiCallback(uint32_t id, void* fun, void* arg) {
|
||||
return hipErrorUnknown;
|
||||
|
||||
@@ -21,6 +21,8 @@
|
||||
#ifndef MEMORY_POOL_H_
|
||||
#define MEMORY_POOL_H_
|
||||
|
||||
#include "roctracer.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <condition_variable>
|
||||
#include <cstdlib>
|
||||
|
||||
+24
-261
@@ -40,13 +40,12 @@
|
||||
#include "correlation_id.h"
|
||||
#include "journal.h"
|
||||
#include "loader.h"
|
||||
#include "hsa_support.h"
|
||||
#include "memory_pool.h"
|
||||
#include "tracker.h"
|
||||
#include "exception.h"
|
||||
#include "util/logger.h"
|
||||
|
||||
#include "hsa_prof_str.inline.h"
|
||||
|
||||
#define CHECK_HSA_STATUS(msg, status) \
|
||||
do { \
|
||||
if ((status) != HSA_STATUS_SUCCESS) { \
|
||||
@@ -106,46 +105,11 @@ static inline uint32_t GetTid() {
|
||||
//
|
||||
namespace roctracer {
|
||||
|
||||
namespace hsa_support {
|
||||
|
||||
static CoreApiTable saved_core_api;
|
||||
static AmdExtTable saved_amd_ext_api;
|
||||
|
||||
// async copy activity callback
|
||||
std::mutex init_mutex;
|
||||
bool async_copy_callback_enabled = false;
|
||||
MemoryPool* async_copy_callback_memory_pool = nullptr;
|
||||
|
||||
} // namespace hsa_support
|
||||
|
||||
namespace ext_support {
|
||||
roctracer_start_cb_t roctracer_start_cb = nullptr;
|
||||
roctracer_stop_cb_t roctracer_stop_cb = nullptr;
|
||||
} // namespace ext_support
|
||||
|
||||
namespace util {
|
||||
|
||||
roctracer_timestamp_t timestamp_ns() {
|
||||
uint64_t sysclock;
|
||||
|
||||
hsa_status_t status =
|
||||
hsa_support::saved_core_api.hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock);
|
||||
if (status == HSA_STATUS_ERROR_NOT_INITIALIZED) return 0;
|
||||
CHECK_HSA_STATUS("hsa_system_get_info()", status);
|
||||
|
||||
static uint64_t sysclock_period = []() {
|
||||
uint64_t sysclock_hz = 0;
|
||||
hsa_status_t status = hsa_support::saved_core_api.hsa_system_get_info_fn(
|
||||
HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
|
||||
CHECK_HSA_STATUS("hsa_system_get_info()", status);
|
||||
return (uint64_t)1000000000 / sysclock_hz;
|
||||
}();
|
||||
|
||||
return sysclock * sysclock_period;
|
||||
}
|
||||
|
||||
} // namespace util
|
||||
|
||||
struct CallbackJournalData {
|
||||
roctracer_rtapi_callback_t callback;
|
||||
void* user_data;
|
||||
@@ -193,12 +157,12 @@ void HIP_ApiCallback(uint32_t op_id, roctracer_record_t* record, void* callback_
|
||||
record->op = op_id;
|
||||
record->process_id = GetPid();
|
||||
record->thread_id = GetTid();
|
||||
record->begin_ns = util::timestamp_ns();
|
||||
record->begin_ns = hsa_support::timestamp_ns();
|
||||
record->correlation_id = correlation_id;
|
||||
}
|
||||
} else {
|
||||
if (pool != nullptr) {
|
||||
record->end_ns = util::timestamp_ns();
|
||||
record->end_ns = hsa_support::timestamp_ns();
|
||||
|
||||
if (auto external_id = ExternalCorrelationId()) {
|
||||
roctracer_record_t ext_record{};
|
||||
@@ -247,82 +211,6 @@ void HIP_AsyncActivityCallback(uint32_t op_id, void* record_ptr, void* arg) {
|
||||
record_ptr->begin_ns, record_ptr->end_ns);
|
||||
}
|
||||
|
||||
namespace hsa_support {
|
||||
|
||||
struct AgentInfo {
|
||||
int index;
|
||||
hsa_device_type_t type;
|
||||
};
|
||||
std::unordered_map<decltype(hsa_agent_t::handle), AgentInfo> agent_info_map;
|
||||
|
||||
void hsa_async_copy_handler(const Tracker::entry_t* entry) {
|
||||
activity_record_t record{};
|
||||
record.domain = ACTIVITY_DOMAIN_HSA_OPS;
|
||||
record.op = HSA_OP_ID_COPY;
|
||||
record.begin_ns = entry->begin;
|
||||
record.end_ns = entry->end;
|
||||
record.device_id = 0;
|
||||
record.correlation_id = entry->correlation_id;
|
||||
entry->pool->Write(record);
|
||||
}
|
||||
|
||||
hsa_status_t hsa_amd_memory_async_copy_interceptor(void* dst, hsa_agent_t dst_agent,
|
||||
const void* src, hsa_agent_t src_agent,
|
||||
size_t size, uint32_t num_dep_signals,
|
||||
const hsa_signal_t* dep_signals,
|
||||
hsa_signal_t completion_signal) {
|
||||
if (!async_copy_callback_enabled) {
|
||||
return saved_amd_ext_api.hsa_amd_memory_async_copy_fn(
|
||||
dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, completion_signal);
|
||||
}
|
||||
|
||||
Tracker::entry_t* entry = new Tracker::entry_t();
|
||||
entry->handler = hsa_async_copy_handler;
|
||||
entry->pool = async_copy_callback_memory_pool;
|
||||
entry->correlation_id = CorrelationId();
|
||||
Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
|
||||
|
||||
hsa_status_t status = saved_amd_ext_api.hsa_amd_memory_async_copy_fn(
|
||||
dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, entry->signal);
|
||||
if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
hsa_status_t hsa_amd_memory_async_copy_rect_interceptor(
|
||||
const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset, const hsa_pitched_ptr_t* src,
|
||||
const hsa_dim3_t* src_offset, const hsa_dim3_t* range, hsa_agent_t copy_agent,
|
||||
hsa_amd_copy_direction_t dir, uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
|
||||
hsa_signal_t completion_signal) {
|
||||
if (!async_copy_callback_enabled) {
|
||||
return saved_amd_ext_api.hsa_amd_memory_async_copy_rect_fn(
|
||||
dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals,
|
||||
completion_signal);
|
||||
}
|
||||
|
||||
Tracker::entry_t* entry = new Tracker::entry_t();
|
||||
entry->handler = hsa_async_copy_handler;
|
||||
entry->pool = async_copy_callback_memory_pool;
|
||||
entry->correlation_id = CorrelationId();
|
||||
Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
|
||||
|
||||
hsa_status_t status = saved_amd_ext_api.hsa_amd_memory_async_copy_rect_fn(
|
||||
dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals,
|
||||
entry->signal);
|
||||
if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
} // namespace hsa_support
|
||||
|
||||
void HSA_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) {
|
||||
MemoryPool* pool = reinterpret_cast<MemoryPool*>(arg);
|
||||
roctracer_record_t* record_ptr = reinterpret_cast<roctracer_record_t*>(record);
|
||||
record_ptr->domain = ACTIVITY_DOMAIN_HSA_OPS;
|
||||
pool->Write(*record_ptr);
|
||||
}
|
||||
|
||||
// Logger routines and primitives
|
||||
util::Logger::mutex_t util::Logger::mutex_;
|
||||
std::atomic<util::Logger*> util::Logger::instance_{};
|
||||
@@ -340,6 +228,7 @@ unsigned set_stopped(unsigned val) {
|
||||
stop_status_value = val;
|
||||
return ret;
|
||||
}
|
||||
|
||||
} // namespace roctracer
|
||||
|
||||
using namespace roctracer;
|
||||
@@ -367,9 +256,9 @@ ROCTRACER_API const char* roctracer_op_string(uint32_t domain, uint32_t op, uint
|
||||
case ACTIVITY_DOMAIN_HSA_API:
|
||||
return hsa_support::GetApiName(op);
|
||||
case ACTIVITY_DOMAIN_HSA_EVT:
|
||||
return RocpLoader::Instance().GetEvtName(op);
|
||||
return hsa_support::GetEvtName(op);
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
return RocpLoader::Instance().GetOpName(op);
|
||||
return hsa_support::GetOpsName(op);
|
||||
case ACTIVITY_DOMAIN_HIP_OPS:
|
||||
return HipLoader::Instance().GetOpName(kind);
|
||||
case ACTIVITY_DOMAIN_HIP_API:
|
||||
@@ -460,27 +349,10 @@ static void roctracer_enable_callback_fun(roctracer_domain_t domain, uint32_t op
|
||||
roctracer_rtapi_callback_t callback, void* user_data) {
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
case ACTIVITY_DOMAIN_HSA_API:
|
||||
case ACTIVITY_DOMAIN_HSA_EVT:
|
||||
hsa_support::EnableCallback(domain, op, callback, user_data);
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_API: {
|
||||
#if 0
|
||||
if (op == HSA_API_ID_DISPATCH) {
|
||||
if (!RocpLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data))
|
||||
FATAL_LOGGING("HSA::RegisterApiCallback error(" << op << ") failed");
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
if (op >= HSA_API_ID_NUMBER)
|
||||
EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT,
|
||||
"invalid HSA API operation ID(" << op << ")");
|
||||
|
||||
hsa_support::cb_table.Set(op, callback, user_data);
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_EVT: {
|
||||
if (!RocpLoader::Instance().RegisterEvtCallback(op, (void*)callback, user_data))
|
||||
FATAL_LOGGING("HSA::RegisterEvtCallback error(" << op << ") failed");
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HIP_OPS:
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
@@ -539,19 +411,10 @@ ROCTRACER_API roctracer_status_t roctracer_enable_domain_callback(
|
||||
static void roctracer_disable_callback_fun(roctracer_domain_t domain, uint32_t op) {
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
case ACTIVITY_DOMAIN_HSA_API:
|
||||
case ACTIVITY_DOMAIN_HSA_EVT:
|
||||
hsa_support::DisableCallback(domain, op);
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_API: {
|
||||
#if 0
|
||||
if (op == HSA_API_ID_DISPATCH && !RocpLoader::Instance().RemoveApiCallback(op))
|
||||
FATAL_LOGGING("HSA::RemoveActivityCallback error(" << op << ") failed");
|
||||
break;
|
||||
#endif
|
||||
if (op >= HSA_API_ID_NUMBER)
|
||||
EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT,
|
||||
"invalid HSA API operation ID(" << op << ")");
|
||||
hsa_support::cb_table.Set(op, nullptr, nullptr);
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HIP_OPS:
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
@@ -568,11 +431,6 @@ static void roctracer_disable_callback_fun(roctracer_domain_t domain, uint32_t o
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_EVT: {
|
||||
if (!RocpLoader::Instance().RemoveEvtCallback(op))
|
||||
FATAL_LOGGING("HSA::RemoveEvtCallback error(" << op << ") failed");
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_ROCTX: {
|
||||
if (RocTxLoader::Instance().Enabled() && !RocTxLoader::Instance().RemoveApiCallback(op))
|
||||
FATAL_LOGGING("ROCTX::RemoveApiCallback(" << op << ") failed");
|
||||
@@ -656,34 +514,11 @@ static void roctracer_enable_activity_fun(roctracer_domain_t domain, uint32_t op
|
||||
roctracer_pool_t* pool) {
|
||||
assert(pool != nullptr);
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: {
|
||||
if (op == HSA_OP_ID_COPY) {
|
||||
std::scoped_lock lock(hsa_support::init_mutex);
|
||||
|
||||
if (hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn != nullptr) {
|
||||
[[maybe_unused]] hsa_status_t status =
|
||||
hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(true);
|
||||
assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");
|
||||
}
|
||||
RocpLoader::Instance();
|
||||
hsa_support::async_copy_callback_enabled = true;
|
||||
hsa_support::async_copy_callback_memory_pool = reinterpret_cast<MemoryPool*>(pool);
|
||||
} else {
|
||||
const bool init_phase = (RocpLoader::GetRef() == nullptr);
|
||||
if (RocpLoader::GetRef() == nullptr) break;
|
||||
if (init_phase) {
|
||||
RocpLoader::Instance().InitActivityCallback((void*)HSA_AsyncActivityCallback,
|
||||
(void*)pool);
|
||||
}
|
||||
if (!RocpLoader::Instance().EnableActivityCallback(op, true))
|
||||
FATAL_LOGGING("HSA::EnableActivityCallback error");
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_API:
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_EVT:
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
RocpLoader::Instance();
|
||||
case ACTIVITY_DOMAIN_HSA_API:
|
||||
case ACTIVITY_DOMAIN_HSA_EVT:
|
||||
hsa_support::EnableActivity(domain, op, pool);
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HIP_OPS: {
|
||||
if (HipLoader::Instance().Enabled() &&
|
||||
@@ -757,29 +592,10 @@ ROCTRACER_API roctracer_status_t roctracer_enable_domain_activity(activity_domai
|
||||
// Disable activity records logging
|
||||
static void roctracer_disable_activity_fun(roctracer_domain_t domain, uint32_t op) {
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: {
|
||||
if (op == HSA_OP_ID_COPY) {
|
||||
std::scoped_lock lock(hsa_support::init_mutex);
|
||||
|
||||
hsa_support::async_copy_callback_enabled = false;
|
||||
hsa_support::async_copy_callback_memory_pool = nullptr;
|
||||
|
||||
if (hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn != nullptr) {
|
||||
[[maybe_unused]] hsa_status_t status =
|
||||
hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(false);
|
||||
assert(status == HSA_STATUS_SUCCESS || status == HSA_STATUS_ERROR_NOT_INITIALIZED ||
|
||||
!"hsa_amd_profiling_async_copy_enable failed");
|
||||
}
|
||||
} else {
|
||||
if (RocpLoader::GetRef() == nullptr) break;
|
||||
if (!RocpLoader::Instance().EnableActivityCallback(op, false))
|
||||
FATAL_LOGGING("HSA::EnableActivityCallback(false) error, op(" << op << ")");
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
case ACTIVITY_DOMAIN_HSA_API:
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_EVT:
|
||||
hsa_support::DisableActivity(domain, op);
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HIP_OPS: {
|
||||
if (HipLoader::Instance().Enabled() &&
|
||||
@@ -941,7 +757,7 @@ ROCTRACER_API void roctracer_stop() {
|
||||
|
||||
ROCTRACER_API roctracer_status_t roctracer_get_timestamp(roctracer_timestamp_t* timestamp) {
|
||||
API_METHOD_PREFIX
|
||||
*timestamp = util::timestamp_ns();
|
||||
*timestamp = hsa_support::timestamp_ns();
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
@@ -985,71 +801,18 @@ __attribute__((destructor)) void destructor() {
|
||||
extern "C" {
|
||||
|
||||
// The HSA_AMD_TOOL_PRIORITY variable must be a constant value type initialized by the loader
|
||||
// itself, not by code during _init. 'extern const' seems do that although that is not a guarantee.
|
||||
// itself, not by code during _init. 'extern const' seems to do that although that is not a
|
||||
// guarantee.
|
||||
ROCTRACER_EXPORT extern const uint32_t HSA_AMD_TOOL_PRIORITY = 50;
|
||||
|
||||
// HSA-runtime tool on-load method
|
||||
ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
uint64_t failed_tool_count, const char* const* failed_tool_names) {
|
||||
std::scoped_lock lock(hsa_support::init_mutex);
|
||||
|
||||
// Save the HSA core api and amd_ext api.
|
||||
hsa_support::saved_core_api = *table->core_;
|
||||
hsa_support::saved_amd_ext_api = *table->amd_ext_;
|
||||
|
||||
// Enumerate the agents.
|
||||
if (hsa_support::saved_core_api.hsa_iterate_agents_fn(
|
||||
[](hsa_agent_t agent, void* data) {
|
||||
hsa_support::AgentInfo agent_info;
|
||||
if (hsa_support::saved_core_api.hsa_agent_get_info_fn(
|
||||
agent, HSA_AGENT_INFO_DEVICE, &agent_info.type) != HSA_STATUS_SUCCESS)
|
||||
FATAL_LOGGING("hsa_agent_get_info failed");
|
||||
switch (agent_info.type) {
|
||||
case HSA_DEVICE_TYPE_CPU:
|
||||
static int cpu_agent_count = 0;
|
||||
agent_info.index = cpu_agent_count++;
|
||||
break;
|
||||
case HSA_DEVICE_TYPE_GPU:
|
||||
static int gpu_agent_count = 0;
|
||||
agent_info.index = gpu_agent_count++;
|
||||
break;
|
||||
default:
|
||||
static int other_agent_count = 0;
|
||||
agent_info.index = other_agent_count++;
|
||||
break;
|
||||
}
|
||||
hsa_support::agent_info_map.emplace(agent.handle, agent_info);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
nullptr) != HSA_STATUS_SUCCESS)
|
||||
FATAL_LOGGING("hsa_iterate_agents failed");
|
||||
|
||||
// Install the HSA_OPS intercept
|
||||
table->amd_ext_->hsa_amd_memory_async_copy_fn =
|
||||
hsa_support::hsa_amd_memory_async_copy_interceptor;
|
||||
table->amd_ext_->hsa_amd_memory_async_copy_rect_fn =
|
||||
hsa_support::hsa_amd_memory_async_copy_rect_interceptor;
|
||||
|
||||
// Install the HSA_API wrappers
|
||||
hsa_support::InstallCoreApiWrappers(table->core_);
|
||||
hsa_support::InstallAmdExtWrappers(table->amd_ext_);
|
||||
hsa_support::InstallImageExtWrappers(table->image_ext_);
|
||||
|
||||
if (hsa_support::async_copy_callback_enabled) {
|
||||
[[maybe_unused]] hsa_status_t status =
|
||||
hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(true);
|
||||
assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");
|
||||
}
|
||||
|
||||
[](auto&&...) {}(runtime_version, failed_tool_count, failed_tool_names);
|
||||
hsa_support::Initialize(table);
|
||||
return true;
|
||||
}
|
||||
|
||||
ROCTRACER_EXPORT void OnUnload() {
|
||||
if (hsa_support::async_copy_callback_enabled) {
|
||||
[[maybe_unused]] hsa_status_t status =
|
||||
hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(false);
|
||||
assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");
|
||||
}
|
||||
}
|
||||
ROCTRACER_EXPORT void OnUnload() { hsa_support::Finalize(); }
|
||||
|
||||
} // extern "C"
|
||||
@@ -694,7 +694,8 @@ void tool_load() {
|
||||
extern "C" {
|
||||
|
||||
// The HSA_AMD_TOOL_PRIORITY variable must be a constant value type initialized by the loader
|
||||
// itself, not by code during _init. 'extern const' seems do that although that is not a guarantee.
|
||||
// itself, not by code during _init. 'extern const' seems to do that although that is not a
|
||||
// guarantee.
|
||||
ROCTRACER_EXPORT extern const uint32_t HSA_AMD_TOOL_PRIORITY = 1050;
|
||||
|
||||
// HSA-runtime tool on-load method
|
||||
|
||||
@@ -68,12 +68,6 @@ target_include_directories(MatrixTranspose_ctest PRIVATE ${PROJECT_SOURCE_DIR}/i
|
||||
target_link_libraries(MatrixTranspose_ctest PRIVATE roctracer roctx)
|
||||
add_dependencies(mytest MatrixTranspose_ctest)
|
||||
|
||||
## Build hsaco_test reference test
|
||||
add_library(hsaco_test SHARED app/hsaco_test.cpp)
|
||||
target_compile_definitions(hsaco_test PRIVATE AMD_INTERNAL_BUILD)
|
||||
target_link_libraries(hsaco_test hsa-runtime64::hsa-runtime64)
|
||||
add_dependencies(mytest hsaco_test)
|
||||
|
||||
## Build codeobj event test
|
||||
add_library(codeobj_test SHARED app/codeobj_test.cpp)
|
||||
target_include_directories(codeobj_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/inc)
|
||||
|
||||
@@ -18,68 +18,52 @@
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <cstring>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "inc/roctracer.h"
|
||||
#include "inc/roctracer_hsa.h"
|
||||
#include <rocprofiler/rocprofiler.h>
|
||||
|
||||
#define PUBLIC_API __attribute__((visibility("default")))
|
||||
#define CONSTRUCTOR_API __attribute__((constructor))
|
||||
#define DESTRUCTOR_API __attribute__((destructor))
|
||||
|
||||
namespace {
|
||||
// Check returned HSA API status
|
||||
void check_status(roctracer_status_t status) {
|
||||
inline void CHECK(roctracer_status_t status) {
|
||||
if (status != ROCTRACER_STATUS_SUCCESS) {
|
||||
const char* error_string = roctracer_error_string();
|
||||
fprintf(stderr, "ERROR: %s\n", error_string);
|
||||
fprintf(stderr, "ERROR: %s\n", roctracer_error_string());
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
// codeobj callback
|
||||
void codeobj_callback(uint32_t domain, uint32_t cid, const void* data, void* arg) {
|
||||
void CodeObjectCallback(uint32_t domain, uint32_t cid, const void* data, void* arg) {
|
||||
const hsa_evt_data_t* evt_data = reinterpret_cast<const hsa_evt_data_t*>(data);
|
||||
const char* uri = evt_data->codeobj.uri;
|
||||
printf(
|
||||
"codeobj_callback domain(%u) cid(%u): load_base(0x%lx) load_size(0x%lx) load_delta(0x%lx) "
|
||||
"uri(\"%s\")\n",
|
||||
domain, cid, evt_data->codeobj.load_base, evt_data->codeobj.load_size,
|
||||
evt_data->codeobj.load_delta, uri);
|
||||
free((void*)uri);
|
||||
fflush(stdout);
|
||||
fprintf(stdout,
|
||||
"codeobj_callback domain(%u) cid(%u): load_base(0x%lx) load_size(0x%lx) "
|
||||
"load_delta(0x%lx) uri(\"%s\") unload(%d)\n",
|
||||
domain, cid, evt_data->codeobj.load_base, evt_data->codeobj.load_size,
|
||||
evt_data->codeobj.load_delta, evt_data->codeobj.uri, evt_data->codeobj.unload);
|
||||
}
|
||||
|
||||
void initialize() {
|
||||
roctracer_status_t status = roctracer_enable_op_callback(
|
||||
ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ, codeobj_callback, NULL);
|
||||
check_status(status);
|
||||
} // namespace
|
||||
|
||||
#include <hsa/hsa_api_trace.h>
|
||||
|
||||
extern "C" {
|
||||
// The HSA_AMD_TOOL_PRIORITY variable must be a constant value type initialized by the loader
|
||||
// itself, not by code during _init. 'extern const' seems to do that although that is not a
|
||||
// guarantee.
|
||||
ROCTRACER_EXPORT extern const uint32_t HSA_AMD_TOOL_PRIORITY = 1050;
|
||||
|
||||
// HSA-runtime tool on-load method
|
||||
ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
uint64_t failed_tool_count, const char* const* failed_tool_names) {
|
||||
CHECK(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ,
|
||||
CodeObjectCallback, nullptr));
|
||||
return true;
|
||||
}
|
||||
|
||||
void cleanup() {
|
||||
roctracer_status_t status = roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_EVT);
|
||||
check_status(status);
|
||||
ROCTRACER_EXPORT void OnUnload() {
|
||||
CHECK(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_EVT));
|
||||
}
|
||||
|
||||
// Tool constructor
|
||||
extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings) {
|
||||
// Enable HSA events intercepting
|
||||
settings->hsa_intercepting = 1;
|
||||
// Initialize profiling
|
||||
initialize();
|
||||
}
|
||||
|
||||
// Tool destructor
|
||||
extern "C" PUBLIC_API void OnUnloadTool() {
|
||||
// Final resources cleanup
|
||||
cleanup();
|
||||
}
|
||||
|
||||
extern "C" CONSTRUCTOR_API void constructor() {
|
||||
printf("constructor\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
extern "C" DESTRUCTOR_API void destructor() { OnUnloadTool(); }
|
||||
} // extern "C"
|
||||
@@ -1,127 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_api_trace.h>
|
||||
#include <hsa/hsa_ven_amd_loader.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define PUBLIC_API __attribute__((visibility("default")))
|
||||
#define CONSTRUCTOR_API __attribute__((constructor))
|
||||
#define DESTRUCTOR_API __attribute__((destructor))
|
||||
|
||||
// Evaluate an HSA runtime call exactly once. If the result is anything other
// than HSA_STATUS_SUCCESS, print the stringified call to stdout, flush, and
// abort the process.
#define HSA_RT(call)                    \
  do {                                  \
    if ((call) != HSA_STATUS_SUCCESS) { \
      printf("error \"%s\"\n", #call);  \
      fflush(stdout);                   \
      abort();                          \
    }                                   \
  } while (0)
|
||||
|
||||
// HSA API intercepting primitives
|
||||
// Runtime's original hsa_executable_freeze entry, saved by OnLoad before the
// interceptor is written into the core API table.
decltype(hsa_executable_freeze)* hsa_executable_freeze_fn;
|
||||
// AMD loader extension function table, filled in by OnLoad through
// hsa_system_get_major_extension_table.
hsa_ven_amd_loader_1_01_pfn_t loader_api_table{};
|
||||
|
||||
// Callback for hsa_ven_amd_loader_executable_iterate_loaded_code_objects:
// queries and prints the load base, load size, load delta and URI of one
// loaded code object.
//
//   executable         - executable being iterated (not used here).
//   loaded_code_object - code object whose properties are printed.
//   arg                - user argument of the iteration (not used here).
//
// Returns HSA_STATUS_SUCCESS. Any failing loader query aborts the process via
// HSA_RT, and a failed allocation aborts after perror().
hsa_status_t code_object_callback(hsa_executable_t executable,
                                  hsa_loaded_code_object_t loaded_code_object, void* arg) {
  printf("code_object_callback\n");
  fflush(stdout);

  uint64_t load_base = 0;
  uint64_t load_size = 0;
  uint64_t load_delta = 0;
  uint32_t uri_len = 0;
  char* uri_str = NULL;

  HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info(
      loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE, &load_base));
  HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info(
      loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE, &load_size));
  HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info(
      loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA, &load_delta));
  HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info(
      loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH, &uri_len));

  // One extra zero-filled byte keeps the buffer NUL-terminated (presumably the
  // loader writes at most uri_len bytes - confirm). The size is widened to
  // size_t first so that uri_len + 1 cannot wrap in 32 bits.
  uri_str = (char*)calloc((size_t)uri_len + 1, sizeof(char));
  if (!uri_str) {
    perror("calloc");
    abort();
  }

  HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info(
      loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI, uri_str));

  // uint64_t is not `unsigned long` on every platform, so the values are cast
  // to unsigned long long and printed with %llx; the output text is unchanged.
  printf("load_base(0x%llx)\n", (unsigned long long)load_base);
  fflush(stdout);
  printf("load_size(0x%llx)\n", (unsigned long long)load_size);
  fflush(stdout);
  printf("load_delta(0x%llx)\n", (unsigned long long)load_delta);
  fflush(stdout);
  printf("uri_len(%u)\n", uri_len);
  fflush(stdout);
  printf("uri_str(\"%s\")\n", uri_str);
  fflush(stdout);

  free(uri_str);

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Replacement for hsa_executable_freeze installed by OnLoad: prints the
// properties of every loaded code object of `executable`, then forwards to the
// saved original entry point with the same arguments.
// NOTE(review): both steps go through HSA_RT, so a failing freeze aborts the
// process instead of returning its status to the caller - presumably
// acceptable for a test tool; confirm.
hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable, const char* options) {
|
||||
// Runs code_object_callback once per loaded code object.
HSA_RT(loader_api_table.hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
|
||||
executable, code_object_callback, NULL));
|
||||
// Forward to the runtime's original hsa_executable_freeze.
HSA_RT(hsa_executable_freeze_fn(executable, options));
|
||||
// Only reached when both calls above succeeded.
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// HSA-runtime tool on-load method
|
||||
extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names) {
|
||||
printf("OnLoad: begin\n");
|
||||
fflush(stdout);
|
||||
// intercepting hsa_executable_freeze API
|
||||
hsa_executable_freeze_fn = table->core_->hsa_executable_freeze_fn;
|
||||
table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor;
|
||||
// Fetching AMD Loader HSA extension API
|
||||
HSA_RT(hsa_system_get_major_extension_table(
|
||||
HSA_EXTENSION_AMD_LOADER, 1, sizeof(hsa_ven_amd_loader_1_01_pfn_t), &loader_api_table));
|
||||
printf("OnLoad: end\n");
|
||||
fflush(stdout);
|
||||
return true;
|
||||
}
|
||||
|
||||
// HSA-runtime tool on-unload method: only traces the call on stdout.
extern "C" PUBLIC_API void OnUnload() {
  fputs("OnUnload\n", stdout);
  fflush(stdout);
}
|
||||
|
||||
// Traces "constructor" on stdout and flushes immediately.
// NOTE(review): CONSTRUCTOR_API is defined elsewhere; presumably it marks this
// as a library-load constructor - confirm against its definition.
extern "C" CONSTRUCTOR_API void constructor() {
  fputs("constructor\n", stdout);
  fflush(stdout);
}
|
||||
|
||||
// Traces "destructor" on stdout and flushes immediately.
// NOTE(review): DESTRUCTOR_API is defined elsewhere; presumably it marks this
// as a library-unload destructor - confirm against its definition.
extern "C" DESTRUCTOR_API void destructor() {
  fputs("destructor\n", stdout);
  fflush(stdout);
}
|
||||
@@ -14,7 +14,6 @@ MatrixTranspose_hip_input_trace --check-events .*
|
||||
copy_hsa_trace --check-events .*
|
||||
copy_hsa_input_trace --check-events .*
|
||||
load_unload_reload_trace --check-order .* --ignore-count hsa_agent_get_info
|
||||
hsa_co_trace --check-none
|
||||
code_obj_trace --check-none
|
||||
trace_buffer --check-none
|
||||
memory_pool --check-none
|
||||
|
||||
+1
-5
@@ -169,11 +169,7 @@ unset ROCP_INPUT
|
||||
# Check that the tracer tool can be unloaded and then reloaded.
|
||||
eval_test "Load/Unload/Reload the tracer tool" ./test/load_unload_reload_test load_unload_reload_trace
|
||||
|
||||
export HSA_TOOLS_LIB="./test/libhsaco_test.so"
|
||||
eval_test "tool HSA codeobj" ./test/MatrixTranspose hsa_co_trace
|
||||
|
||||
export ROCP_TOOL_LIB=./test/libcodeobj_test.so
|
||||
export HSA_TOOLS_LIB="librocprofiler64.so"
|
||||
export LD_PRELOAD=./test/libcodeobj_test.so
|
||||
eval_test "tool tracer codeobj" ./test/MatrixTranspose code_obj_trace
|
||||
|
||||
unset LD_PRELOAD
|
||||
|
||||
Reference in New Issue
Block a user