Bring the HSA_EVT callbacks back to the roctracer

Change-Id: I26080b264d7989880ba7e9f00502cc680b2256d7
This commit is contained in:
Laurent Morichetti
2022-06-10 18:07:30 -07:00
rodzic ac3214d32a
commit c2b87b1fd7
16 zmienionych plików z 799 dodań i 465 usunięć
+1
Wyświetl plik
@@ -21,6 +21,7 @@
#ifndef EXT_PROF_PROTOCOL_H_
#define EXT_PROF_PROTOCOL_H_
#include <stdint.h>
#include <stdlib.h>
/* Traced API domains */
+67 -5
Wyświetl plik
@@ -27,8 +27,6 @@
#include <hsa/hsa_ext_amd.h>
#include <hsa_prof_str.h>
#include <rocprofiler/activity.h>
// HSA OP ID enumeration
enum hsa_op_id_t {
HSA_OP_ID_DISPATCH = 0,
@@ -38,9 +36,73 @@ enum hsa_op_id_t {
HSA_OP_ID_NUMBER
};
struct hsa_ops_properties_t {
void* table;
void* reserved1[3];
// HSA EVT ID enumeration
// Identifies the kind of HSA event a callback is registered for / invoked with.
// The values are consecutive starting at 0 so they can index a per-event table;
// HSA_EVT_ID_NUMBER is the count of valid IDs and is not itself a valid event.
enum hsa_evt_id_t {
HSA_EVT_ID_ALLOCATE = 0, // Memory allocate callback
HSA_EVT_ID_DEVICE = 1, // Device assign callback
HSA_EVT_ID_MEMCOPY = 2, // Memcopy callback
HSA_EVT_ID_SUBMIT = 3, // Packet submission callback
HSA_EVT_ID_KSYMBOL = 4, // Loading/unloading of kernel symbol
HSA_EVT_ID_CODEOBJ = 5, // Loading/unloading of device code object
HSA_EVT_ID_NUMBER
};
// Properties block for the HSA OPS domain. It consists solely of four
// pointer-sized reserved slots; no slot is given a meaning in this header.
struct hsa_ops_properties_t {
void* reserved1[4];
};
// HSA EVT data type
// Payload passed to HSA_EVT callbacks. Exactly one member of the anonymous
// union is meaningful for a given callback; which one is determined by the
// hsa_evt_id_t the callback is invoked with.
typedef struct {
union {
// Payload for HSA_EVT_ID_ALLOCATE.
struct {
const void* ptr; // allocated area ptr
size_t size; // allocated area size, zero size means 'free' callback
hsa_amd_segment_t segment; // allocated area's memory segment type
hsa_amd_memory_pool_global_flag_t global_flag; // allocated area's memory global flag
int is_code; // equal to 1 if code is allocated
} allocate;
// Payload for HSA_EVT_ID_DEVICE.
struct {
hsa_device_type_t type; // type of assigned device
uint32_t id; // id of assigned device
hsa_agent_t agent; // device HSA agent handle
const void* ptr; // ptr the device is assigned to
} device;
// Payload for HSA_EVT_ID_MEMCOPY.
struct {
const void* dst; // memcopy dst ptr
const void* src; // memcopy src ptr
size_t size; // memcopy size bytes
} memcopy;
// Payload for packet submission (presumably HSA_EVT_ID_SUBMIT -- confirm with the producer).
struct {
const void* packet; // submitted to GPU packet
const char* kernel_name; // kernel name, NULL if not a kernel dispatch packet
hsa_queue_t* queue; // HSA queue the packet was submitted to
uint32_t device_type; // type of device the packet is submitted to
uint32_t device_id; // id of device the packet is submitted to
} submit;
// Payload for kernel symbol load/unload (presumably HSA_EVT_ID_KSYMBOL -- confirm with the producer).
struct {
uint64_t object; // kernel symbol object
const char* name; // kernel symbol name
uint32_t name_length; // kernel symbol name length
int unload; // symbol executable destroy
} ksymbol;
// Payload for HSA_EVT_ID_CODEOBJ.
struct {
uint32_t storage_type; // code object storage type
int storage_file; // origin file descriptor
uint64_t memory_base; // origin memory base
uint64_t memory_size; // origin memory size
uint64_t load_base; // code object load base
uint64_t load_size; // code object load size
uint64_t load_delta; // code object load delta
uint32_t uri_length; // URI string length (not including the terminating NUL character)
const char* uri; // URI string
int unload; // unload flag
} codeobj;
};
} hsa_evt_data_t;
#endif // INC_ROCTRACER_HSA_H_
+8 -7
Wyświetl plik
@@ -52,6 +52,7 @@ LICENSE = \
header_basic = \
'namespace detail {\n' + \
'template <typename T>\n' + \
' inline static std::ostream& operator<<(std::ostream& out, const T& v) {\n' + \
' using std::operator<<;\n' + \
@@ -111,9 +112,9 @@ def process_struct(file_handle, cppHeader_struct, cppHeader, parent_hier_name, a
indent = ""
str += " if (std::string(\"" + cppHeader_struct + "::" + name + "\").find(" + apiname.upper() + "_structs_regex" + ") != std::string::npos) {\n"
indent = " "
str += indent + " roctracer::" + apiname.lower() + "_support::operator<<(out, \"" + name + "=\");\n"
str += indent + " roctracer::" + apiname.lower() + "_support::operator<<(out, v." + name + ");\n"
str += indent + " roctracer::" + apiname.lower() + "_support::operator<<(out, \", \");\n"
str += indent + " roctracer::" + apiname.lower() + "_support::detail::operator<<(out, \"" + name + "=\");\n"
str += indent + " roctracer::" + apiname.lower() + "_support::detail::operator<<(out, v." + name + ");\n"
str += indent + " roctracer::" + apiname.lower() + "_support::detail::operator<<(out, \", \");\n"
str += " }\n"
if "void" not in mtype:
global_str += str
@@ -181,7 +182,7 @@ def gen_cppheader(infilepath, outfilepath, rank):
if len(cppHeader.classes[c]["properties"]["public"]) != 0:
output_filename_h.write("inline static std::ostream& operator<<(std::ostream& out, const " + c + "& v)\n")
output_filename_h.write("{\n")
output_filename_h.write(" roctracer::" + apiname.lower() + "_support::operator<<(out, '{');\n")
output_filename_h.write(" roctracer::" + apiname.lower() + "_support::detail::operator<<(out, '{');\n")
output_filename_h.write(" " + apiname.upper() + "_depth_max_cnt++;\n")
output_filename_h.write(" if (" + apiname.upper() + "_depth_max == -1 || " + apiname.upper() + "_depth_max_cnt <= " + apiname.upper() + "_depth_max" + ") {\n" )
process_struct(output_filename_h, c, cppHeader, "", apiname)
@@ -190,15 +191,15 @@ def gen_cppheader(infilepath, outfilepath, rank):
output_filename_h.write(global_str)
output_filename_h.write(" };\n")
output_filename_h.write(" " + apiname.upper() + "_depth_max_cnt--;\n")
output_filename_h.write(" roctracer::" + apiname.lower() + "_support::operator<<(out, '}');\n")
output_filename_h.write(" roctracer::" + apiname.lower() + "_support::detail::operator<<(out, '}');\n")
output_filename_h.write(" return out;\n")
output_filename_h.write("}\n")
global_str = ''
global_ops += "inline static std::ostream& operator<<(std::ostream& out, const " + c + "& v)\n" + "{\n" + " roctracer::" + apiname.lower() + "_support::operator<<(out, v);\n" + " return out;\n" + "}\n\n"
global_ops += "inline static std::ostream& operator<<(std::ostream& out, const " + c + "& v)\n" + "{\n" + " roctracer::" + apiname.lower() + "_support::detail::operator<<(out, v);\n" + " return out;\n" + "}\n\n"
if rank == 1 or rank == 2:
footer = '// end ostream ops for '+ apiname + ' \n'
footer += '};};\n\n'
footer += '};};};\n\n'
output_filename_h.write(footer)
output_filename_h.write(global_ops)
footer = '#endif //__cplusplus\n' + \
+3 -4
Wyświetl plik
@@ -334,8 +334,7 @@ class API_DescrParser:
self.cpp_content += '#include <hsa/hsa_api_trace.h>\n'
self.cpp_content += '#include \"util/callback_table.h\"\n\n'
self.cpp_content += '#include <atomic>\n'
self.cpp_content += 'namespace roctracer {\n'
self.cpp_content += 'namespace hsa_support {\n\n'
self.cpp_content += 'namespace roctracer::hsa_support::detail {\n'
self.cpp_content += 'static CoreApiTable CoreApi_saved_before_cb;\n'
self.cpp_content += 'static AmdExtTable AmdExt_saved_before_cb;\n'
@@ -345,7 +344,7 @@ class API_DescrParser:
self.cpp_content += self.add_section('API intercepting code', '', self.gen_intercept)
self.cpp_content += self.add_section('API get_name function', ' ', self.gen_get_name)
self.cpp_content += self.add_section('API get_code function', ' ', self.gen_get_code)
self.cpp_content += '\n};};\n'
self.cpp_content += '\n};\n'
# add code section
def add_section(self, title, gap, fun):
@@ -465,7 +464,7 @@ class API_DescrParser:
def gen_get_name(self, n, name, call, struct):
content = ''
if n == -1:
content += 'static const char* GetApiName(const uint32_t& id) {\n'
content += 'static const char* GetApiName(uint32_t id) {\n'
content += ' switch (id) {\n'
return content
if call != '-':
+1
Wyświetl plik
@@ -24,6 +24,7 @@
#include <sstream>
#include <stdexcept>
#include <string>
#include <sstream>
#define EXC_RAISING(error, stream) \
do { \
+602
Wyświetl plik
@@ -0,0 +1,602 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "hsa_support.h"
#include "correlation_id.h"
#include "exception.h"
#include "loader.h"
#include "memory_pool.h"
#include "roctracer.h"
#include "roctracer_hsa.h"
#include "tracker.h"
#include "util/callback_table.h"
#include "util/logger.h"
#include <hsa/hsa.h>
#include <hsa/hsa_ven_amd_loader.h>
#include <unordered_map>
#include <optional>
#include <mutex>
#include "hsa_prof_str.inline.h"
namespace roctracer::hsa_support {
namespace {
// Registered HSA_EVT callbacks, one slot per hsa_evt_id_t value.
util::CallbackTable<ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_NUMBER> hsa_evt_cb_table;
// Copies of the runtime's core and AMD-extension API tables, taken in
// Initialize() before any intercept is installed. The intercepts below call
// the original runtime entry points through these copies.
CoreApiTable saved_core_api{};
AmdExtTable saved_amd_ext_api{};
// AMD loader extension table, filled in by Initialize(); used to enumerate
// and query loaded code objects.
hsa_ven_amd_loader_1_01_pfn_t hsa_loader_api{};
// async copy activity callback
// init_mutex is held by Initialize() and while HSA_OP_ID_COPY activity is
// enabled or disabled; the two variables after it hold the current
// enable flag and destination pool for async-copy records.
std::mutex init_mutex;
bool async_copy_callback_enabled = false;
MemoryPool* async_copy_callback_memory_pool = nullptr;
// Per-agent information recorded while enumerating agents in Initialize().
struct AgentInfo {
int index;  // ordinal of the agent among agents of the same device type
hsa_device_type_t type;  // device type reported by HSA_AGENT_INFO_DEVICE
};
// Maps an agent handle to its AgentInfo; populated in Initialize().
std::unordered_map<decltype(hsa_agent_t::handle), AgentInfo> agent_info_map;
// Intercept for hsa_memory_allocate.
// Forwards the call to the saved runtime entry point; when it succeeds and an
// HSA_EVT_ID_ALLOCATE callback is registered, reports the allocated pointer,
// its size, and the region's segment and global flags to that callback.
// Returns the runtime's status on failure, HSA_STATUS_SUCCESS otherwise.
hsa_status_t HSA_API MemoryAllocateIntercept(hsa_region_t region, size_t size, void** ptr) {
  const hsa_status_t alloc_status = saved_core_api.hsa_memory_allocate_fn(region, size, ptr);
  if (alloc_status != HSA_STATUS_SUCCESS) return alloc_status;

  auto [notify, notify_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_ALLOCATE);
  if (!notify) return HSA_STATUS_SUCCESS;

  hsa_evt_data_t event{};
  event.allocate.ptr = *ptr;
  event.allocate.size = size;

  // The global flags are only queried when the segment query succeeded.
  bool region_info_ok =
      saved_core_api.hsa_region_get_info_fn(region, HSA_REGION_INFO_SEGMENT,
                                            &event.allocate.segment) == HSA_STATUS_SUCCESS;
  if (region_info_ok) {
    region_info_ok =
        saved_core_api.hsa_region_get_info_fn(region, HSA_REGION_INFO_GLOBAL_FLAGS,
                                              &event.allocate.global_flag) == HSA_STATUS_SUCCESS;
  }
  if (!region_info_ok) FATAL_LOGGING("hsa_region_get_info failed");

  notify(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE, &event, notify_arg);
  return HSA_STATUS_SUCCESS;
}
// Intercept for hsa_memory_assign_agent.
// Forwards the call to the saved runtime entry point; when it succeeds and an
// HSA_EVT_ID_DEVICE callback is registered, reports which agent `ptr` was
// assigned to. Returns the runtime's status on failure, HSA_STATUS_SUCCESS
// otherwise.
hsa_status_t MemoryAssignAgentIntercept(void* ptr, hsa_agent_t agent,
                                        hsa_access_permission_t access) {
  hsa_status_t status = saved_core_api.hsa_memory_assign_agent_fn(ptr, agent, access);
  if (status != HSA_STATUS_SUCCESS) return status;

  if (auto [callback_fun, callback_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_DEVICE); callback_fun) {
    hsa_evt_data_t data{};
    data.device.ptr = ptr;
    // Report the agent handle as the other HSA_EVT_ID_DEVICE producers in this
    // file do, instead of leaving it zero-initialized.
    data.device.agent = agent;
    if (saved_core_api.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_DEVICE, &data.device.type) !=
        HSA_STATUS_SUCCESS)
      FATAL_LOGGING("hsa_agent_get_info failed");

    // Fill in the per-type device index when the agent was seen during agent
    // enumeration; otherwise the id stays 0 as before.
    if (auto it = agent_info_map.find(agent.handle); it != agent_info_map.end())
      data.device.id = it->second.index;

    callback_fun(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE, &data, callback_arg);
  }
  return HSA_STATUS_SUCCESS;
}
// Intercept for hsa_memory_copy.
// Forwards the copy to the saved runtime entry point; when it succeeds and an
// HSA_EVT_ID_MEMCOPY callback is registered, reports dst, src and size to it.
// Returns the runtime's status on failure, HSA_STATUS_SUCCESS otherwise.
hsa_status_t MemoryCopyIntercept(void* dst, const void* src, size_t size) {
  const hsa_status_t copy_status = saved_core_api.hsa_memory_copy_fn(dst, src, size);
  if (copy_status != HSA_STATUS_SUCCESS) return copy_status;

  auto [notify, notify_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_MEMCOPY);
  if (notify) {
    hsa_evt_data_t event{};
    event.memcopy.size = size;
    event.memcopy.src = src;
    event.memcopy.dst = dst;
    notify(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_MEMCOPY, &event, notify_arg);
  }
  return HSA_STATUS_SUCCESS;
}
// Intercept for hsa_amd_memory_pool_allocate.
// Forwards the allocation to the saved runtime entry point. For a successful,
// non-zero-sized allocation with an HSA_EVT_ID_ALLOCATE callback registered it
// reports the allocation, and then (if an HSA_EVT_ID_DEVICE callback is also
// registered) reports one DEVICE event per agent that can access the pool by
// default. Returns the runtime's status when the allocation failed or size is
// zero, HSA_STATUS_SUCCESS otherwise.
hsa_status_t MemoryPoolAllocateIntercept(hsa_amd_memory_pool_t pool, size_t size, uint32_t flags,
                                         void** ptr) {
  hsa_status_t status = saved_amd_ext_api.hsa_amd_memory_pool_allocate_fn(pool, size, flags, ptr);
  if (size == 0 || status != HSA_STATUS_SUCCESS) return status;

  if (auto [callback_fun, callback_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_ALLOCATE); callback_fun) {
    hsa_evt_data_t data{};
    data.allocate.ptr = *ptr;
    data.allocate.size = size;

    if (saved_amd_ext_api.hsa_amd_memory_pool_get_info_fn(
            pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &data.allocate.segment) != HSA_STATUS_SUCCESS ||
        saved_amd_ext_api.hsa_amd_memory_pool_get_info_fn(
            pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &data.allocate.global_flag) !=
            HSA_STATUS_SUCCESS)
      // Name the function that actually failed (was "hsa_region_get_info").
      FATAL_LOGGING("hsa_amd_memory_pool_get_info failed");

    callback_fun(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE, &data, callback_arg);

    if (std::tie(callback_fun, callback_arg) = hsa_evt_cb_table.Get(HSA_EVT_ID_DEVICE);
        !callback_fun)
      return HSA_STATUS_SUCCESS;

    // FIXME: Why is this only reported if HSA_EVT_ID_ALLOCATE is also set?
    // Pass the allocated address (*ptr), not the address of the caller's
    // out-parameter, so the DEVICE event refers to the same memory as the
    // ALLOCATE event reported above.
    auto callback_data = std::make_tuple(callback_fun, callback_arg, pool, *ptr);
    auto agent_callback = [](hsa_agent_t agent, void* iterate_agent_callback_data) {
      auto [device_callback, device_arg, target_pool, allocated_ptr] =
          *reinterpret_cast<decltype(callback_data)*>(iterate_agent_callback_data);

      // Skip agents whose access query fails or that lack default access.
      if (hsa_amd_memory_pool_access_t value;
          saved_amd_ext_api.hsa_amd_agent_memory_pool_get_info_fn(
              agent, target_pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &value) !=
              HSA_STATUS_SUCCESS ||
          value != HSA_AMD_MEMORY_POOL_ACCESS_ALLOWED_BY_DEFAULT)
        return HSA_STATUS_SUCCESS;

      auto it = agent_info_map.find(agent.handle);
      if (it == agent_info_map.end()) FATAL_LOGGING("agent was not found in the agent_info map");

      hsa_evt_data_t data{};
      data.device.type = it->second.type;
      data.device.id = it->second.index;
      data.device.agent = agent;
      data.device.ptr = allocated_ptr;

      device_callback(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE, &data, device_arg);
      return HSA_STATUS_SUCCESS;
    };
    saved_core_api.hsa_iterate_agents_fn(agent_callback, &callback_data);
  }
  return HSA_STATUS_SUCCESS;
}
// Intercept for hsa_amd_memory_pool_free.
// If an HSA_EVT_ID_ALLOCATE callback is registered, reports the pointer with a
// size of zero (the 'free' form of the allocate event) before the memory is
// released, then forwards to the saved runtime entry point and returns its
// status.
hsa_status_t MemoryPoolFreeIntercept(void* ptr) {
  auto [notify, notify_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_ALLOCATE);
  if (notify) {
    hsa_evt_data_t event{};
    event.allocate.size = 0;
    event.allocate.ptr = ptr;
    notify(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE, &event, notify_arg);
  }
  const hsa_status_t free_status = saved_amd_ext_api.hsa_amd_memory_pool_free_fn(ptr);
  return free_status;
}
// Intercept for hsa_amd_agents_allow_access.
// Forwards the call to the saved runtime entry point; when it succeeds and an
// HSA_EVT_ID_DEVICE callback is registered, emits one DEVICE event per agent
// in `agents`, in order. Every agent must be present in agent_info_map.
// Returns the runtime's status on failure, HSA_STATUS_SUCCESS otherwise.
hsa_status_t AgentsAllowAccessIntercept(uint32_t num_agents, const hsa_agent_t* agents,
                                        const uint32_t* flags, const void* ptr) {
  const hsa_status_t allow_status =
      saved_amd_ext_api.hsa_amd_agents_allow_access_fn(num_agents, agents, flags, ptr);
  if (allow_status != HSA_STATUS_SUCCESS) return allow_status;

  auto [notify, notify_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_DEVICE);
  if (!notify) return HSA_STATUS_SUCCESS;

  for (uint32_t i = 0; i < num_agents; ++i) {
    const hsa_agent_t current = agents[i];
    auto known = agent_info_map.find(current.handle);
    if (known == agent_info_map.end()) FATAL_LOGGING("agent was not found in the agent_info map");

    hsa_evt_data_t event{};
    event.device.ptr = ptr;
    event.device.agent = current;
    event.device.id = known->second.index;
    event.device.type = known->second.type;

    notify(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE, &event, notify_arg);
  }
  return HSA_STATUS_SUCCESS;
}
// Context handed to CodeObjectCallback through the loader's `void*` argument.
// Callers in this file initialize it positionally, so the member order is
// significant.
struct CodeObjectCallbackArg {
activity_rtapi_callback_t callback_fun;  // HSA_EVT_ID_CODEOBJ callback to invoke
void* callback_arg;  // opaque argument passed back to callback_fun
bool unload;  // true when reporting on executable destroy, false on freeze
};
// Per-code-object callback used with
// hsa_ven_amd_loader_executable_iterate_loaded_code_objects.
// Collects the storage description, load range and URI of `loaded_code_object`
// into an hsa_evt_data_t and delivers it as an HSA_EVT_ID_CODEOBJ event to the
// callback carried in `arg` (a CodeObjectCallbackArg*). Code objects with
// storage type NONE are skipped. Always returns HSA_STATUS_SUCCESS.
hsa_status_t CodeObjectCallback(hsa_executable_t executable,
                                hsa_loaded_code_object_t loaded_code_object, void* arg) {
  auto* request = static_cast<CodeObjectCallbackArg*>(arg);
  const auto query = hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info;

  hsa_evt_data_t event{};

  if (query(loaded_code_object,
            HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE,
            &event.codeobj.storage_type) != HSA_STATUS_SUCCESS) {
    FATAL_LOGGING("hsa_ven_amd_loader_loaded_code_object_get_info failed");
  }

  // Describe where the code object came from: a file descriptor or a memory range.
  switch (event.codeobj.storage_type) {
    case HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE: {
      if (query(loaded_code_object,
                HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_FILE,
                &event.codeobj.storage_file) != HSA_STATUS_SUCCESS ||
          event.codeobj.storage_file == -1) {
        FATAL_LOGGING("hsa_ven_amd_loader_loaded_code_object_get_info failed");
      }
      event.codeobj.memory_base = event.codeobj.memory_size = 0;
      break;
    }
    case HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY: {
      if (query(loaded_code_object,
                HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_BASE,
                &event.codeobj.memory_base) != HSA_STATUS_SUCCESS ||
          query(loaded_code_object,
                HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_SIZE,
                &event.codeobj.memory_size) != HSA_STATUS_SUCCESS) {
        FATAL_LOGGING("hsa_ven_amd_loader_loaded_code_object_get_info failed");
      }
      event.codeobj.storage_file = -1;
      break;
    }
    case HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE: {
      return HSA_STATUS_SUCCESS;  // FIXME: do we really not care about these code objects?
    }
    default: {
      FATAL_LOGGING("Unknown code object storage type: " << event.codeobj.storage_type);
      break;
    }
  }

  // Where the code object was loaded.
  if (query(loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE,
            &event.codeobj.load_base) != HSA_STATUS_SUCCESS ||
      query(loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE,
            &event.codeobj.load_size) != HSA_STATUS_SUCCESS ||
      query(loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA,
            &event.codeobj.load_delta) != HSA_STATUS_SUCCESS) {
    FATAL_LOGGING("hsa_ven_amd_loader_loaded_code_object_get_info failed");
  }

  // The URI is fetched in two steps: its length first, then the characters
  // into a buffer of exactly that length.
  if (query(loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH,
            &event.codeobj.uri_length) != HSA_STATUS_SUCCESS) {
    FATAL_LOGGING("hsa_ven_amd_loader_loaded_code_object_get_info failed");
  }

  std::string uri_buffer(event.codeobj.uri_length, '\0');
  if (query(loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI,
            uri_buffer.data()) != HSA_STATUS_SUCCESS) {
    FATAL_LOGGING("hsa_ven_amd_loader_loaded_code_object_get_info failed");
  }

  // uri points into uri_buffer, so it is only valid for the duration of the callback.
  event.codeobj.uri = uri_buffer.c_str();
  event.codeobj.unload = request->unload ? 1 : 0;

  request->callback_fun(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ, &event,
                        request->callback_arg);
  return HSA_STATUS_SUCCESS;
}
// Intercept for hsa_executable_freeze.
// Forwards the call to the saved runtime entry point; when it succeeds and an
// HSA_EVT_ID_CODEOBJ callback is registered, walks the executable's loaded
// code objects and reports each one as a load (unload == false).
// Returns the runtime's status on failure, HSA_STATUS_SUCCESS otherwise.
hsa_status_t ExecutableFreezeIntercept(hsa_executable_t executable, const char* options) {
  const hsa_status_t freeze_status = saved_core_api.hsa_executable_freeze_fn(executable, options);
  if (freeze_status != HSA_STATUS_SUCCESS) return freeze_status;

  auto [notify, notify_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_CODEOBJ);
  if (!notify) return HSA_STATUS_SUCCESS;

  CodeObjectCallbackArg load_request = {notify, notify_arg, false};
  hsa_loader_api.hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
      executable, CodeObjectCallback, &load_request);
  return HSA_STATUS_SUCCESS;
}
// Intercept for hsa_executable_destroy.
// While the executable still exists, reports each of its loaded code objects
// as an unload (unload == true) to the HSA_EVT_ID_CODEOBJ callback, if one is
// registered; then forwards to the saved runtime entry point and returns its
// status.
hsa_status_t ExecutableDestroyIntercept(hsa_executable_t executable) {
  auto [notify, notify_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_CODEOBJ);
  if (notify) {
    CodeObjectCallbackArg unload_request = {notify, notify_arg, true};
    hsa_loader_api.hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
        executable, CodeObjectCallback, &unload_request);
  }
  const hsa_status_t destroy_status = saved_core_api.hsa_executable_destroy_fn(executable);
  return destroy_status;
}
// Tracker completion handler for async copies.
// Builds an HSA_OPS / HSA_OP_ID_COPY activity record from the tracker entry's
// begin/end timestamps and correlation id (device_id is always 0) and writes
// it to the entry's memory pool.
void MemoryASyncCopyHandler(const Tracker::entry_t* entry) {
  activity_record_t copy_record{};
  copy_record.correlation_id = entry->correlation_id;
  copy_record.device_id = 0;
  copy_record.end_ns = entry->end;
  copy_record.begin_ns = entry->begin;
  copy_record.op = HSA_OP_ID_COPY;
  copy_record.domain = ACTIVITY_DOMAIN_HSA_OPS;

  entry->pool->Write(copy_record);
}
// Intercept for hsa_amd_memory_async_copy.
// With async-copy activity disabled the call is forwarded unchanged. Otherwise
// a tracker entry is created and enabled for the caller's completion signal,
// and the copy is submitted with the tracker's signal (entry->signal) in its
// place; if submission fails the tracker entry is disabled again.
// Returns the runtime's status in all cases.
hsa_status_t MemoryASyncCopyIntercept(void* dst, hsa_agent_t dst_agent, const void* src,
                                      hsa_agent_t src_agent, size_t size, uint32_t num_dep_signals,
                                      const hsa_signal_t* dep_signals,
                                      hsa_signal_t completion_signal) {
  if (async_copy_callback_enabled) {
    Tracker::entry_t* tracked = new Tracker::entry_t();
    tracked->handler = MemoryASyncCopyHandler;
    tracked->pool = async_copy_callback_memory_pool;
    tracked->correlation_id = CorrelationId();
    Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, tracked);

    const hsa_status_t copy_status = saved_amd_ext_api.hsa_amd_memory_async_copy_fn(
        dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, tracked->signal);
    if (copy_status != HSA_STATUS_SUCCESS) {
      Tracker::Disable(tracked);
    }
    return copy_status;
  }

  // Tracing is off: plain pass-through.
  return saved_amd_ext_api.hsa_amd_memory_async_copy_fn(
      dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, completion_signal);
}
// Intercept for hsa_amd_memory_async_copy_rect.
// With async-copy activity disabled the call is forwarded unchanged. Otherwise
// a tracker entry is created and enabled for the caller's completion signal,
// and the copy is submitted with the tracker's signal (entry->signal) in its
// place; if submission fails the tracker entry is disabled again.
// Returns the runtime's status in all cases.
hsa_status_t MemoryASyncCopyRectIntercept(const hsa_pitched_ptr_t* dst,
                                          const hsa_dim3_t* dst_offset,
                                          const hsa_pitched_ptr_t* src,
                                          const hsa_dim3_t* src_offset, const hsa_dim3_t* range,
                                          hsa_agent_t copy_agent, hsa_amd_copy_direction_t dir,
                                          uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
                                          hsa_signal_t completion_signal) {
  if (async_copy_callback_enabled) {
    Tracker::entry_t* tracked = new Tracker::entry_t();
    tracked->handler = MemoryASyncCopyHandler;
    tracked->pool = async_copy_callback_memory_pool;
    tracked->correlation_id = CorrelationId();
    Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, tracked);

    const hsa_status_t copy_status = saved_amd_ext_api.hsa_amd_memory_async_copy_rect_fn(
        dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals,
        tracked->signal);
    if (copy_status != HSA_STATUS_SUCCESS) {
      Tracker::Disable(tracked);
    }
    return copy_status;
  }

  // Tracing is off: plain pass-through.
  return saved_amd_ext_api.hsa_amd_memory_async_copy_rect_fn(
      dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals,
      completion_signal);
}
// Activity callback handed to RocpLoader::InitActivityCallback.
// `record` is treated as a roctracer_record_t and `arg` as the destination
// MemoryPool; the record's domain is stamped as HSA_OPS before it is written
// to the pool. `op_id` is not used.
void AsyncActivityCallback(uint32_t op_id, void* record, void* arg) {
  auto* activity = static_cast<roctracer_record_t*>(record);
  auto* destination = static_cast<MemoryPool*>(arg);

  activity->domain = ACTIVITY_DOMAIN_HSA_OPS;
  destination->Write(*activity);
}
} // namespace
// Returns the current HSA system timestamp converted to nanoseconds.
// Requires Initialize() to have saved the core API table (fatal otherwise).
// Returns 0 when the runtime reports HSA_STATUS_ERROR_NOT_INITIALIZED. The
// tick period is computed once, on first use, as the integer quotient
// 1e9 / HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY.
roctracer_timestamp_t timestamp_ns() {
  if (saved_core_api.hsa_system_get_info_fn == nullptr)
    FATAL_LOGGING("HSA intercept is not active");

  uint64_t ticks;
  const hsa_status_t tick_status =
      saved_core_api.hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP, &ticks);
  if (tick_status == HSA_STATUS_ERROR_NOT_INITIALIZED) return 0;
  if (tick_status != HSA_STATUS_SUCCESS) FATAL_LOGGING("hsa_system_get_info failed");

  // Nanoseconds per timestamp tick; queried from the runtime only once.
  static uint64_t ns_per_tick = []() {
    uint64_t ticks_per_second = 0;
    const hsa_status_t freq_status = saved_core_api.hsa_system_get_info_fn(
        HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &ticks_per_second);
    if (freq_status != HSA_STATUS_SUCCESS) FATAL_LOGGING("hsa_system_get_info failed");
    return (uint64_t)1000000000 / ticks_per_second;
  }();

  return ticks * ns_per_tick;
}
// Sets up HSA interception on the runtime's API table.
// In order: saves copies of the core and AMD-extension tables, enumerates the
// agents into agent_info_map, fetches the AMD loader extension table, replaces
// selected table entries with the intercepts defined in this file, installs the
// generated HSA_API wrappers, and finally turns on async-copy profiling if it
// was requested before initialization. The whole function runs under
// init_mutex.
void Initialize(HsaApiTable* table) {
std::scoped_lock lock(init_mutex);
// Save the HSA core api and amd_ext api.
// These copies must be taken before any entry of `table` is overwritten below,
// since the intercepts call the original functions through them.
saved_core_api = *table->core_;
saved_amd_ext_api = *table->amd_ext_;
// Enumerate the agents.
// Each agent gets an index that counts agents of the same device type
// (CPU, GPU, or any other type) in enumeration order.
if (hsa_support::saved_core_api.hsa_iterate_agents_fn(
[](hsa_agent_t agent, void* data) {
hsa_support::AgentInfo agent_info;
if (hsa_support::saved_core_api.hsa_agent_get_info_fn(
agent, HSA_AGENT_INFO_DEVICE, &agent_info.type) != HSA_STATUS_SUCCESS)
FATAL_LOGGING("hsa_agent_get_info failed");
switch (agent_info.type) {
case HSA_DEVICE_TYPE_CPU:
static int cpu_agent_count = 0;
agent_info.index = cpu_agent_count++;
break;
case HSA_DEVICE_TYPE_GPU:
static int gpu_agent_count = 0;
agent_info.index = gpu_agent_count++;
break;
default:
static int other_agent_count = 0;
agent_info.index = other_agent_count++;
break;
}
hsa_support::agent_info_map.emplace(agent.handle, agent_info);
return HSA_STATUS_SUCCESS;
},
nullptr) != HSA_STATUS_SUCCESS)
FATAL_LOGGING("hsa_iterate_agents failed");
// Install the code object intercept.
// The loader extension table is needed by the code-object callbacks.
hsa_status_t status = table->core_->hsa_system_get_major_extension_table_fn(
HSA_EXTENSION_AMD_LOADER, 1, sizeof(hsa_ven_amd_loader_1_01_pfn_t), &hsa_loader_api);
if (status != HSA_STATUS_SUCCESS) FATAL_LOGGING("hsa_system_get_major_extension_table failed");
// Install the HSA_OPS intercept
table->amd_ext_->hsa_amd_memory_async_copy_fn = MemoryASyncCopyIntercept;
table->amd_ext_->hsa_amd_memory_async_copy_rect_fn = MemoryASyncCopyRectIntercept;
// Install the HSA_EVT intercept
table->core_->hsa_memory_allocate_fn = MemoryAllocateIntercept;
table->core_->hsa_memory_assign_agent_fn = MemoryAssignAgentIntercept;
table->core_->hsa_memory_copy_fn = MemoryCopyIntercept;
table->amd_ext_->hsa_amd_memory_pool_allocate_fn = MemoryPoolAllocateIntercept;
table->amd_ext_->hsa_amd_memory_pool_free_fn = MemoryPoolFreeIntercept;
table->amd_ext_->hsa_amd_agents_allow_access_fn = AgentsAllowAccessIntercept;
table->core_->hsa_executable_freeze_fn = ExecutableFreezeIntercept;
table->core_->hsa_executable_destroy_fn = ExecutableDestroyIntercept;
// Install the HSA_API wrappers
// These run after the intercepts above have been written into the table.
detail::InstallCoreApiWrappers(table->core_);
detail::InstallAmdExtWrappers(table->amd_ext_);
detail::InstallImageExtWrappers(table->image_ext_);
// Async-copy activity may have been enabled before the runtime table was
// available; apply it now. Failure is only checked by assert.
if (async_copy_callback_enabled) {
[[maybe_unused]] hsa_status_t status =
saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(true);
assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");
}
}
void Finalize() {
if (hsa_support::async_copy_callback_enabled) {
[[maybe_unused]] hsa_status_t status =
hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(false);
assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");
}
}
// Returns the name of the HSA API call identified by `id`, as provided by the
// generated detail::GetApiName.
const char* GetApiName(uint32_t id) {
  const char* api_name = detail::GetApiName(id);
  return api_name;
}
// Returns the short name of the HSA event identified by `id`.
// Throws ApiError(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT) when `id` is not a
// valid hsa_evt_id_t (i.e. is HSA_EVT_ID_NUMBER or larger).
const char* GetEvtName(uint32_t id) {
  // Indexed by hsa_evt_id_t; the order must follow the enumerator values.
  static_assert(HSA_EVT_ID_ALLOCATE == 0 && HSA_EVT_ID_DEVICE == 1 && HSA_EVT_ID_MEMCOPY == 2 &&
                    HSA_EVT_ID_SUBMIT == 3 && HSA_EVT_ID_KSYMBOL == 4 &&
                    HSA_EVT_ID_CODEOBJ == 5 && HSA_EVT_ID_NUMBER == 6,
                "event name table is out of sync with hsa_evt_id_t");
  static constexpr const char* kEventNames[] = {
      "ALLOCATE", "DEVICE", "MEMCOPY", "SUBMIT", "KSYMBOL", "CODEOBJ",
  };

  if (id < HSA_EVT_ID_NUMBER) {
    return kEventNames[id];
  }
  throw ApiError(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, "invalid HSA EVT callback id");
}
// Returns the name of the HSA operation identified by `id`, as reported by
// RocpLoader::Instance().GetOpName.
const char* GetOpsName(uint32_t id) {
  const char* op_name = RocpLoader::Instance().GetOpName(id);
  return op_name;
}
// Returns the HSA API id for the API name `str`, as provided by the generated
// detail::GetApiCode.
uint32_t GetApiCode(const char* str) {
  const uint32_t api_code = detail::GetApiCode(str);
  return api_code;
}
// Enables activity reporting for operation `op` of an HSA domain, with records
// going to `pool`.
// Only ACTIVITY_DOMAIN_HSA_OPS does anything here:
//  - HSA_OP_ID_COPY turns on async-copy profiling in the runtime (when the
//    runtime table has already been saved) and records the enable flag and
//    destination pool under init_mutex.
//  - Any other op is forwarded to RocpLoader, but only if a loader instance
//    already exists.
// The HSA_API and HSA_EVT domains, and unknown domains, are ignored.
void EnableActivity(roctracer_domain_t domain, uint32_t op, roctracer_pool_t* pool) {
switch (domain) {
case ACTIVITY_DOMAIN_HSA_OPS:
if (op == HSA_OP_ID_COPY) {
std::scoped_lock lock(init_mutex);
// The function pointer is null until Initialize() has saved the table; in
// that case only the flag is set here.
if (saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn != nullptr) {
[[maybe_unused]] hsa_status_t status =
saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(true);
assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");
}
async_copy_callback_enabled = true;
async_copy_callback_memory_pool = reinterpret_cast<MemoryPool*>(pool);
} else {
// NOTE(review): init_phase is true exactly when GetRef() is null, and that
// same condition breaks out on the next line, so the InitActivityCallback
// call below appears to be unreachable. Confirm the intended behavior
// against RocpLoader before changing it.
const bool init_phase = (RocpLoader::GetRef() == nullptr);
if (RocpLoader::GetRef() == nullptr) break;
if (init_phase) {
RocpLoader::Instance().InitActivityCallback(
reinterpret_cast<void*>(AsyncActivityCallback), pool);
}
if (!RocpLoader::Instance().EnableActivityCallback(op, true))
FATAL_LOGGING("HSA::EnableActivityCallback error");
}
break;
case ACTIVITY_DOMAIN_HSA_API:
// FIXME: Add HSA api activities.
break;
case ACTIVITY_DOMAIN_HSA_EVT:
break;
default:
break;
}
}
// Registers `callback` (with `user_data`) for operation/event `cid` of an HSA
// domain.
//  - ACTIVITY_DOMAIN_HSA_API: stored in the generated API callback table.
//  - ACTIVITY_DOMAIN_HSA_EVT: stored in the HSA event callback table.
//  - ACTIVITY_DOMAIN_HSA_OPS and unknown domains: ignored.
// Raises ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT via EXC_RAISING when `cid` is
// out of range for the domain.
void EnableCallback(roctracer_domain_t domain, uint32_t cid, roctracer_rtapi_callback_t callback,
                    void* user_data) {
  switch (domain) {
    case ACTIVITY_DOMAIN_HSA_OPS:
      break;
    case ACTIVITY_DOMAIN_HSA_API:
      if (cid >= HSA_API_ID_NUMBER)
        EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT,
                    "invalid HSA API operation ID(" << cid << ")");
      detail::cb_table.Set(cid, callback, user_data);
      break;
    case ACTIVITY_DOMAIN_HSA_EVT:
      // Name the EVT domain in the message, matching DisableCallback.
      if (cid >= HSA_EVT_ID_NUMBER)
        EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT,
                    "invalid HSA EVT operation ID(" << cid << ")");
      hsa_evt_cb_table.Set(cid, callback, user_data);
      break;
    default:
      break;
  }
}
// Disables activity reporting for operation `op` of an HSA domain.
// Only ACTIVITY_DOMAIN_HSA_OPS does anything here:
//  - HSA_OP_ID_COPY clears the async-copy flag and pool under init_mutex and,
//    when the runtime table has been saved, turns async-copy profiling off
//    (a NOT_INITIALIZED status is tolerated by the assert).
//  - Any other op is forwarded to RocpLoader if a loader instance exists.
// All other domains are ignored.
void DisableActivity(roctracer_domain_t domain, uint32_t op) {
  if (domain != ACTIVITY_DOMAIN_HSA_OPS) {
    // HSA_API (FIXME: Add HSA api activities.), HSA_EVT and unknown domains: nothing to do.
    return;
  }

  if (op != HSA_OP_ID_COPY) {
    if (RocpLoader::GetRef() != nullptr &&
        !RocpLoader::Instance().EnableActivityCallback(op, false)) {
      FATAL_LOGGING("HSA::EnableActivityCallback(false) error, op(" << op << ")");
    }
    return;
  }

  std::scoped_lock lock(init_mutex);
  async_copy_callback_enabled = false;
  async_copy_callback_memory_pool = nullptr;

  if (saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn == nullptr) return;

  [[maybe_unused]] const hsa_status_t disable_status =
      saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(false);
  assert(disable_status == HSA_STATUS_SUCCESS ||
         disable_status == HSA_STATUS_ERROR_NOT_INITIALIZED ||
         !"hsa_amd_profiling_async_copy_enable failed");
}
// Unregisters the callback for operation/event `cid` of an HSA domain by
// storing a null callback and null user data in the matching table.
// ACTIVITY_DOMAIN_HSA_OPS and unknown domains are ignored. Raises
// ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT via EXC_RAISING when `cid` is out of
// range for the domain.
void DisableCallback(roctracer_domain_t domain, uint32_t cid) {
  if (domain == ACTIVITY_DOMAIN_HSA_API) {
    if (cid >= HSA_API_ID_NUMBER) {
      EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT,
                  "invalid HSA API operation ID(" << cid << ")");
    }
    detail::cb_table.Set(cid, nullptr, nullptr);
  } else if (domain == ACTIVITY_DOMAIN_HSA_EVT) {
    if (cid >= HSA_EVT_ID_NUMBER) {
      EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT,
                  "invalid HSA EVT operation ID(" << cid << ")");
    }
    hsa_evt_cb_table.Set(cid, nullptr, nullptr);
  }
  // Every other domain, including ACTIVITY_DOMAIN_HSA_OPS, has no callbacks to clear.
}
} // namespace roctracer::hsa_support
+50
Wyświetl plik
@@ -0,0 +1,50 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef HSA_SUPPORT_H_
#define HSA_SUPPORT_H_
#include "roctracer.h"
#include "roctracer_hsa.h"
#include <hsa/hsa_api_trace.h>
// Public interface of roctracer's HSA support layer.
namespace roctracer::hsa_support {
// Saves the runtime's API tables and installs the HSA intercepts into `table`.
void Initialize(HsaApiTable* table);
// Turns async-copy profiling off again if it is currently enabled.
void Finalize();
// Name lookups for HSA API calls, HSA events and HSA operations.
// GetEvtName throws ApiError for an id that is not a valid hsa_evt_id_t.
const char* GetApiName(uint32_t id);
const char* GetEvtName(uint32_t id);
const char* GetOpsName(uint32_t id);
// Maps an HSA API name to its id.
uint32_t GetApiCode(const char* str);
// Enable activity records (written to `pool`) or a callback for one
// operation/event of an HSA domain.
void EnableActivity(roctracer_domain_t domain, uint32_t op, roctracer_pool_t* pool);
void EnableCallback(roctracer_domain_t domain, uint32_t cid, roctracer_rtapi_callback_t callback,
void* user_data);
// Counterparts of the two functions above.
void DisableCallback(roctracer_domain_t domain, uint32_t cid);
void DisableActivity(roctracer_domain_t domain, uint32_t op);
// Current HSA system timestamp in nanoseconds; 0 if the runtime reports that
// it is not initialized.
uint64_t timestamp_ns();
} // namespace roctracer::hsa_support
#endif // HSA_SUPPORT_H_
+7 -1
Wyświetl plik
@@ -24,6 +24,8 @@
#include <atomic>
#include <mutex>
#include <dlfcn.h>
#include <unistd.h>
#include <sys/syscall.h>
#define ONLD_TRACE(str) \
if (getenv("ROCP_ONLOAD_TRACE")) do { \
@@ -36,7 +38,7 @@ namespace roctracer {
// Base runtime loader class
template <class T> class BaseLoader : public T {
static uint32_t GetPid() { return syscall(__NR_getpid); }
static uint32_t GetPid() { return ::syscall(__NR_getpid); }
public:
typedef std::mutex mutex_t;
@@ -132,8 +134,12 @@ class RocpApi {
}
};
} // namespace roctracer
// HIP runtime library loader class
#include "roctracer_hip.h"
namespace roctracer {
#if STATIC_BUILD
__attribute__((weak)) hipError_t hipRegisterApiCallback(uint32_t id, void* fun, void* arg) {
return hipErrorUnknown;
+2
Wyświetl plik
@@ -21,6 +21,8 @@
#ifndef MEMORY_POOL_H_
#define MEMORY_POOL_H_
#include "roctracer.h"
#include <cassert>
#include <condition_variable>
#include <cstdlib>
+24 -261
Wyświetl plik
@@ -40,13 +40,12 @@
#include "correlation_id.h"
#include "journal.h"
#include "loader.h"
#include "hsa_support.h"
#include "memory_pool.h"
#include "tracker.h"
#include "exception.h"
#include "util/logger.h"
#include "hsa_prof_str.inline.h"
#define CHECK_HSA_STATUS(msg, status) \
do { \
if ((status) != HSA_STATUS_SUCCESS) { \
@@ -106,46 +105,11 @@ static inline uint32_t GetTid() {
//
namespace roctracer {
// State shared between the tool's OnLoad() and the HSA_OPS async-copy tracing path.
namespace hsa_support {
// Copies of the runtime's core and AMD-extension tables. OnLoad() fills them from the table it
// receives before it installs any interceptor or wrapper, so calls made through these copies
// go to the runtime's own entry points.
static CoreApiTable saved_core_api;
static AmdExtTable saved_amd_ext_api;
// async copy activity callback
// init_mutex is held by OnLoad() and by the HSA_OP_ID_COPY enable/disable paths while they
// update the flag and pool pointer below.
std::mutex init_mutex;
// NOTE(review): the async-copy interceptors read this flag and the pool pointer without taking
// init_mutex -- confirm that this unsynchronised read is intended.
bool async_copy_callback_enabled = false;
MemoryPool* async_copy_callback_memory_pool = nullptr;
} // namespace hsa_support
// Start/stop callback slots; both begin as null. Who sets and who invokes them is not visible
// in this part of the file -- presumably roctracer_start()/roctracer_stop(); confirm.
namespace ext_support {
roctracer_start_cb_t roctracer_start_cb = nullptr;
roctracer_stop_cb_t roctracer_stop_cb = nullptr;
} // namespace ext_support
namespace util {

// Reads HSA_SYSTEM_INFO_TIMESTAMP through the saved core API table and scales it by a
// whole-number tick period derived from HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY.
// Returns 0 when the runtime reports HSA_STATUS_ERROR_NOT_INITIALIZED; any other non-success
// status is handed to CHECK_HSA_STATUS.
roctracer_timestamp_t timestamp_ns() {
  uint64_t ticks;
  hsa_status_t tick_status =
      hsa_support::saved_core_api.hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP, &ticks);
  if (tick_status == HSA_STATUS_ERROR_NOT_INITIALIZED) {
    return 0;
  }
  CHECK_HSA_STATUS("hsa_system_get_info()", tick_status);

  // The period is queried once, on the first call that gets past the checks above, and then
  // reused. The division is an integer division, so the period is a whole number.
  static uint64_t ns_per_tick = [] {
    uint64_t ticks_per_second = 0;
    hsa_status_t freq_status = hsa_support::saved_core_api.hsa_system_get_info_fn(
        HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &ticks_per_second);
    CHECK_HSA_STATUS("hsa_system_get_info()", freq_status);
    return static_cast<uint64_t>(1000000000) / ticks_per_second;
  }();

  return ticks * ns_per_tick;
}

}  // namespace util
struct CallbackJournalData {
roctracer_rtapi_callback_t callback;
void* user_data;
@@ -193,12 +157,12 @@ void HIP_ApiCallback(uint32_t op_id, roctracer_record_t* record, void* callback_
record->op = op_id;
record->process_id = GetPid();
record->thread_id = GetTid();
record->begin_ns = util::timestamp_ns();
record->begin_ns = hsa_support::timestamp_ns();
record->correlation_id = correlation_id;
}
} else {
if (pool != nullptr) {
record->end_ns = util::timestamp_ns();
record->end_ns = hsa_support::timestamp_ns();
if (auto external_id = ExternalCorrelationId()) {
roctracer_record_t ext_record{};
@@ -247,82 +211,6 @@ void HIP_AsyncActivityCallback(uint32_t op_id, void* record_ptr, void* arg) {
record_ptr->begin_ns, record_ptr->end_ns);
}
namespace hsa_support {

// Device type of an HSA agent together with its index among the agents of that type
// (OnLoad() assigns the index from a per-type counter while enumerating agents).
struct AgentInfo {
  int index;
  hsa_device_type_t type;
};

// Agent bookkeeping keyed by the agent's handle value.
std::unordered_map<decltype(hsa_agent_t::handle), AgentInfo> agent_info_map;

// Handler stored in every tracked copy's entry (presumably invoked by Tracker once the copy has
// completed -- confirm). Turns the entry into an HSA_OPS / HSA_OP_ID_COPY activity record and
// writes it to the pool recorded in the entry.
void hsa_async_copy_handler(const Tracker::entry_t* entry) {
  activity_record_t copy_record{};
  copy_record.domain = ACTIVITY_DOMAIN_HSA_OPS;
  copy_record.op = HSA_OP_ID_COPY;
  copy_record.correlation_id = entry->correlation_id;
  copy_record.device_id = 0;
  copy_record.begin_ns = entry->begin;
  copy_record.end_ns = entry->end;
  entry->pool->Write(copy_record);
}

// Creates the tracker entry for one traced copy and registers it against the caller's
// completion signal. The caller submits the copy with entry->signal in place of its own signal.
static Tracker::entry_t* begin_async_copy_tracking(hsa_signal_t completion_signal) {
  auto* entry = new Tracker::entry_t();
  entry->handler = hsa_async_copy_handler;
  entry->pool = async_copy_callback_memory_pool;
  entry->correlation_id = CorrelationId();
  Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
  return entry;
}

// Replacement for hsa_amd_memory_async_copy. With copy tracing enabled the copy is submitted
// with the tracker's signal; otherwise the call is forwarded untouched.
// NOTE(review): async_copy_callback_enabled is read here without init_mutex.
hsa_status_t hsa_amd_memory_async_copy_interceptor(void* dst, hsa_agent_t dst_agent,
                                                   const void* src, hsa_agent_t src_agent,
                                                   size_t size, uint32_t num_dep_signals,
                                                   const hsa_signal_t* dep_signals,
                                                   hsa_signal_t completion_signal) {
  if (async_copy_callback_enabled) {
    Tracker::entry_t* entry = begin_async_copy_tracking(completion_signal);
    const hsa_status_t status = saved_amd_ext_api.hsa_amd_memory_async_copy_fn(
        dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, entry->signal);
    // A copy that was never submitted will not complete, so its tracking is withdrawn.
    if (status != HSA_STATUS_SUCCESS) {
      Tracker::Disable(entry);
    }
    return status;
  }

  return saved_amd_ext_api.hsa_amd_memory_async_copy_fn(
      dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, completion_signal);
}

// Replacement for hsa_amd_memory_async_copy_rect; same scheme as the linear-copy interceptor.
hsa_status_t hsa_amd_memory_async_copy_rect_interceptor(
    const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset, const hsa_pitched_ptr_t* src,
    const hsa_dim3_t* src_offset, const hsa_dim3_t* range, hsa_agent_t copy_agent,
    hsa_amd_copy_direction_t dir, uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
    hsa_signal_t completion_signal) {
  if (async_copy_callback_enabled) {
    Tracker::entry_t* entry = begin_async_copy_tracking(completion_signal);
    const hsa_status_t status = saved_amd_ext_api.hsa_amd_memory_async_copy_rect_fn(
        dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals,
        entry->signal);
    if (status != HSA_STATUS_SUCCESS) {
      Tracker::Disable(entry);
    }
    return status;
  }

  return saved_amd_ext_api.hsa_amd_memory_async_copy_rect_fn(
      dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals,
      completion_signal);
}

}  // namespace hsa_support
void HSA_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) {
MemoryPool* pool = reinterpret_cast<MemoryPool*>(arg);
roctracer_record_t* record_ptr = reinterpret_cast<roctracer_record_t*>(record);
record_ptr->domain = ACTIVITY_DOMAIN_HSA_OPS;
pool->Write(*record_ptr);
}
// Logger routines and primitives
util::Logger::mutex_t util::Logger::mutex_;
std::atomic<util::Logger*> util::Logger::instance_{};
@@ -340,6 +228,7 @@ unsigned set_stopped(unsigned val) {
stop_status_value = val;
return ret;
}
} // namespace roctracer
using namespace roctracer;
@@ -367,9 +256,9 @@ ROCTRACER_API const char* roctracer_op_string(uint32_t domain, uint32_t op, uint
case ACTIVITY_DOMAIN_HSA_API:
return hsa_support::GetApiName(op);
case ACTIVITY_DOMAIN_HSA_EVT:
return RocpLoader::Instance().GetEvtName(op);
return hsa_support::GetEvtName(op);
case ACTIVITY_DOMAIN_HSA_OPS:
return RocpLoader::Instance().GetOpName(op);
return hsa_support::GetOpsName(op);
case ACTIVITY_DOMAIN_HIP_OPS:
return HipLoader::Instance().GetOpName(kind);
case ACTIVITY_DOMAIN_HIP_API:
@@ -460,27 +349,10 @@ static void roctracer_enable_callback_fun(roctracer_domain_t domain, uint32_t op
roctracer_rtapi_callback_t callback, void* user_data) {
switch (domain) {
case ACTIVITY_DOMAIN_HSA_OPS:
case ACTIVITY_DOMAIN_HSA_API:
case ACTIVITY_DOMAIN_HSA_EVT:
hsa_support::EnableCallback(domain, op, callback, user_data);
break;
case ACTIVITY_DOMAIN_HSA_API: {
#if 0
if (op == HSA_API_ID_DISPATCH) {
if (!RocpLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data))
FATAL_LOGGING("HSA::RegisterApiCallback error(" << op << ") failed");
break;
}
#endif
if (op >= HSA_API_ID_NUMBER)
EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT,
"invalid HSA API operation ID(" << op << ")");
hsa_support::cb_table.Set(op, callback, user_data);
break;
}
case ACTIVITY_DOMAIN_HSA_EVT: {
if (!RocpLoader::Instance().RegisterEvtCallback(op, (void*)callback, user_data))
FATAL_LOGGING("HSA::RegisterEvtCallback error(" << op << ") failed");
break;
}
case ACTIVITY_DOMAIN_HIP_OPS:
break;
case ACTIVITY_DOMAIN_HIP_API: {
@@ -539,19 +411,10 @@ ROCTRACER_API roctracer_status_t roctracer_enable_domain_callback(
static void roctracer_disable_callback_fun(roctracer_domain_t domain, uint32_t op) {
switch (domain) {
case ACTIVITY_DOMAIN_HSA_OPS:
case ACTIVITY_DOMAIN_HSA_API:
case ACTIVITY_DOMAIN_HSA_EVT:
hsa_support::DisableCallback(domain, op);
break;
case ACTIVITY_DOMAIN_HSA_API: {
#if 0
if (op == HSA_API_ID_DISPATCH && !RocpLoader::Instance().RemoveApiCallback(op))
FATAL_LOGGING("HSA::RemoveActivityCallback error(" << op << ") failed");
break;
#endif
if (op >= HSA_API_ID_NUMBER)
EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT,
"invalid HSA API operation ID(" << op << ")");
hsa_support::cb_table.Set(op, nullptr, nullptr);
break;
}
case ACTIVITY_DOMAIN_HIP_OPS:
break;
case ACTIVITY_DOMAIN_HIP_API: {
@@ -568,11 +431,6 @@ static void roctracer_disable_callback_fun(roctracer_domain_t domain, uint32_t o
}
break;
}
case ACTIVITY_DOMAIN_HSA_EVT: {
if (!RocpLoader::Instance().RemoveEvtCallback(op))
FATAL_LOGGING("HSA::RemoveEvtCallback error(" << op << ") failed");
break;
}
case ACTIVITY_DOMAIN_ROCTX: {
if (RocTxLoader::Instance().Enabled() && !RocTxLoader::Instance().RemoveApiCallback(op))
FATAL_LOGGING("ROCTX::RemoveApiCallback(" << op << ") failed");
@@ -656,34 +514,11 @@ static void roctracer_enable_activity_fun(roctracer_domain_t domain, uint32_t op
roctracer_pool_t* pool) {
assert(pool != nullptr);
switch (domain) {
case ACTIVITY_DOMAIN_HSA_OPS: {
if (op == HSA_OP_ID_COPY) {
std::scoped_lock lock(hsa_support::init_mutex);
if (hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn != nullptr) {
[[maybe_unused]] hsa_status_t status =
hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(true);
assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");
}
RocpLoader::Instance();
hsa_support::async_copy_callback_enabled = true;
hsa_support::async_copy_callback_memory_pool = reinterpret_cast<MemoryPool*>(pool);
} else {
const bool init_phase = (RocpLoader::GetRef() == nullptr);
if (RocpLoader::GetRef() == nullptr) break;
if (init_phase) {
RocpLoader::Instance().InitActivityCallback((void*)HSA_AsyncActivityCallback,
(void*)pool);
}
if (!RocpLoader::Instance().EnableActivityCallback(op, true))
FATAL_LOGGING("HSA::EnableActivityCallback error");
}
break;
}
case ACTIVITY_DOMAIN_HSA_API:
break;
case ACTIVITY_DOMAIN_HSA_EVT:
case ACTIVITY_DOMAIN_HSA_OPS:
RocpLoader::Instance();
case ACTIVITY_DOMAIN_HSA_API:
case ACTIVITY_DOMAIN_HSA_EVT:
hsa_support::EnableActivity(domain, op, pool);
break;
case ACTIVITY_DOMAIN_HIP_OPS: {
if (HipLoader::Instance().Enabled() &&
@@ -757,29 +592,10 @@ ROCTRACER_API roctracer_status_t roctracer_enable_domain_activity(activity_domai
// Disable activity records logging
static void roctracer_disable_activity_fun(roctracer_domain_t domain, uint32_t op) {
switch (domain) {
case ACTIVITY_DOMAIN_HSA_OPS: {
if (op == HSA_OP_ID_COPY) {
std::scoped_lock lock(hsa_support::init_mutex);
hsa_support::async_copy_callback_enabled = false;
hsa_support::async_copy_callback_memory_pool = nullptr;
if (hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn != nullptr) {
[[maybe_unused]] hsa_status_t status =
hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(false);
assert(status == HSA_STATUS_SUCCESS || status == HSA_STATUS_ERROR_NOT_INITIALIZED ||
!"hsa_amd_profiling_async_copy_enable failed");
}
} else {
if (RocpLoader::GetRef() == nullptr) break;
if (!RocpLoader::Instance().EnableActivityCallback(op, false))
FATAL_LOGGING("HSA::EnableActivityCallback(false) error, op(" << op << ")");
}
break;
}
case ACTIVITY_DOMAIN_HSA_OPS:
case ACTIVITY_DOMAIN_HSA_API:
break;
case ACTIVITY_DOMAIN_HSA_EVT:
hsa_support::DisableActivity(domain, op);
break;
case ACTIVITY_DOMAIN_HIP_OPS: {
if (HipLoader::Instance().Enabled() &&
@@ -941,7 +757,7 @@ ROCTRACER_API void roctracer_stop() {
ROCTRACER_API roctracer_status_t roctracer_get_timestamp(roctracer_timestamp_t* timestamp) {
API_METHOD_PREFIX
*timestamp = util::timestamp_ns();
*timestamp = hsa_support::timestamp_ns();
API_METHOD_SUFFIX
}
@@ -985,71 +801,18 @@ __attribute__((destructor)) void destructor() {
extern "C" {
// The HSA_AMD_TOOL_PRIORITY variable must be a constant value type initialized by the loader
// itself, not by code during _init. 'extern const' seems do that although that is not a guarantee.
// itself, not by code during _init. 'extern const' seems to do that although that is not a
// guarantee.
ROCTRACER_EXPORT extern const uint32_t HSA_AMD_TOOL_PRIORITY = 50;
// HSA-runtime tool on-load method
ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
uint64_t failed_tool_count, const char* const* failed_tool_names) {
std::scoped_lock lock(hsa_support::init_mutex);
// Save the HSA core api and amd_ext api.
hsa_support::saved_core_api = *table->core_;
hsa_support::saved_amd_ext_api = *table->amd_ext_;
// Enumerate the agents.
if (hsa_support::saved_core_api.hsa_iterate_agents_fn(
[](hsa_agent_t agent, void* data) {
hsa_support::AgentInfo agent_info;
if (hsa_support::saved_core_api.hsa_agent_get_info_fn(
agent, HSA_AGENT_INFO_DEVICE, &agent_info.type) != HSA_STATUS_SUCCESS)
FATAL_LOGGING("hsa_agent_get_info failed");
switch (agent_info.type) {
case HSA_DEVICE_TYPE_CPU:
static int cpu_agent_count = 0;
agent_info.index = cpu_agent_count++;
break;
case HSA_DEVICE_TYPE_GPU:
static int gpu_agent_count = 0;
agent_info.index = gpu_agent_count++;
break;
default:
static int other_agent_count = 0;
agent_info.index = other_agent_count++;
break;
}
hsa_support::agent_info_map.emplace(agent.handle, agent_info);
return HSA_STATUS_SUCCESS;
},
nullptr) != HSA_STATUS_SUCCESS)
FATAL_LOGGING("hsa_iterate_agents failed");
// Install the HSA_OPS intercept
table->amd_ext_->hsa_amd_memory_async_copy_fn =
hsa_support::hsa_amd_memory_async_copy_interceptor;
table->amd_ext_->hsa_amd_memory_async_copy_rect_fn =
hsa_support::hsa_amd_memory_async_copy_rect_interceptor;
// Install the HSA_API wrappers
hsa_support::InstallCoreApiWrappers(table->core_);
hsa_support::InstallAmdExtWrappers(table->amd_ext_);
hsa_support::InstallImageExtWrappers(table->image_ext_);
if (hsa_support::async_copy_callback_enabled) {
[[maybe_unused]] hsa_status_t status =
hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(true);
assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");
}
[](auto&&...) {}(runtime_version, failed_tool_count, failed_tool_names);
hsa_support::Initialize(table);
return true;
}
ROCTRACER_EXPORT void OnUnload() {
if (hsa_support::async_copy_callback_enabled) {
[[maybe_unused]] hsa_status_t status =
hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(false);
assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");
}
}
ROCTRACER_EXPORT void OnUnload() { hsa_support::Finalize(); }
} // extern "C"
+2 -1
Wyświetl plik
@@ -694,7 +694,8 @@ void tool_load() {
extern "C" {
// The HSA_AMD_TOOL_PRIORITY variable must be a constant value type initialized by the loader
// itself, not by code during _init. 'extern const' seems do that although that is not a guarantee.
// itself, not by code during _init. 'extern const' seems to do that although that is not a
// guarantee.
ROCTRACER_EXPORT extern const uint32_t HSA_AMD_TOOL_PRIORITY = 1050;
// HSA-runtime tool on-load method
-6
Wyświetl plik
@@ -68,12 +68,6 @@ target_include_directories(MatrixTranspose_ctest PRIVATE ${PROJECT_SOURCE_DIR}/i
target_link_libraries(MatrixTranspose_ctest PRIVATE roctracer roctx)
add_dependencies(mytest MatrixTranspose_ctest)
## Build hsaco_test reference test
add_library(hsaco_test SHARED app/hsaco_test.cpp)
target_compile_definitions(hsaco_test PRIVATE AMD_INTERNAL_BUILD)
target_link_libraries(hsaco_test hsa-runtime64::hsa-runtime64)
add_dependencies(mytest hsaco_test)
## Build codeobj event test
add_library(codeobj_test SHARED app/codeobj_test.cpp)
target_include_directories(codeobj_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/inc)
+31 -47
Wyświetl plik
@@ -18,68 +18,52 @@
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <cstring>
#include <cstdio>
#include <cstdlib>
#include "inc/roctracer.h"
#include "inc/roctracer_hsa.h"
#include <rocprofiler/rocprofiler.h>
#define PUBLIC_API __attribute__((visibility("default")))
#define CONSTRUCTOR_API __attribute__((constructor))
#define DESTRUCTOR_API __attribute__((destructor))
namespace {
// Check returned HSA API status
void check_status(roctracer_status_t status) {
inline void CHECK(roctracer_status_t status) {
if (status != ROCTRACER_STATUS_SUCCESS) {
const char* error_string = roctracer_error_string();
fprintf(stderr, "ERROR: %s\n", error_string);
fprintf(stderr, "ERROR: %s\n", roctracer_error_string());
abort();
}
}
// codeobj callback
void codeobj_callback(uint32_t domain, uint32_t cid, const void* data, void* arg) {
void CodeObjectCallback(uint32_t domain, uint32_t cid, const void* data, void* arg) {
const hsa_evt_data_t* evt_data = reinterpret_cast<const hsa_evt_data_t*>(data);
const char* uri = evt_data->codeobj.uri;
printf(
"codeobj_callback domain(%u) cid(%u): load_base(0x%lx) load_size(0x%lx) load_delta(0x%lx) "
"uri(\"%s\")\n",
domain, cid, evt_data->codeobj.load_base, evt_data->codeobj.load_size,
evt_data->codeobj.load_delta, uri);
free((void*)uri);
fflush(stdout);
fprintf(stdout,
"codeobj_callback domain(%u) cid(%u): load_base(0x%lx) load_size(0x%lx) "
"load_delta(0x%lx) uri(\"%s\") unload(%d)\n",
domain, cid, evt_data->codeobj.load_base, evt_data->codeobj.load_size,
evt_data->codeobj.load_delta, evt_data->codeobj.uri, evt_data->codeobj.unload);
}
void initialize() {
roctracer_status_t status = roctracer_enable_op_callback(
ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ, codeobj_callback, NULL);
check_status(status);
} // namespace
#include <hsa/hsa_api_trace.h>
extern "C" {
// The HSA_AMD_TOOL_PRIORITY variable must be a constant value type initialized by the loader
// itself, not by code during _init. 'extern const' seems to do that although that is not a
// guarantee.
ROCTRACER_EXPORT extern const uint32_t HSA_AMD_TOOL_PRIORITY = 1050;
// HSA-runtime tool on-load method
ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
uint64_t failed_tool_count, const char* const* failed_tool_names) {
CHECK(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ,
CodeObjectCallback, nullptr));
return true;
}
void cleanup() {
roctracer_status_t status = roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_EVT);
check_status(status);
ROCTRACER_EXPORT void OnUnload() {
CHECK(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_EVT));
}
// Tool constructor
extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings) {
// Enable HSA events intercepting
settings->hsa_intercepting = 1;
// Initialize profiling
initialize();
}
// Tool destructor
extern "C" PUBLIC_API void OnUnloadTool() {
// Final resources cleanup
cleanup();
}
extern "C" CONSTRUCTOR_API void constructor() {
printf("constructor\n");
fflush(stdout);
}
extern "C" DESTRUCTOR_API void destructor() { OnUnloadTool(); }
} // extern "C"
-127
Wyświetl plik
@@ -1,127 +0,0 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <hsa/hsa.h>
#include <hsa/hsa_api_trace.h>
#include <hsa/hsa_ven_amd_loader.h>
#include <stdio.h>
#include <stdlib.h>
#define PUBLIC_API __attribute__((visibility("default")))
#define CONSTRUCTOR_API __attribute__((constructor))
#define DESTRUCTOR_API __attribute__((destructor))
// Evaluates `call` exactly once; if the result is not HSA_STATUS_SUCCESS, prints the text of
// the call to stdout and aborts the process.
// The result is kept in a macro-private name so that an argument which itself mentions an
// identifier called `status` still refers to the caller's variable, not to the macro's local.
#define HSA_RT(call)                                  \
  do {                                                \
    const hsa_status_t hsa_rt_status_ = (call);       \
    if (hsa_rt_status_ != HSA_STATUS_SUCCESS) {       \
      printf("error \"%s\"\n", #call);                \
      fflush(stdout);                                 \
      abort();                                        \
    }                                                 \
  } while (0)
// HSA API intercepting primitives
// Runtime's original hsa_executable_freeze entry point; OnLoad() stores it here before
// installing the interceptor, which forwards to it.
decltype(hsa_executable_freeze)* hsa_executable_freeze_fn;
// AMD loader extension function table; zero-initialised here and filled in by OnLoad() via
// hsa_system_get_major_extension_table().
hsa_ven_amd_loader_1_01_pfn_t loader_api_table{};
// Callback passed to hsa_ven_amd_loader_executable_iterate_loaded_code_objects(): queries the
// load base, load size, load delta and URI of one loaded code object through the AMD loader
// extension and prints each value on its own line. Any failing query aborts through HSA_RT.
// `executable` and `arg` are not used. Always returns HSA_STATUS_SUCCESS.
hsa_status_t code_object_callback(hsa_executable_t executable,
                                  hsa_loaded_code_object_t loaded_code_object, void* arg) {
  (void)executable;
  (void)arg;

  printf("code_object_callback\n");
  fflush(stdout);

  uint64_t load_base = 0;
  uint64_t load_size = 0;
  uint64_t load_delta = 0;
  uint32_t uri_len = 0;

  HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info(
      loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE, &load_base));
  HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info(
      loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE, &load_size));
  HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info(
      loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA, &load_delta));
  HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info(
      loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH, &uri_len));

  // One byte more than the reported length; calloc zero-fills, so the buffer stays
  // NUL-terminated as long as the loader writes at most uri_len bytes (assumed -- confirm
  // against the loader extension documentation). The size is computed in size_t so that the
  // "+ 1" cannot wrap around in 32 bits.
  char* uri_str = (char*)calloc((size_t)uri_len + 1, sizeof(char));
  if (!uri_str) {
    perror("calloc");
    abort();
  }

  HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info(
      loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI, uri_str));

  // uint64_t is not `unsigned long` on every platform; the casts make the arguments match the
  // conversion specifiers exactly while printing the same digits.
  printf("load_base(0x%llx)\n", (unsigned long long)load_base);
  fflush(stdout);
  printf("load_size(0x%llx)\n", (unsigned long long)load_size);
  fflush(stdout);
  printf("load_delta(0x%llx)\n", (unsigned long long)load_delta);
  fflush(stdout);
  printf("uri_len(%u)\n", uri_len);
  fflush(stdout);
  printf("uri_str(\"%s\")\n", uri_str);
  fflush(stdout);

  free(uri_str);

  return HSA_STATUS_SUCCESS;
}
// Replacement for hsa_executable_freeze installed by OnLoad(): dumps every code object loaded
// into `executable` (via code_object_callback), then forwards to the runtime's original
// hsa_executable_freeze.
// Returns whatever the original freeze call returns, so the application sees the runtime's
// real status instead of having the process aborted on its behalf. A failure while iterating
// the code objects still aborts through HSA_RT.
hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable, const char* options) {
  HSA_RT(loader_api_table.hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
      executable, code_object_callback, NULL));

  return hsa_executable_freeze_fn(executable, options);
}
// HSA-runtime tool on-load hook. Redirects hsa_executable_freeze in the runtime's core table to
// the interceptor (keeping the original entry for forwarding) and fetches the AMD loader
// extension table the interceptor relies on. Progress is announced on stdout.
// `runtime_version`, `failed_tool_count` and `failed_tool_names` are not used.
// Always returns true; a failure to obtain the extension table aborts through HSA_RT.
extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
                                  uint64_t failed_tool_count,
                                  const char* const* failed_tool_names) {
  fputs("OnLoad: begin\n", stdout);
  fflush(stdout);

  // Remember the runtime's own entry first, then put the interceptor in its slot.
  auto& freeze_slot = table->core_->hsa_executable_freeze_fn;
  hsa_executable_freeze_fn = freeze_slot;
  freeze_slot = hsa_executable_freeze_interceptor;

  // AMD loader extension, major version 1.
  HSA_RT(hsa_system_get_major_extension_table(
      HSA_EXTENSION_AMD_LOADER, 1, sizeof(hsa_ven_amd_loader_1_01_pfn_t), &loader_api_table));

  fputs("OnLoad: end\n", stdout);
  fflush(stdout);

  return true;
}
// HSA-runtime tool unload hook: only announces itself on stdout.
extern "C" PUBLIC_API void OnUnload() {
  fputs("OnUnload\n", stdout);
  fflush(stdout);
}
// Library constructor (CONSTRUCTOR_API): announces itself on stdout.
extern "C" CONSTRUCTOR_API void constructor() {
  fputs("constructor\n", stdout);
  fflush(stdout);
}
// Library destructor (DESTRUCTOR_API): announces itself on stdout.
extern "C" DESTRUCTOR_API void destructor() {
  fputs("destructor\n", stdout);
  fflush(stdout);
}
@@ -14,7 +14,6 @@ MatrixTranspose_hip_input_trace --check-events .*
copy_hsa_trace --check-events .*
copy_hsa_input_trace --check-events .*
load_unload_reload_trace --check-order .* --ignore-count hsa_agent_get_info
hsa_co_trace --check-none
code_obj_trace --check-none
trace_buffer --check-none
memory_pool --check-none
+1 -5
Wyświetl plik
@@ -169,11 +169,7 @@ unset ROCP_INPUT
# Check that the tracer tool can be unloaded and then reloaded.
eval_test "Load/Unload/Reload the tracer tool" ./test/load_unload_reload_test load_unload_reload_trace
export HSA_TOOLS_LIB="./test/libhsaco_test.so"
eval_test "tool HSA codeobj" ./test/MatrixTranspose hsa_co_trace
export ROCP_TOOL_LIB=./test/libcodeobj_test.so
export HSA_TOOLS_LIB="librocprofiler64.so"
export LD_PRELOAD=./test/libcodeobj_test.so
eval_test "tool tracer codeobj" ./test/MatrixTranspose code_obj_trace
unset LD_PRELOAD