diff --git a/inc/ext/prof_protocol.h b/inc/ext/prof_protocol.h index 9b88acbbbd..1f9333abce 100644 --- a/inc/ext/prof_protocol.h +++ b/inc/ext/prof_protocol.h @@ -21,6 +21,7 @@ #ifndef EXT_PROF_PROTOCOL_H_ #define EXT_PROF_PROTOCOL_H_ +#include #include /* Traced API domains */ diff --git a/inc/roctracer_hsa.h b/inc/roctracer_hsa.h index 777d4d8ba6..5f86c5a21e 100644 --- a/inc/roctracer_hsa.h +++ b/inc/roctracer_hsa.h @@ -27,8 +27,6 @@ #include #include -#include - // HSA OP ID enumeration enum hsa_op_id_t { HSA_OP_ID_DISPATCH = 0, @@ -38,9 +36,73 @@ enum hsa_op_id_t { HSA_OP_ID_NUMBER }; -struct hsa_ops_properties_t { - void* table; - void* reserved1[3]; +// HSA EVT ID enumeration +enum hsa_evt_id_t { + HSA_EVT_ID_ALLOCATE = 0, // Memory allocate callback + HSA_EVT_ID_DEVICE = 1, // Device assign callback + HSA_EVT_ID_MEMCOPY = 2, // Memcopy callback + HSA_EVT_ID_SUBMIT = 3, // Packet submission callback + HSA_EVT_ID_KSYMBOL = 4, // Loading/unloading of kernel symbol + HSA_EVT_ID_CODEOBJ = 5, // Loading/unloading of device code object + HSA_EVT_ID_NUMBER }; +struct hsa_ops_properties_t { + void* reserved1[4]; +}; + +// HSA EVT data type +typedef struct { + union { + struct { + const void* ptr; // allocated area ptr + size_t size; // allocated area size, zero size means 'free' callback + hsa_amd_segment_t segment; // allocated area's memory segment type + hsa_amd_memory_pool_global_flag_t global_flag; // allocated area's memory global flag + int is_code; // equal to 1 if code is allocated + } allocate; + + struct { + hsa_device_type_t type; // type of assigned device + uint32_t id; // id of assigned device + hsa_agent_t agent; // device HSA agent handle + const void* ptr; // ptr the device is assigned to + } device; + + struct { + const void* dst; // memcopy dst ptr + const void* src; // memcopy src ptr + size_t size; // memcopy size bytes + } memcopy; + + struct { + const void* packet; // submitted to GPU packet + const char* kernel_name; // kernel name, NULL if 
not a kernel dispatch packet + hsa_queue_t* queue; // HSA queue the packet was submitted to + uint32_t device_type; // type of device the packet is submitted to + uint32_t device_id; // id of device the packet is submitted to + } submit; + + struct { + uint64_t object; // kernel symbol object + const char* name; // kernel symbol name + uint32_t name_length; // kernel symbol name length + int unload; // symbol executable destroy + } ksymbol; + + struct { + uint32_t storage_type; // code object storage type + int storage_file; // origin file descriptor + uint64_t memory_base; // origin memory base + uint64_t memory_size; // origin memory size + uint64_t load_base; // code object load base + uint64_t load_size; // code object load size + uint64_t load_delta; // code object load delta + uint32_t uri_length; // URI string length (not including the terminating NUL character) + const char* uri; // URI string + int unload; // unload flag + } codeobj; + }; +} hsa_evt_data_t; + #endif // INC_ROCTRACER_HSA_H_ diff --git a/script/gen_ostream_ops.py b/script/gen_ostream_ops.py index 0f839acc39..f40c9ced6f 100755 --- a/script/gen_ostream_ops.py +++ b/script/gen_ostream_ops.py @@ -52,6 +52,7 @@ LICENSE = \ header_basic = \ +'namespace detail {\n' + \ 'template \n' + \ ' inline static std::ostream& operator<<(std::ostream& out, const T& v) {\n' + \ ' using std::operator<<;\n' + \ @@ -111,9 +112,9 @@ def process_struct(file_handle, cppHeader_struct, cppHeader, parent_hier_name, a indent = "" str += " if (std::string(\"" + cppHeader_struct + "::" + name + "\").find(" + apiname.upper() + "_structs_regex" + ") != std::string::npos) {\n" indent = " " - str += indent + " roctracer::" + apiname.lower() + "_support::operator<<(out, \"" + name + "=\");\n" - str += indent + " roctracer::" + apiname.lower() + "_support::operator<<(out, v." 
+ name + ");\n" - str += indent + " roctracer::" + apiname.lower() + "_support::operator<<(out, \", \");\n" + str += indent + " roctracer::" + apiname.lower() + "_support::detail::operator<<(out, \"" + name + "=\");\n" + str += indent + " roctracer::" + apiname.lower() + "_support::detail::operator<<(out, v." + name + ");\n" + str += indent + " roctracer::" + apiname.lower() + "_support::detail::operator<<(out, \", \");\n" str += " }\n" if "void" not in mtype: global_str += str @@ -181,7 +182,7 @@ def gen_cppheader(infilepath, outfilepath, rank): if len(cppHeader.classes[c]["properties"]["public"]) != 0: output_filename_h.write("inline static std::ostream& operator<<(std::ostream& out, const " + c + "& v)\n") output_filename_h.write("{\n") - output_filename_h.write(" roctracer::" + apiname.lower() + "_support::operator<<(out, '{');\n") + output_filename_h.write(" roctracer::" + apiname.lower() + "_support::detail::operator<<(out, '{');\n") output_filename_h.write(" " + apiname.upper() + "_depth_max_cnt++;\n") output_filename_h.write(" if (" + apiname.upper() + "_depth_max == -1 || " + apiname.upper() + "_depth_max_cnt <= " + apiname.upper() + "_depth_max" + ") {\n" ) process_struct(output_filename_h, c, cppHeader, "", apiname) @@ -190,15 +191,15 @@ def gen_cppheader(infilepath, outfilepath, rank): output_filename_h.write(global_str) output_filename_h.write(" };\n") output_filename_h.write(" " + apiname.upper() + "_depth_max_cnt--;\n") - output_filename_h.write(" roctracer::" + apiname.lower() + "_support::operator<<(out, '}');\n") + output_filename_h.write(" roctracer::" + apiname.lower() + "_support::detail::operator<<(out, '}');\n") output_filename_h.write(" return out;\n") output_filename_h.write("}\n") global_str = '' - global_ops += "inline static std::ostream& operator<<(std::ostream& out, const " + c + "& v)\n" + "{\n" + " roctracer::" + apiname.lower() + "_support::operator<<(out, v);\n" + " return out;\n" + "}\n\n" + global_ops += "inline static 
std::ostream& operator<<(std::ostream& out, const " + c + "& v)\n" + "{\n" + " roctracer::" + apiname.lower() + "_support::detail::operator<<(out, v);\n" + " return out;\n" + "}\n\n" if rank == 1 or rank == 2: footer = '// end ostream ops for '+ apiname + ' \n' - footer += '};};\n\n' + footer += '};};};\n\n' output_filename_h.write(footer) output_filename_h.write(global_ops) footer = '#endif //__cplusplus\n' + \ diff --git a/script/hsaap.py b/script/hsaap.py index 0b592bd6d6..866f1b569b 100755 --- a/script/hsaap.py +++ b/script/hsaap.py @@ -334,8 +334,7 @@ class API_DescrParser: self.cpp_content += '#include \n' self.cpp_content += '#include \"util/callback_table.h\"\n\n' self.cpp_content += '#include \n' - self.cpp_content += 'namespace roctracer {\n' - self.cpp_content += 'namespace hsa_support {\n\n' + self.cpp_content += 'namespace roctracer::hsa_support::detail {\n' self.cpp_content += 'static CoreApiTable CoreApi_saved_before_cb;\n' self.cpp_content += 'static AmdExtTable AmdExt_saved_before_cb;\n' @@ -345,7 +344,7 @@ class API_DescrParser: self.cpp_content += self.add_section('API intercepting code', '', self.gen_intercept) self.cpp_content += self.add_section('API get_name function', ' ', self.gen_get_name) self.cpp_content += self.add_section('API get_code function', ' ', self.gen_get_code) - self.cpp_content += '\n};};\n' + self.cpp_content += '\n};\n' # add code section def add_section(self, title, gap, fun): @@ -465,7 +464,7 @@ class API_DescrParser: def gen_get_name(self, n, name, call, struct): content = '' if n == -1: - content += 'static const char* GetApiName(const uint32_t& id) {\n' + content += 'static const char* GetApiName(uint32_t id) {\n' content += ' switch (id) {\n' return content if call != '-': diff --git a/src/roctracer/exception.h b/src/roctracer/exception.h index a2a33a0361..9efe2ee574 100644 --- a/src/roctracer/exception.h +++ b/src/roctracer/exception.h @@ -24,6 +24,7 @@ #include #include #include +#include #define EXC_RAISING(error, 
stream) \ do { \ diff --git a/src/roctracer/hsa_support.cpp b/src/roctracer/hsa_support.cpp new file mode 100644 index 0000000000..dcd3c4764a --- /dev/null +++ b/src/roctracer/hsa_support.cpp @@ -0,0 +1,602 @@ +/* Copyright (c) 2022 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include "hsa_support.h" + +#include "correlation_id.h" +#include "exception.h" +#include "loader.h" +#include "memory_pool.h" +#include "roctracer.h" +#include "roctracer_hsa.h" +#include "tracker.h" +#include "util/callback_table.h" +#include "util/logger.h" + +#include +#include +#include +#include +#include + +#include "hsa_prof_str.inline.h" + +namespace roctracer::hsa_support { + +namespace { + +util::CallbackTable hsa_evt_cb_table; + +CoreApiTable saved_core_api{}; +AmdExtTable saved_amd_ext_api{}; +hsa_ven_amd_loader_1_01_pfn_t hsa_loader_api{}; + +// async copy activity callback +std::mutex init_mutex; +bool async_copy_callback_enabled = false; +MemoryPool* async_copy_callback_memory_pool = nullptr; + +struct AgentInfo { + int index; + hsa_device_type_t type; +}; +std::unordered_map agent_info_map; + +hsa_status_t HSA_API MemoryAllocateIntercept(hsa_region_t region, size_t size, void** ptr) { + hsa_status_t status = saved_core_api.hsa_memory_allocate_fn(region, size, ptr); + if (status != HSA_STATUS_SUCCESS) return status; + + if (auto [callback_fun, callback_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_ALLOCATE); callback_fun) { + hsa_evt_data_t data{}; + data.allocate.ptr = *ptr; + data.allocate.size = size; + if (saved_core_api.hsa_region_get_info_fn(region, HSA_REGION_INFO_SEGMENT, + &data.allocate.segment) != HSA_STATUS_SUCCESS || + saved_core_api.hsa_region_get_info_fn(region, HSA_REGION_INFO_GLOBAL_FLAGS, + &data.allocate.global_flag) != HSA_STATUS_SUCCESS) + FATAL_LOGGING("hsa_region_get_info failed"); + + callback_fun(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE, &data, callback_arg); + } + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t MemoryAssignAgentIntercept(void* ptr, hsa_agent_t agent, + hsa_access_permission_t access) { + hsa_status_t status = saved_core_api.hsa_memory_assign_agent_fn(ptr, agent, access); + if (status != HSA_STATUS_SUCCESS) return status; + + if (auto [callback_fun, callback_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_DEVICE); 
callback_fun) { + hsa_evt_data_t data{}; + data.device.ptr = ptr; + if (saved_core_api.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_DEVICE, &data.device.type) != + HSA_STATUS_SUCCESS) + FATAL_LOGGING("hsa_agent_get_info failed"); + + callback_fun(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE, &data, callback_arg); + } + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t MemoryCopyIntercept(void* dst, const void* src, size_t size) { + hsa_status_t status = saved_core_api.hsa_memory_copy_fn(dst, src, size); + if (status != HSA_STATUS_SUCCESS) return status; + + if (auto [callback_fun, callback_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_MEMCOPY); callback_fun) { + hsa_evt_data_t data{}; + data.memcopy.dst = dst; + data.memcopy.src = src; + data.memcopy.size = size; + + callback_fun(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_MEMCOPY, &data, callback_arg); + } + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t MemoryPoolAllocateIntercept(hsa_amd_memory_pool_t pool, size_t size, uint32_t flags, + void** ptr) { + hsa_status_t status = saved_amd_ext_api.hsa_amd_memory_pool_allocate_fn(pool, size, flags, ptr); + if (size == 0 || status != HSA_STATUS_SUCCESS) return status; + + if (auto [callback_fun, callback_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_ALLOCATE); callback_fun) { + hsa_evt_data_t data{}; + data.allocate.ptr = *ptr; + data.allocate.size = size; + + if (saved_amd_ext_api.hsa_amd_memory_pool_get_info_fn( + pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &data.allocate.segment) != HSA_STATUS_SUCCESS || + saved_amd_ext_api.hsa_amd_memory_pool_get_info_fn( + pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &data.allocate.global_flag) != + HSA_STATUS_SUCCESS) + FATAL_LOGGING("hsa_amd_memory_pool_get_info failed"); + + callback_fun(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE, &data, callback_arg); + + if (std::tie(callback_fun, callback_arg) = hsa_evt_cb_table.Get(HSA_EVT_ID_DEVICE); + !callback_fun) + return HSA_STATUS_SUCCESS; + + // FIXME: Why is this only reported if HSA_EVT_ID_ALLOCATE is also set? 
+ auto callback_data = std::make_tuple(callback_fun, callback_arg, pool, ptr); + auto agent_callback = [](hsa_agent_t agent, void* iterate_agent_callback_data) { + auto [callback_fun, callback_arg, pool, ptr] = + *reinterpret_cast(iterate_agent_callback_data); + + if (hsa_amd_memory_pool_access_t value; + saved_amd_ext_api.hsa_amd_agent_memory_pool_get_info_fn( + agent, pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &value) != HSA_STATUS_SUCCESS || + value != HSA_AMD_MEMORY_POOL_ACCESS_ALLOWED_BY_DEFAULT) + return HSA_STATUS_SUCCESS; + + auto it = agent_info_map.find(agent.handle); + if (it == agent_info_map.end()) FATAL_LOGGING("agent was not found in the agent_info map"); + + hsa_evt_data_t data{}; + data.device.type = it->second.type; + data.device.id = it->second.index; + data.device.agent = agent; + data.device.ptr = *ptr; // ptr is the void** out-parameter; report the allocated address + + callback_fun(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE, &data, callback_arg); + return HSA_STATUS_SUCCESS; + }; + saved_core_api.hsa_iterate_agents_fn(agent_callback, &callback_data); + } + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t MemoryPoolFreeIntercept(void* ptr) { + if (auto [callback_fun, callback_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_ALLOCATE); callback_fun) { + hsa_evt_data_t data{}; + data.allocate.ptr = ptr; + data.allocate.size = 0; + callback_fun(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE, &data, callback_arg); + } + + return saved_amd_ext_api.hsa_amd_memory_pool_free_fn(ptr); +} + +// Agent allow access callback 'hsa_amd_agents_allow_access' +hsa_status_t AgentsAllowAccessIntercept(uint32_t num_agents, const hsa_agent_t* agents, + const uint32_t* flags, const void* ptr) { + hsa_status_t status = + saved_amd_ext_api.hsa_amd_agents_allow_access_fn(num_agents, agents, flags, ptr); + if (status != HSA_STATUS_SUCCESS) return status; + + if (auto [callback_fun, callback_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_DEVICE); callback_fun) { + while (num_agents--) { + hsa_agent_t agent = *agents++; + auto it = 
agent_info_map.find(agent.handle); + if (it == agent_info_map.end()) FATAL_LOGGING("agent was not found in the agent_info map"); + + hsa_evt_data_t data{}; + data.device.type = it->second.type; + data.device.id = it->second.index; + data.device.agent = agent; + data.device.ptr = ptr; + + callback_fun(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE, &data, callback_arg); + } + } + return HSA_STATUS_SUCCESS; +} + +struct CodeObjectCallbackArg { + activity_rtapi_callback_t callback_fun; + void* callback_arg; + bool unload; +}; + +hsa_status_t CodeObjectCallback(hsa_executable_t executable, + hsa_loaded_code_object_t loaded_code_object, void* arg) { + auto* code_object_callback_arg = static_cast(arg); + hsa_evt_data_t data{}; + + if (hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info( + loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE, + &data.codeobj.storage_type) != HSA_STATUS_SUCCESS) + FATAL_LOGGING("hsa_ven_amd_loader_loaded_code_object_get_info failed"); + + if (data.codeobj.storage_type == HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE) { + if (hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info( + loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_FILE, + &data.codeobj.storage_file) != HSA_STATUS_SUCCESS || + data.codeobj.storage_file == -1) + FATAL_LOGGING("hsa_ven_amd_loader_loaded_code_object_get_info failed"); + data.codeobj.memory_base = data.codeobj.memory_size = 0; + } else if (data.codeobj.storage_type == HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY) { + if (hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info( + loaded_code_object, + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_BASE, + &data.codeobj.memory_base) != HSA_STATUS_SUCCESS || + hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info( + loaded_code_object, + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_SIZE, + 
&data.codeobj.memory_size) != HSA_STATUS_SUCCESS) + FATAL_LOGGING("hsa_ven_amd_loader_loaded_code_object_get_info failed"); + data.codeobj.storage_file = -1; + } else if (data.codeobj.storage_type == HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE) { + return HSA_STATUS_SUCCESS; // FIXME: do we really not care about these code objects? + } else { + FATAL_LOGGING("Unknown code object storage type: " << data.codeobj.storage_type); + } + + if (hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info( + loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE, + &data.codeobj.load_base) != HSA_STATUS_SUCCESS || + hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info( + loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE, + &data.codeobj.load_size) != HSA_STATUS_SUCCESS || + hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info( + loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA, + &data.codeobj.load_delta) != HSA_STATUS_SUCCESS) + FATAL_LOGGING("hsa_ven_amd_loader_loaded_code_object_get_info failed"); + + if (hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info( + loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH, + &data.codeobj.uri_length) != HSA_STATUS_SUCCESS) + FATAL_LOGGING("hsa_ven_amd_loader_loaded_code_object_get_info failed"); + + std::string uri_str(data.codeobj.uri_length, '\0'); + if (hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info( + loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI, uri_str.data()) != + HSA_STATUS_SUCCESS) + FATAL_LOGGING("hsa_ven_amd_loader_loaded_code_object_get_info failed"); + + data.codeobj.uri = uri_str.c_str(); + data.codeobj.unload = code_object_callback_arg->unload ? 
1 : 0; + code_object_callback_arg->callback_fun(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ, &data, + code_object_callback_arg->callback_arg); + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t ExecutableFreezeIntercept(hsa_executable_t executable, const char* options) { + hsa_status_t status = saved_core_api.hsa_executable_freeze_fn(executable, options); + if (status != HSA_STATUS_SUCCESS) return status; + + if (auto [callback_fun, callback_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_CODEOBJ); callback_fun) { + CodeObjectCallbackArg arg = {callback_fun, callback_arg, false}; + hsa_loader_api.hsa_ven_amd_loader_executable_iterate_loaded_code_objects( + executable, CodeObjectCallback, &arg); + } + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t ExecutableDestroyIntercept(hsa_executable_t executable) { + if (auto [callback_fun, callback_arg] = hsa_evt_cb_table.Get(HSA_EVT_ID_CODEOBJ); callback_fun) { + CodeObjectCallbackArg arg = {callback_fun, callback_arg, true}; + hsa_loader_api.hsa_ven_amd_loader_executable_iterate_loaded_code_objects( + executable, CodeObjectCallback, &arg); + } + + return saved_core_api.hsa_executable_destroy_fn(executable); +} + +void MemoryASyncCopyHandler(const Tracker::entry_t* entry) { + activity_record_t record{}; + record.domain = ACTIVITY_DOMAIN_HSA_OPS; + record.op = HSA_OP_ID_COPY; + record.begin_ns = entry->begin; + record.end_ns = entry->end; + record.device_id = 0; + record.correlation_id = entry->correlation_id; + entry->pool->Write(record); +} + +hsa_status_t MemoryASyncCopyIntercept(void* dst, hsa_agent_t dst_agent, const void* src, + hsa_agent_t src_agent, size_t size, uint32_t num_dep_signals, + const hsa_signal_t* dep_signals, + hsa_signal_t completion_signal) { + if (!async_copy_callback_enabled) { + return saved_amd_ext_api.hsa_amd_memory_async_copy_fn( + dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, completion_signal); + } + + Tracker::entry_t* entry = new Tracker::entry_t(); + entry->handler = 
MemoryASyncCopyHandler; + entry->pool = async_copy_callback_memory_pool; + entry->correlation_id = CorrelationId(); + Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry); + + hsa_status_t status = saved_amd_ext_api.hsa_amd_memory_async_copy_fn( + dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, entry->signal); + if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry); + + return status; +} + +hsa_status_t MemoryASyncCopyRectIntercept(const hsa_pitched_ptr_t* dst, + const hsa_dim3_t* dst_offset, + const hsa_pitched_ptr_t* src, + const hsa_dim3_t* src_offset, const hsa_dim3_t* range, + hsa_agent_t copy_agent, hsa_amd_copy_direction_t dir, + uint32_t num_dep_signals, const hsa_signal_t* dep_signals, + hsa_signal_t completion_signal) { + if (!async_copy_callback_enabled) { + return saved_amd_ext_api.hsa_amd_memory_async_copy_rect_fn( + dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals, + completion_signal); + } + + Tracker::entry_t* entry = new Tracker::entry_t(); + entry->handler = MemoryASyncCopyHandler; + entry->pool = async_copy_callback_memory_pool; + entry->correlation_id = CorrelationId(); + Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry); + + hsa_status_t status = saved_amd_ext_api.hsa_amd_memory_async_copy_rect_fn( + dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals, + entry->signal); + if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry); + + return status; +} + +void AsyncActivityCallback(uint32_t op_id, void* record, void* arg) { + MemoryPool* pool = reinterpret_cast(arg); + roctracer_record_t* record_ptr = reinterpret_cast(record); + record_ptr->domain = ACTIVITY_DOMAIN_HSA_OPS; + pool->Write(*record_ptr); +} + +} // namespace + +roctracer_timestamp_t timestamp_ns() { + uint64_t sysclock; + + if (saved_core_api.hsa_system_get_info_fn == nullptr) + FATAL_LOGGING("HSA intercept is not active"); 
+ + if (hsa_status_t status = + saved_core_api.hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock); + status == HSA_STATUS_ERROR_NOT_INITIALIZED) + return 0; + else if (status != HSA_STATUS_SUCCESS) + FATAL_LOGGING("hsa_system_get_info failed"); + + static uint64_t sysclock_period = []() { + uint64_t sysclock_hz = 0; + if (hsa_status_t status = saved_core_api.hsa_system_get_info_fn( + HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz); + status != HSA_STATUS_SUCCESS) + FATAL_LOGGING("hsa_system_get_info failed"); + + return (uint64_t)1000000000 / sysclock_hz; + }(); + + return sysclock * sysclock_period; +} + +void Initialize(HsaApiTable* table) { + std::scoped_lock lock(init_mutex); + + // Save the HSA core api and amd_ext api. + saved_core_api = *table->core_; + saved_amd_ext_api = *table->amd_ext_; + + // Enumerate the agents. + if (hsa_support::saved_core_api.hsa_iterate_agents_fn( + [](hsa_agent_t agent, void* data) { + hsa_support::AgentInfo agent_info; + if (hsa_support::saved_core_api.hsa_agent_get_info_fn( + agent, HSA_AGENT_INFO_DEVICE, &agent_info.type) != HSA_STATUS_SUCCESS) + FATAL_LOGGING("hsa_agent_get_info failed"); + switch (agent_info.type) { + case HSA_DEVICE_TYPE_CPU: + static int cpu_agent_count = 0; + agent_info.index = cpu_agent_count++; + break; + case HSA_DEVICE_TYPE_GPU: + static int gpu_agent_count = 0; + agent_info.index = gpu_agent_count++; + break; + default: + static int other_agent_count = 0; + agent_info.index = other_agent_count++; + break; + } + hsa_support::agent_info_map.emplace(agent.handle, agent_info); + return HSA_STATUS_SUCCESS; + }, + nullptr) != HSA_STATUS_SUCCESS) + FATAL_LOGGING("hsa_iterate_agents failed"); + + // Install the code object intercept. 
+ hsa_status_t status = table->core_->hsa_system_get_major_extension_table_fn( + HSA_EXTENSION_AMD_LOADER, 1, sizeof(hsa_ven_amd_loader_1_01_pfn_t), &hsa_loader_api); + if (status != HSA_STATUS_SUCCESS) FATAL_LOGGING("hsa_system_get_major_extension_table failed"); + + // Install the HSA_OPS intercept + table->amd_ext_->hsa_amd_memory_async_copy_fn = MemoryASyncCopyIntercept; + table->amd_ext_->hsa_amd_memory_async_copy_rect_fn = MemoryASyncCopyRectIntercept; + + // Install the HSA_EVT intercept + table->core_->hsa_memory_allocate_fn = MemoryAllocateIntercept; + table->core_->hsa_memory_assign_agent_fn = MemoryAssignAgentIntercept; + table->core_->hsa_memory_copy_fn = MemoryCopyIntercept; + table->amd_ext_->hsa_amd_memory_pool_allocate_fn = MemoryPoolAllocateIntercept; + table->amd_ext_->hsa_amd_memory_pool_free_fn = MemoryPoolFreeIntercept; + table->amd_ext_->hsa_amd_agents_allow_access_fn = AgentsAllowAccessIntercept; + table->core_->hsa_executable_freeze_fn = ExecutableFreezeIntercept; + table->core_->hsa_executable_destroy_fn = ExecutableDestroyIntercept; + + // Install the HSA_API wrappers + detail::InstallCoreApiWrappers(table->core_); + detail::InstallAmdExtWrappers(table->amd_ext_); + detail::InstallImageExtWrappers(table->image_ext_); + + if (async_copy_callback_enabled) { + [[maybe_unused]] hsa_status_t status = + saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(true); + assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed"); + } +} + +void Finalize() { + if (hsa_support::async_copy_callback_enabled) { + [[maybe_unused]] hsa_status_t status = + hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(false); + assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed"); + } +} + +const char* GetApiName(uint32_t id) { return detail::GetApiName(id); } + +const char* GetEvtName(uint32_t id) { + switch (id) { + case HSA_EVT_ID_ALLOCATE: + return "ALLOCATE"; + case HSA_EVT_ID_DEVICE: + 
return "DEVICE"; + case HSA_EVT_ID_MEMCOPY: + return "MEMCOPY"; + case HSA_EVT_ID_SUBMIT: + return "SUBMIT"; + case HSA_EVT_ID_KSYMBOL: + return "KSYMBOL"; + case HSA_EVT_ID_CODEOBJ: + return "CODEOBJ"; + case HSA_EVT_ID_NUMBER: + break; + }; + throw ApiError(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, "invalid HSA EVT callback id"); +} + +const char* GetOpsName(uint32_t id) { return RocpLoader::Instance().GetOpName(id); } + +uint32_t GetApiCode(const char* str) { return detail::GetApiCode(str); } + +void EnableActivity(roctracer_domain_t domain, uint32_t op, roctracer_pool_t* pool) { + switch (domain) { + case ACTIVITY_DOMAIN_HSA_OPS: + if (op == HSA_OP_ID_COPY) { + std::scoped_lock lock(init_mutex); + + if (saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn != nullptr) { + [[maybe_unused]] hsa_status_t status = + saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(true); + assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed"); + } + async_copy_callback_enabled = true; + async_copy_callback_memory_pool = reinterpret_cast(pool); + } else { + const bool init_phase = (RocpLoader::GetRef() == nullptr); + if (RocpLoader::GetRef() == nullptr) break; + if (init_phase) { + RocpLoader::Instance().InitActivityCallback( + reinterpret_cast(AsyncActivityCallback), pool); + } + if (!RocpLoader::Instance().EnableActivityCallback(op, true)) + FATAL_LOGGING("HSA::EnableActivityCallback error"); + } + break; + case ACTIVITY_DOMAIN_HSA_API: + // FIXME: Add HSA api activities. 
+ break; + case ACTIVITY_DOMAIN_HSA_EVT: + break; + default: + break; + } +} + +void EnableCallback(roctracer_domain_t domain, uint32_t cid, roctracer_rtapi_callback_t callback, + void* user_data) { + switch (domain) { + case ACTIVITY_DOMAIN_HSA_OPS: + break; + case ACTIVITY_DOMAIN_HSA_API: + if (cid >= HSA_API_ID_NUMBER) + EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, + "invalid HSA API operation ID(" << cid << ")"); + + detail::cb_table.Set(cid, callback, user_data); + break; + case ACTIVITY_DOMAIN_HSA_EVT: + if (cid >= HSA_EVT_ID_NUMBER) + EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, + "invalid HSA EVT operation ID(" << cid << ")"); + + hsa_evt_cb_table.Set(cid, callback, user_data); + break; + default: + break; + } +} + +void DisableActivity(roctracer_domain_t domain, uint32_t op) { + switch (domain) { + case ACTIVITY_DOMAIN_HSA_OPS: + if (op == HSA_OP_ID_COPY) { + std::scoped_lock lock(init_mutex); + + async_copy_callback_enabled = false; + async_copy_callback_memory_pool = nullptr; + + if (saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn != nullptr) { + [[maybe_unused]] hsa_status_t status = + saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(false); + assert(status == HSA_STATUS_SUCCESS || status == HSA_STATUS_ERROR_NOT_INITIALIZED || + !"hsa_amd_profiling_async_copy_enable failed"); + } + } else { + if (RocpLoader::GetRef() != nullptr && + !RocpLoader::Instance().EnableActivityCallback(op, false)) + FATAL_LOGGING("HSA::EnableActivityCallback(false) error, op(" << op << ")"); + } + break; + case ACTIVITY_DOMAIN_HSA_API: + // FIXME: Add HSA api activities. 
+ break; + case ACTIVITY_DOMAIN_HSA_EVT: + break; + default: + break; + } +} + +void DisableCallback(roctracer_domain_t domain, uint32_t cid) { + switch (domain) { + case ACTIVITY_DOMAIN_HSA_OPS: + break; + case ACTIVITY_DOMAIN_HSA_API: + if (cid >= HSA_API_ID_NUMBER) + EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, + "invalid HSA API operation ID(" << cid << ")"); + detail::cb_table.Set(cid, nullptr, nullptr); + break; + case ACTIVITY_DOMAIN_HSA_EVT: + if (cid >= HSA_EVT_ID_NUMBER) + EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, + "invalid HSA EVT operation ID(" << cid << ")"); + hsa_evt_cb_table.Set(cid, nullptr, nullptr); + break; + default: + break; + } +} + +} // namespace roctracer::hsa_support diff --git a/src/roctracer/hsa_support.h b/src/roctracer/hsa_support.h new file mode 100644 index 0000000000..563df51712 --- /dev/null +++ b/src/roctracer/hsa_support.h @@ -0,0 +1,50 @@ +/* Copyright (c) 2022 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#ifndef HSA_SUPPORT_H_ +#define HSA_SUPPORT_H_ + +#include "roctracer.h" +#include "roctracer_hsa.h" + +#include + +namespace roctracer::hsa_support { + +void Initialize(HsaApiTable* table); +void Finalize(); + +const char* GetApiName(uint32_t id); +const char* GetEvtName(uint32_t id); +const char* GetOpsName(uint32_t id); +uint32_t GetApiCode(const char* str); + +void EnableActivity(roctracer_domain_t domain, uint32_t op, roctracer_pool_t* pool); +void EnableCallback(roctracer_domain_t domain, uint32_t cid, roctracer_rtapi_callback_t callback, + void* user_data); + +void DisableCallback(roctracer_domain_t domain, uint32_t cid); +void DisableActivity(roctracer_domain_t domain, uint32_t op); + +uint64_t timestamp_ns(); + +} // namespace roctracer::hsa_support + +#endif // HSA_SUPPORT_H_ diff --git a/src/roctracer/loader.h b/src/roctracer/loader.h index 8b097bc26b..286d0bc1c7 100644 --- a/src/roctracer/loader.h +++ b/src/roctracer/loader.h @@ -24,6 +24,8 @@ #include #include #include +#include +#include #define ONLD_TRACE(str) \ if (getenv("ROCP_ONLOAD_TRACE")) do { \ @@ -36,7 +38,7 @@ namespace roctracer { // Base runtime loader class template class BaseLoader : public T { - static uint32_t GetPid() { return syscall(__NR_getpid); } + static uint32_t GetPid() { return ::syscall(__NR_getpid); } public: typedef std::mutex mutex_t; @@ -132,8 +134,12 @@ class RocpApi { } }; +} // namespace roctracer + // HIP runtime library loader class #include "roctracer_hip.h" + +namespace roctracer { #if STATIC_BUILD __attribute__((weak)) hipError_t hipRegisterApiCallback(uint32_t id, void* fun, void* arg) { return hipErrorUnknown; diff --git a/src/roctracer/memory_pool.h b/src/roctracer/memory_pool.h index 
d7ff898c8f..6522dd4908 100644 --- a/src/roctracer/memory_pool.h +++ b/src/roctracer/memory_pool.h @@ -21,6 +21,8 @@ #ifndef MEMORY_POOL_H_ #define MEMORY_POOL_H_ +#include "roctracer.h" + #include #include #include diff --git a/src/roctracer/roctracer.cpp b/src/roctracer/roctracer.cpp index 8ac6d264b1..6b886ef8b9 100644 --- a/src/roctracer/roctracer.cpp +++ b/src/roctracer/roctracer.cpp @@ -40,13 +40,12 @@ #include "correlation_id.h" #include "journal.h" #include "loader.h" +#include "hsa_support.h" #include "memory_pool.h" #include "tracker.h" #include "exception.h" #include "util/logger.h" -#include "hsa_prof_str.inline.h" - #define CHECK_HSA_STATUS(msg, status) \ do { \ if ((status) != HSA_STATUS_SUCCESS) { \ @@ -106,46 +105,11 @@ static inline uint32_t GetTid() { // namespace roctracer { -namespace hsa_support { - -static CoreApiTable saved_core_api; -static AmdExtTable saved_amd_ext_api; - -// async copy activity callback -std::mutex init_mutex; -bool async_copy_callback_enabled = false; -MemoryPool* async_copy_callback_memory_pool = nullptr; - -} // namespace hsa_support - namespace ext_support { roctracer_start_cb_t roctracer_start_cb = nullptr; roctracer_stop_cb_t roctracer_stop_cb = nullptr; } // namespace ext_support -namespace util { - -roctracer_timestamp_t timestamp_ns() { - uint64_t sysclock; - - hsa_status_t status = - hsa_support::saved_core_api.hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock); - if (status == HSA_STATUS_ERROR_NOT_INITIALIZED) return 0; - CHECK_HSA_STATUS("hsa_system_get_info()", status); - - static uint64_t sysclock_period = []() { - uint64_t sysclock_hz = 0; - hsa_status_t status = hsa_support::saved_core_api.hsa_system_get_info_fn( - HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz); - CHECK_HSA_STATUS("hsa_system_get_info()", status); - return (uint64_t)1000000000 / sysclock_hz; - }(); - - return sysclock * sysclock_period; -} - -} // namespace util - struct CallbackJournalData { roctracer_rtapi_callback_t callback; 
void* user_data; @@ -193,12 +157,12 @@ void HIP_ApiCallback(uint32_t op_id, roctracer_record_t* record, void* callback_ record->op = op_id; record->process_id = GetPid(); record->thread_id = GetTid(); - record->begin_ns = util::timestamp_ns(); + record->begin_ns = hsa_support::timestamp_ns(); record->correlation_id = correlation_id; } } else { if (pool != nullptr) { - record->end_ns = util::timestamp_ns(); + record->end_ns = hsa_support::timestamp_ns(); if (auto external_id = ExternalCorrelationId()) { roctracer_record_t ext_record{}; @@ -247,82 +211,6 @@ void HIP_AsyncActivityCallback(uint32_t op_id, void* record_ptr, void* arg) { record_ptr->begin_ns, record_ptr->end_ns); } -namespace hsa_support { - -struct AgentInfo { - int index; - hsa_device_type_t type; -}; -std::unordered_map agent_info_map; - -void hsa_async_copy_handler(const Tracker::entry_t* entry) { - activity_record_t record{}; - record.domain = ACTIVITY_DOMAIN_HSA_OPS; - record.op = HSA_OP_ID_COPY; - record.begin_ns = entry->begin; - record.end_ns = entry->end; - record.device_id = 0; - record.correlation_id = entry->correlation_id; - entry->pool->Write(record); -} - -hsa_status_t hsa_amd_memory_async_copy_interceptor(void* dst, hsa_agent_t dst_agent, - const void* src, hsa_agent_t src_agent, - size_t size, uint32_t num_dep_signals, - const hsa_signal_t* dep_signals, - hsa_signal_t completion_signal) { - if (!async_copy_callback_enabled) { - return saved_amd_ext_api.hsa_amd_memory_async_copy_fn( - dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, completion_signal); - } - - Tracker::entry_t* entry = new Tracker::entry_t(); - entry->handler = hsa_async_copy_handler; - entry->pool = async_copy_callback_memory_pool; - entry->correlation_id = CorrelationId(); - Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry); - - hsa_status_t status = saved_amd_ext_api.hsa_amd_memory_async_copy_fn( - dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, 
entry->signal); - if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry); - - return status; -} - -hsa_status_t hsa_amd_memory_async_copy_rect_interceptor( - const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset, const hsa_pitched_ptr_t* src, - const hsa_dim3_t* src_offset, const hsa_dim3_t* range, hsa_agent_t copy_agent, - hsa_amd_copy_direction_t dir, uint32_t num_dep_signals, const hsa_signal_t* dep_signals, - hsa_signal_t completion_signal) { - if (!async_copy_callback_enabled) { - return saved_amd_ext_api.hsa_amd_memory_async_copy_rect_fn( - dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals, - completion_signal); - } - - Tracker::entry_t* entry = new Tracker::entry_t(); - entry->handler = hsa_async_copy_handler; - entry->pool = async_copy_callback_memory_pool; - entry->correlation_id = CorrelationId(); - Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry); - - hsa_status_t status = saved_amd_ext_api.hsa_amd_memory_async_copy_rect_fn( - dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals, - entry->signal); - if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry); - - return status; -} - -} // namespace hsa_support - -void HSA_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) { - MemoryPool* pool = reinterpret_cast(arg); - roctracer_record_t* record_ptr = reinterpret_cast(record); - record_ptr->domain = ACTIVITY_DOMAIN_HSA_OPS; - pool->Write(*record_ptr); -} - // Logger routines and primitives util::Logger::mutex_t util::Logger::mutex_; std::atomic util::Logger::instance_{}; @@ -340,6 +228,7 @@ unsigned set_stopped(unsigned val) { stop_status_value = val; return ret; } + } // namespace roctracer using namespace roctracer; @@ -367,9 +256,9 @@ ROCTRACER_API const char* roctracer_op_string(uint32_t domain, uint32_t op, uint case ACTIVITY_DOMAIN_HSA_API: return hsa_support::GetApiName(op); case ACTIVITY_DOMAIN_HSA_EVT: - return 
RocpLoader::Instance().GetEvtName(op); + return hsa_support::GetEvtName(op); case ACTIVITY_DOMAIN_HSA_OPS: - return RocpLoader::Instance().GetOpName(op); + return hsa_support::GetOpsName(op); case ACTIVITY_DOMAIN_HIP_OPS: return HipLoader::Instance().GetOpName(kind); case ACTIVITY_DOMAIN_HIP_API: @@ -460,27 +349,10 @@ static void roctracer_enable_callback_fun(roctracer_domain_t domain, uint32_t op roctracer_rtapi_callback_t callback, void* user_data) { switch (domain) { case ACTIVITY_DOMAIN_HSA_OPS: + case ACTIVITY_DOMAIN_HSA_API: + case ACTIVITY_DOMAIN_HSA_EVT: + hsa_support::EnableCallback(domain, op, callback, user_data); break; - case ACTIVITY_DOMAIN_HSA_API: { -#if 0 - if (op == HSA_API_ID_DISPATCH) { - if (!RocpLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data)) - FATAL_LOGGING("HSA::RegisterApiCallback error(" << op << ") failed"); - break; - } -#endif - if (op >= HSA_API_ID_NUMBER) - EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, - "invalid HSA API operation ID(" << op << ")"); - - hsa_support::cb_table.Set(op, callback, user_data); - break; - } - case ACTIVITY_DOMAIN_HSA_EVT: { - if (!RocpLoader::Instance().RegisterEvtCallback(op, (void*)callback, user_data)) - FATAL_LOGGING("HSA::RegisterEvtCallback error(" << op << ") failed"); - break; - } case ACTIVITY_DOMAIN_HIP_OPS: break; case ACTIVITY_DOMAIN_HIP_API: { @@ -539,19 +411,10 @@ ROCTRACER_API roctracer_status_t roctracer_enable_domain_callback( static void roctracer_disable_callback_fun(roctracer_domain_t domain, uint32_t op) { switch (domain) { case ACTIVITY_DOMAIN_HSA_OPS: + case ACTIVITY_DOMAIN_HSA_API: + case ACTIVITY_DOMAIN_HSA_EVT: + hsa_support::DisableCallback(domain, op); break; - case ACTIVITY_DOMAIN_HSA_API: { -#if 0 - if (op == HSA_API_ID_DISPATCH && !RocpLoader::Instance().RemoveApiCallback(op)) - FATAL_LOGGING("HSA::RemoveActivityCallback error(" << op << ") failed"); - break; -#endif - if (op >= HSA_API_ID_NUMBER) - 
EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, - "invalid HSA API operation ID(" << op << ")"); - hsa_support::cb_table.Set(op, nullptr, nullptr); - break; - } case ACTIVITY_DOMAIN_HIP_OPS: break; case ACTIVITY_DOMAIN_HIP_API: { @@ -568,11 +431,6 @@ static void roctracer_disable_callback_fun(roctracer_domain_t domain, uint32_t o } break; } - case ACTIVITY_DOMAIN_HSA_EVT: { - if (!RocpLoader::Instance().RemoveEvtCallback(op)) - FATAL_LOGGING("HSA::RemoveEvtCallback error(" << op << ") failed"); - break; - } case ACTIVITY_DOMAIN_ROCTX: { if (RocTxLoader::Instance().Enabled() && !RocTxLoader::Instance().RemoveApiCallback(op)) FATAL_LOGGING("ROCTX::RemoveApiCallback(" << op << ") failed"); @@ -656,34 +514,11 @@ static void roctracer_enable_activity_fun(roctracer_domain_t domain, uint32_t op roctracer_pool_t* pool) { assert(pool != nullptr); switch (domain) { - case ACTIVITY_DOMAIN_HSA_OPS: { - if (op == HSA_OP_ID_COPY) { - std::scoped_lock lock(hsa_support::init_mutex); - - if (hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn != nullptr) { - [[maybe_unused]] hsa_status_t status = - hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(true); - assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed"); - } - RocpLoader::Instance(); - hsa_support::async_copy_callback_enabled = true; - hsa_support::async_copy_callback_memory_pool = reinterpret_cast(pool); - } else { - const bool init_phase = (RocpLoader::GetRef() == nullptr); - if (RocpLoader::GetRef() == nullptr) break; - if (init_phase) { - RocpLoader::Instance().InitActivityCallback((void*)HSA_AsyncActivityCallback, - (void*)pool); - } - if (!RocpLoader::Instance().EnableActivityCallback(op, true)) - FATAL_LOGGING("HSA::EnableActivityCallback error"); - } - break; - } - case ACTIVITY_DOMAIN_HSA_API: - break; - case ACTIVITY_DOMAIN_HSA_EVT: + case ACTIVITY_DOMAIN_HSA_OPS: RocpLoader::Instance(); + case ACTIVITY_DOMAIN_HSA_API: + case 
ACTIVITY_DOMAIN_HSA_EVT: + hsa_support::EnableActivity(domain, op, pool); break; case ACTIVITY_DOMAIN_HIP_OPS: { if (HipLoader::Instance().Enabled() && @@ -757,29 +592,10 @@ ROCTRACER_API roctracer_status_t roctracer_enable_domain_activity(activity_domai // Disable activity records logging static void roctracer_disable_activity_fun(roctracer_domain_t domain, uint32_t op) { switch (domain) { - case ACTIVITY_DOMAIN_HSA_OPS: { - if (op == HSA_OP_ID_COPY) { - std::scoped_lock lock(hsa_support::init_mutex); - - hsa_support::async_copy_callback_enabled = false; - hsa_support::async_copy_callback_memory_pool = nullptr; - - if (hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn != nullptr) { - [[maybe_unused]] hsa_status_t status = - hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(false); - assert(status == HSA_STATUS_SUCCESS || status == HSA_STATUS_ERROR_NOT_INITIALIZED || - !"hsa_amd_profiling_async_copy_enable failed"); - } - } else { - if (RocpLoader::GetRef() == nullptr) break; - if (!RocpLoader::Instance().EnableActivityCallback(op, false)) - FATAL_LOGGING("HSA::EnableActivityCallback(false) error, op(" << op << ")"); - } - break; - } + case ACTIVITY_DOMAIN_HSA_OPS: case ACTIVITY_DOMAIN_HSA_API: - break; case ACTIVITY_DOMAIN_HSA_EVT: + hsa_support::DisableActivity(domain, op); break; case ACTIVITY_DOMAIN_HIP_OPS: { if (HipLoader::Instance().Enabled() && @@ -941,7 +757,7 @@ ROCTRACER_API void roctracer_stop() { ROCTRACER_API roctracer_status_t roctracer_get_timestamp(roctracer_timestamp_t* timestamp) { API_METHOD_PREFIX - *timestamp = util::timestamp_ns(); + *timestamp = hsa_support::timestamp_ns(); API_METHOD_SUFFIX } @@ -985,71 +801,18 @@ __attribute__((destructor)) void destructor() { extern "C" { // The HSA_AMD_TOOL_PRIORITY variable must be a constant value type initialized by the loader -// itself, not by code during _init. 'extern const' seems do that although that is not a guarantee. 
+// itself, not by code during _init. 'extern const' seems to do that although that is not a +// guarantee. ROCTRACER_EXPORT extern const uint32_t HSA_AMD_TOOL_PRIORITY = 50; // HSA-runtime tool on-load method ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, const char* const* failed_tool_names) { - std::scoped_lock lock(hsa_support::init_mutex); - - // Save the HSA core api and amd_ext api. - hsa_support::saved_core_api = *table->core_; - hsa_support::saved_amd_ext_api = *table->amd_ext_; - - // Enumerate the agents. - if (hsa_support::saved_core_api.hsa_iterate_agents_fn( - [](hsa_agent_t agent, void* data) { - hsa_support::AgentInfo agent_info; - if (hsa_support::saved_core_api.hsa_agent_get_info_fn( - agent, HSA_AGENT_INFO_DEVICE, &agent_info.type) != HSA_STATUS_SUCCESS) - FATAL_LOGGING("hsa_agent_get_info failed"); - switch (agent_info.type) { - case HSA_DEVICE_TYPE_CPU: - static int cpu_agent_count = 0; - agent_info.index = cpu_agent_count++; - break; - case HSA_DEVICE_TYPE_GPU: - static int gpu_agent_count = 0; - agent_info.index = gpu_agent_count++; - break; - default: - static int other_agent_count = 0; - agent_info.index = other_agent_count++; - break; - } - hsa_support::agent_info_map.emplace(agent.handle, agent_info); - return HSA_STATUS_SUCCESS; - }, - nullptr) != HSA_STATUS_SUCCESS) - FATAL_LOGGING("hsa_iterate_agents failed"); - - // Install the HSA_OPS intercept - table->amd_ext_->hsa_amd_memory_async_copy_fn = - hsa_support::hsa_amd_memory_async_copy_interceptor; - table->amd_ext_->hsa_amd_memory_async_copy_rect_fn = - hsa_support::hsa_amd_memory_async_copy_rect_interceptor; - - // Install the HSA_API wrappers - hsa_support::InstallCoreApiWrappers(table->core_); - hsa_support::InstallAmdExtWrappers(table->amd_ext_); - hsa_support::InstallImageExtWrappers(table->image_ext_); - - if (hsa_support::async_copy_callback_enabled) { - [[maybe_unused]] hsa_status_t status = - 
hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(true); - assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed"); - } - + [](auto&&...) {}(runtime_version, failed_tool_count, failed_tool_names); + hsa_support::Initialize(table); return true; } -ROCTRACER_EXPORT void OnUnload() { - if (hsa_support::async_copy_callback_enabled) { - [[maybe_unused]] hsa_status_t status = - hsa_support::saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(false); - assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed"); - } -} +ROCTRACER_EXPORT void OnUnload() { hsa_support::Finalize(); } } // extern "C" \ No newline at end of file diff --git a/src/tracer_tool/tracer_tool.cpp b/src/tracer_tool/tracer_tool.cpp index a1dc81a2ef..591a9c90e9 100644 --- a/src/tracer_tool/tracer_tool.cpp +++ b/src/tracer_tool/tracer_tool.cpp @@ -694,7 +694,8 @@ void tool_load() { extern "C" { // The HSA_AMD_TOOL_PRIORITY variable must be a constant value type initialized by the loader -// itself, not by code during _init. 'extern const' seems do that although that is not a guarantee. +// itself, not by code during _init. 'extern const' seems to do that although that is not a +// guarantee. 
ROCTRACER_EXPORT extern const uint32_t HSA_AMD_TOOL_PRIORITY = 1050; // HSA-runtime tool on-load method diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index b38c459b4c..287a7e3a65 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -68,12 +68,6 @@ target_include_directories(MatrixTranspose_ctest PRIVATE ${PROJECT_SOURCE_DIR}/i target_link_libraries(MatrixTranspose_ctest PRIVATE roctracer roctx) add_dependencies(mytest MatrixTranspose_ctest) -## Build hsaco_test reference test -add_library(hsaco_test SHARED app/hsaco_test.cpp) -target_compile_definitions(hsaco_test PRIVATE AMD_INTERNAL_BUILD) -target_link_libraries(hsaco_test hsa-runtime64::hsa-runtime64) -add_dependencies(mytest hsaco_test) - ## Build codeobj event test add_library(codeobj_test SHARED app/codeobj_test.cpp) target_include_directories(codeobj_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/inc) diff --git a/test/app/codeobj_test.cpp b/test/app/codeobj_test.cpp index 4593a8f4e9..16a8a72e92 100644 --- a/test/app/codeobj_test.cpp +++ b/test/app/codeobj_test.cpp @@ -18,68 +18,52 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include -#include -#include +#include +#include +#include #include "inc/roctracer.h" #include "inc/roctracer_hsa.h" -#include - -#define PUBLIC_API __attribute__((visibility("default"))) -#define CONSTRUCTOR_API __attribute__((constructor)) -#define DESTRUCTOR_API __attribute__((destructor)) +namespace { // Check returned HSA API status -void check_status(roctracer_status_t status) { +inline void CHECK(roctracer_status_t status) { if (status != ROCTRACER_STATUS_SUCCESS) { - const char* error_string = roctracer_error_string(); - fprintf(stderr, "ERROR: %s\n", error_string); + fprintf(stderr, "ERROR: %s\n", roctracer_error_string()); abort(); } } // codeobj callback -void codeobj_callback(uint32_t domain, uint32_t cid, const void* data, void* arg) { +void CodeObjectCallback(uint32_t domain, uint32_t cid, const void* data, void* arg) { const hsa_evt_data_t* evt_data = reinterpret_cast(data); - const char* uri = evt_data->codeobj.uri; - printf( - "codeobj_callback domain(%u) cid(%u): load_base(0x%lx) load_size(0x%lx) load_delta(0x%lx) " - "uri(\"%s\")\n", - domain, cid, evt_data->codeobj.load_base, evt_data->codeobj.load_size, - evt_data->codeobj.load_delta, uri); - free((void*)uri); - fflush(stdout); + fprintf(stdout, + "codeobj_callback domain(%u) cid(%u): load_base(0x%lx) load_size(0x%lx) " + "load_delta(0x%lx) uri(\"%s\") unload(%d)\n", + domain, cid, evt_data->codeobj.load_base, evt_data->codeobj.load_size, + evt_data->codeobj.load_delta, evt_data->codeobj.uri, evt_data->codeobj.unload); } -void initialize() { - roctracer_status_t status = roctracer_enable_op_callback( - ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ, codeobj_callback, NULL); - check_status(status); +} // namespace + +#include + +extern "C" { +// The HSA_AMD_TOOL_PRIORITY variable must be a constant value type initialized by the loader +// itself, not by code during _init. 'extern const' seems to do that although that is not a +// guarantee. 
+ROCTRACER_EXPORT extern const uint32_t HSA_AMD_TOOL_PRIORITY = 1050; + +// HSA-runtime tool on-load method +ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version, + uint64_t failed_tool_count, const char* const* failed_tool_names) { + CHECK(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ, + CodeObjectCallback, nullptr)); + return true; } -void cleanup() { - roctracer_status_t status = roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_EVT); - check_status(status); +ROCTRACER_EXPORT void OnUnload() { + CHECK(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_EVT)); } -// Tool constructor -extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings) { - // Enable HSA events intercepting - settings->hsa_intercepting = 1; - // Initialize profiling - initialize(); -} - -// Tool destructor -extern "C" PUBLIC_API void OnUnloadTool() { - // Final resources cleanup - cleanup(); -} - -extern "C" CONSTRUCTOR_API void constructor() { - printf("constructor\n"); - fflush(stdout); -} - -extern "C" DESTRUCTOR_API void destructor() { OnUnloadTool(); } +} // extern "C" \ No newline at end of file diff --git a/test/app/hsaco_test.cpp b/test/app/hsaco_test.cpp deleted file mode 100644 index b6e08cc51f..0000000000 --- a/test/app/hsaco_test.cpp +++ /dev/null @@ -1,127 +0,0 @@ -/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#include -#include -#include -#include -#include - -#define PUBLIC_API __attribute__((visibility("default"))) -#define CONSTRUCTOR_API __attribute__((constructor)) -#define DESTRUCTOR_API __attribute__((destructor)) - -#define HSA_RT(call) \ - do { \ - const hsa_status_t status = call; \ - if (status != HSA_STATUS_SUCCESS) { \ - printf("error \"%s\"\n", #call); \ - fflush(stdout); \ - abort(); \ - } \ - } while (0) - -// HSA API intercepting primitives -decltype(hsa_executable_freeze)* hsa_executable_freeze_fn; -hsa_ven_amd_loader_1_01_pfn_t loader_api_table{}; - -hsa_status_t code_object_callback(hsa_executable_t executable, - hsa_loaded_code_object_t loaded_code_object, void* arg) { - printf("code_object_callback\n"); - fflush(stdout); - - uint64_t load_base = 0; - uint64_t load_size = 0; - uint64_t load_delta = 0; - uint32_t uri_len = 0; - char* uri_str = NULL; - - HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info( - loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE, &load_base)); - HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info( - loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE, &load_size)); - HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info( - loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA, &load_delta)); - HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info( - loaded_code_object, 
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH, &uri_len)); - - uri_str = (char*)calloc(uri_len + 1, sizeof(char)); - if (!uri_str) { - perror("calloc"); - abort(); - } - - HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info( - loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI, uri_str)); - - printf("load_base(0x%lx)\n", load_base); - fflush(stdout); - printf("load_size(0x%lx)\n", load_size); - fflush(stdout); - printf("load_delta(0x%lx)\n", load_delta); - fflush(stdout); - printf("uri_len(%u)\n", uri_len); - fflush(stdout); - printf("uri_str(\"%s\")\n", uri_str); - fflush(stdout); - - free(uri_str); - - return HSA_STATUS_SUCCESS; -} - -hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable, const char* options) { - HSA_RT(loader_api_table.hsa_ven_amd_loader_executable_iterate_loaded_code_objects( - executable, code_object_callback, NULL)); - HSA_RT(hsa_executable_freeze_fn(executable, options)); - return HSA_STATUS_SUCCESS; -} - -// HSA-runtime tool on-load method -extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, - uint64_t failed_tool_count, - const char* const* failed_tool_names) { - printf("OnLoad: begin\n"); - fflush(stdout); - // intercepting hsa_executable_freeze API - hsa_executable_freeze_fn = table->core_->hsa_executable_freeze_fn; - table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor; - // Fetching AMD Loader HSA extension API - HSA_RT(hsa_system_get_major_extension_table( - HSA_EXTENSION_AMD_LOADER, 1, sizeof(hsa_ven_amd_loader_1_01_pfn_t), &loader_api_table)); - printf("OnLoad: end\n"); - fflush(stdout); - return true; -} - -extern "C" PUBLIC_API void OnUnload() { - printf("OnUnload\n"); - fflush(stdout); -} - -extern "C" CONSTRUCTOR_API void constructor() { - printf("constructor\n"); - fflush(stdout); -} - -extern "C" DESTRUCTOR_API void destructor() { - printf("destructor\n"); - fflush(stdout); -} diff --git 
a/test/golden_traces/tests_trace_cmp_levels.txt b/test/golden_traces/tests_trace_cmp_levels.txt index 439d505105..5d191021bd 100644 --- a/test/golden_traces/tests_trace_cmp_levels.txt +++ b/test/golden_traces/tests_trace_cmp_levels.txt @@ -14,7 +14,6 @@ MatrixTranspose_hip_input_trace --check-events .* copy_hsa_trace --check-events .* copy_hsa_input_trace --check-events .* load_unload_reload_trace --check-order .* --ignore-count hsa_agent_get_info -hsa_co_trace --check-none code_obj_trace --check-none trace_buffer --check-none memory_pool --check-none diff --git a/test/run.sh b/test/run.sh index 375dea39e5..f6f2e5d27c 100755 --- a/test/run.sh +++ b/test/run.sh @@ -169,11 +169,7 @@ unset ROCP_INPUT # Check that the tracer tool can be unloaded and then reloaded. eval_test "Load/Unload/Reload the tracer tool" ./test/load_unload_reload_test load_unload_reload_trace -export HSA_TOOLS_LIB="./test/libhsaco_test.so" -eval_test "tool HSA codeobj" ./test/MatrixTranspose hsa_co_trace - -export ROCP_TOOL_LIB=./test/libcodeobj_test.so -export HSA_TOOLS_LIB="librocprofiler64.so" +export LD_PRELOAD=./test/libcodeobj_test.so eval_test "tool tracer codeobj" ./test/MatrixTranspose code_obj_trace unset LD_PRELOAD