diff --git a/source/docs/rocprofv3-schema.json b/source/docs/rocprofv3-schema.json index ad68c19ba1..20ac7ac318 100644 --- a/source/docs/rocprofv3-schema.json +++ b/source/docs/rocprofv3-schema.json @@ -725,6 +725,97 @@ ] } }, + "host_functions": { + "type": "array", + "description": "Host function records.", + "items": { + "type": "object", + "properties": { + "size": { + "type": "integer", + "description": "Size of the host function record." + }, + "host_function_id": { + "type": "integer", + "description": "ID of the HIP host function." + }, + "kernel_id": { + "type": "integer", + "description": "ID of the corressponding kernel." + }, + "code_object_id": { + "type": "integer", + "description": "ID of the corressponding code object." + }, + "host_function_address": { + "type": "integer", + "description": "Address of the host function registered." + }, + "modules_address": { + "type": "integer", + "description": "Address of the hip fat binary registered" + }, + "device_function": { + "type": "string", + "description": "Device function name registered" + }, + "thread_limit": { + "type": "integer", + "description": "Size of the thread limit." + }, + "thread_id_address": { + "type": "integer", + "description": "Address of thread Id." + }, + "block_id_address": { + "type": "string", + "description": "Address of Block Id." + }, + "block_dim_address": { + "type": "string", + "description": "Address of Block Dimension" + }, + "grid_dim_address": { + "type": "string", + "description": "Address of the Grid Dimension." + }, + "workgroup_size_address": { + "type": "string", + "description": "Address of the work group size." + }, + "formatted_host_function_name": { + "type": "string", + "description": "Formatted name of the device function." + }, + "demangled_host_function_name": { + "type": "string", + "description": "Demangled name of device function." + }, + "truncated_host_function_name": { + "type": "string", + "description": "Truncated name of device function." + } + }, + "required": [ + "size", + "host_function_id", + "kernel_id", + "code_object_id", + "host_function_address", + "modules_address", + "device_function", + "thread_limit", + "thread_id_address", + "block_id_address", + "block_dim_address", + "grid_dim_address", + "workgroup_size_address", + "formatted_host_function_name", + "demangled_host_function_name", + "truncated_host_function_name" + ] + } + }, "callback_records": { "type": "object", "description": "Callback record details.", diff --git a/source/include/rocprofiler-sdk/callback_tracing.h b/source/include/rocprofiler-sdk/callback_tracing.h index eddff403a6..525d75d699 100644 --- a/source/include/rocprofiler-sdk/callback_tracing.h +++ b/source/include/rocprofiler-sdk/callback_tracing.h @@ -181,6 +181,27 @@ typedef struct uint32_t accum_vgpr_count; ///< Accum vector general purpose register count } rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t; +// rename struct + +typedef struct +{ + uint64_t size; ///< size of this struct + uint64_t host_function_id; ///< unique host function identifier value + uint64_t kernel_id; ///< unique symbol identifier value + uint64_t code_object_id; ///< parent unique code object identifier + rocprofiler_address_t host_function; ///< kernel host function pointer + rocprofiler_address_t modules; ///< reference address where modules will be loaded + const char* device_function; + uint32_t thread_limit; ///< thread limit + rocprofiler_dim3_t thread_ids; ///< thread ids address + rocprofiler_dim3_t block_ids; ///< block ids address + rocprofiler_dim3_t block_dims; ///< block dimensions address + rocprofiler_dim3_t grid_dims; ///< grid dimensions address + uint64_t workgroup_size; ///< workgroup size address + + /// @var device_function + /// @brief device function name used to map the metadata during kernel launch +} rocprofiler_callback_tracing_code_object_host_kernel_symbol_register_data_t; /** * @brief ROCProfiler Kernel Dispatch Callback Tracer Record. diff --git a/source/include/rocprofiler-sdk/cxx/serialization.hpp b/source/include/rocprofiler-sdk/cxx/serialization.hpp index e92dadc95f..4a633261f6 100644 --- a/source/include/rocprofiler-sdk/cxx/serialization.hpp +++ b/source/include/rocprofiler-sdk/cxx/serialization.hpp @@ -139,6 +139,13 @@ save(ArchiveT& ar, rocprofiler_dim3_t data) ROCP_SDK_SAVE_DATA_FIELD(z); } +template +void +save(ArchiveT& ar, rocprofiler_address_t data) +{ + ROCP_SDK_SAVE_DATA_FIELD(value); +} + template void save(ArchiveT& ar, rocprofiler_callback_tracing_code_object_load_data_t data) @@ -181,6 +188,25 @@ save(ArchiveT& ar, rocprofiler_callback_tracing_code_object_kernel_symbol_regist ROCP_SDK_SAVE_DATA_FIELD(accum_vgpr_count); } +template +void +save(ArchiveT& ar, rocprofiler_callback_tracing_code_object_host_kernel_symbol_register_data_t data) +{ + ROCP_SDK_SAVE_DATA_FIELD(size); + ROCP_SDK_SAVE_DATA_FIELD(host_function_id); + ROCP_SDK_SAVE_DATA_FIELD(kernel_id); + ROCP_SDK_SAVE_DATA_FIELD(code_object_id); + ROCP_SDK_SAVE_DATA_FIELD(host_function); + ROCP_SDK_SAVE_DATA_FIELD(modules); + ROCP_SDK_SAVE_DATA_CSTR(device_function); + ROCP_SDK_SAVE_DATA_FIELD(thread_limit); + ROCP_SDK_SAVE_DATA_FIELD(thread_ids); + ROCP_SDK_SAVE_DATA_FIELD(block_ids); + ROCP_SDK_SAVE_DATA_FIELD(block_dims); + ROCP_SDK_SAVE_DATA_FIELD(grid_dims); + ROCP_SDK_SAVE_DATA_FIELD(workgroup_size); +} + template void save(ArchiveT& ar, rocprofiler_hsa_api_retval_t data) diff --git a/source/include/rocprofiler-sdk/cxx/utility.hpp b/source/include/rocprofiler-sdk/cxx/utility.hpp index c81fc0bd12..0447a8c410 100644 --- a/source/include/rocprofiler-sdk/cxx/utility.hpp +++ b/source/include/rocprofiler-sdk/cxx/utility.hpp @@ -34,21 +34,12 @@ namespace utility { template auto -_as_hex(Tp val, size_t width = 0) +as_hex(Tp val, size_t width = 0) { auto ss = std::stringstream{}; - ss << "0x" << std::hex << std::setw(width) << std::setfill('0') << val; + ss << "0x" << std::hex << std::setfill('0') << std::setw(width) << val; return ss.str(); } - -#define ROCPROFILER_CXX_DEFINE_AS_HEX(TYPE) \ - inline auto as_hex(TYPE val, size_t width = 0) \ - { \ - return ::rocprofiler::sdk::utility::_as_hex(val, width); \ - } - -ROCPROFILER_CXX_DEFINE_AS_HEX(uint64_t) -#undef ROCPROFILER_CXX_DEFINE_AS_HEX } // namespace utility } // namespace sdk } // namespace rocprofiler diff --git a/source/include/rocprofiler-sdk/fwd.h b/source/include/rocprofiler-sdk/fwd.h index 52fe1c0c0e..6c6612ca95 100644 --- a/source/include/rocprofiler-sdk/fwd.h +++ b/source/include/rocprofiler-sdk/fwd.h @@ -216,7 +216,8 @@ typedef enum // NOLINT(performance-enum-size) { ROCPROFILER_CODE_OBJECT_NONE = 0, ///< Unknown code object operation ROCPROFILER_CODE_OBJECT_LOAD, ///< Code object containing kernel symbols - ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER, ///< Kernel symbols + ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER, ///< Kernel symbols - Device + ROCPROFILER_CODE_OBJECT_HOST_KERNEL_SYMBOL_REGISTER, ///< Kernel symbols - Host ROCPROFILER_CODE_OBJECT_LAST, } rocprofiler_code_object_operation_t; diff --git a/source/lib/output/CMakeLists.txt b/source/lib/output/CMakeLists.txt index 93c371582d..ca655389ee 100644 --- a/source/lib/output/CMakeLists.txt +++ b/source/lib/output/CMakeLists.txt @@ -19,6 +19,7 @@ set(TOOL_OUTPUT_HEADERS generateStats.hpp generator.hpp kernel_symbol_info.hpp + host_symbol_info.hpp metadata.hpp output_config.hpp output_key.hpp diff --git a/source/lib/output/generateJSON.cpp b/source/lib/output/generateJSON.cpp index e5e20c5172..b2846f88ca 100644 --- a/source/lib/output/generateJSON.cpp +++ b/source/lib/output/generateJSON.cpp @@ -164,10 +164,12 @@ write_json(json_output& json_ar, { auto kern_sym_data = tool_metadata.get_kernel_symbols(); + auto host_sym_data = tool_metadata.get_host_symbols(); auto code_obj_data = tool_metadata.get_code_objects(); json_ar(cereal::make_nvp("code_objects", code_obj_data)); json_ar(cereal::make_nvp("kernel_symbols", kern_sym_data)); + json_ar(cereal::make_nvp("host_functions", host_sym_data)); } } diff --git a/source/lib/output/host_symbol_info.hpp b/source/lib/output/host_symbol_info.hpp new file mode 100644 index 0000000000..dc7a63c47f --- /dev/null +++ b/source/lib/output/host_symbol_info.hpp @@ -0,0 +1,92 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "lib/common/demangle.hpp" +#include "lib/common/logging.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace tool +{ +using rocprofiler_host_kernel_symbol_data_t = + rocprofiler_callback_tracing_code_object_host_kernel_symbol_register_data_t; + +struct host_function_info : rocprofiler_host_kernel_symbol_data_t +{ + using base_type = rocprofiler_host_kernel_symbol_data_t; + + template + host_function_info(const base_type& _base, FuncT&& _formatter) + : base_type{_base} + , formatted_host_function_name{_formatter(CHECK_NOTNULL(_base.device_function))} + , demangled_host_function_name{common::cxx_demangle(CHECK_NOTNULL(_base.device_function))} + , truncated_host_function_name{common::truncate_name(demangled_host_function_name)} + {} + + host_function_info(); + ~host_function_info() = default; + host_function_info(const host_function_info&) = default; + host_function_info(host_function_info&&) noexcept = default; + host_function_info& operator=(const host_function_info&) = default; + host_function_info& operator=(host_function_info&&) noexcept = default; + + std::string formatted_host_function_name = {}; + std::string demangled_host_function_name = {}; + std::string truncated_host_function_name = {}; +}; + +using host_function_data_vec_t = std::vector; +using host_function_info_map_t = std::unordered_map; +} // namespace tool +} // namespace rocprofiler + +namespace cereal +{ +#define SAVE_DATA_FIELD(FIELD) ar(make_nvp(#FIELD, data.FIELD)) + +template +void +save(ArchiveT& ar, const ::rocprofiler::tool::host_function_info& data) +{ + cereal::save( + ar, static_cast(data)); + SAVE_DATA_FIELD(formatted_host_function_name); + SAVE_DATA_FIELD(demangled_host_function_name); + SAVE_DATA_FIELD(truncated_host_function_name); +} + +#undef SAVE_DATA_FIELD +} // namespace cereal diff --git a/source/lib/output/metadata.cpp b/source/lib/output/metadata.cpp index 01ed1ffb8f..1919201df7 100644 --- a/source/lib/output/metadata.cpp +++ b/source/lib/output/metadata.cpp @@ -24,10 +24,13 @@ #include "lib/common/string_entry.hpp" #include "lib/output/agent_info.hpp" +#include "lib/output/host_symbol_info.hpp" +#include "lib/output/kernel_symbol_info.hpp" #include #include +#include namespace rocprofiler { @@ -67,6 +70,25 @@ kernel_symbol_info::kernel_symbol_info() : base_type{0, 0, 0, "", 0, 0, 0, 0, 0, 0, 0, 0} {} +constexpr auto null_address_v = rocprofiler_address_t{.value = 0}; +constexpr auto null_dim3_v = rocprofiler_dim3_t{.x = 0, .y = 0, .z = 0}; + +host_function_info::host_function_info() +: base_type{0, + 0, + 0, + 0, + null_address_v, + null_address_v, + "", + 0, + null_dim3_v, + null_dim3_v, + null_dim3_v, + null_dim3_v, + 0} +{} + metadata::metadata(inprocess) : buffer_names{sdk::get_buffer_tracing_names()} , callback_names{sdk::get_callback_tracing_names()} @@ -196,6 +218,14 @@ metadata::get_kernel_symbol(uint64_t kernel_id) const }); } +const host_function_info* +metadata::get_host_function(uint64_t host_function_id) const +{ + return host_functions.rlock([host_function_id](const auto& _data) -> const host_function_info* { + return &_data.at(host_function_id); + }); +} + const tool_counter_info* metadata::get_counter_info(uint64_t instance_id) const { @@ -271,6 +301,18 @@ metadata::get_kernel_symbols() const return _symbol_data; } +host_function_data_vec_t +metadata::get_host_symbols() const +{ + return host_functions.rlock([](const auto& _data_v) { + auto _info = std::vector{}; + _info.resize(_data_v.size() + 1, host_function_info{}); + for(const auto& itr : _data_v) + _info.at(itr.first) = itr.second; + return _info; + }); +} + metadata::agent_info_ptr_vec_t metadata::get_gpu_agents() const { @@ -362,6 +404,16 @@ metadata::add_kernel_symbol(kernel_symbol_info&& sym) std::move(sym)); } +bool +metadata::add_host_function(host_function_info&& func) +{ + return host_functions.wlock( + [](host_function_info_map_t& _data_v, host_function_info&& _func_v) -> bool { + return _data_v.emplace(_func_v.host_function_id, std::move(_func_v)).second; + }, + std::move(func)); +} + bool metadata::add_string_entry(size_t key, std::string_view str) { diff --git a/source/lib/output/metadata.hpp b/source/lib/output/metadata.hpp index a4405cde53..b7c5658185 100644 --- a/source/lib/output/metadata.hpp +++ b/source/lib/output/metadata.hpp @@ -24,6 +24,7 @@ #include "agent_info.hpp" #include "counter_info.hpp" +#include "host_symbol_info.hpp" #include "kernel_symbol_info.hpp" #include "pc_sample_transform.hpp" @@ -100,6 +101,7 @@ struct metadata synced_map marker_messages = {}; synced_map string_entries = {}; synced_map external_corr_ids = {}; + synced_map host_functions = {}; metadata() = default; metadata(inprocess); @@ -115,12 +117,14 @@ struct metadata const agent_info* get_agent(rocprofiler_agent_id_t _val) const; const code_object_info* get_code_object(uint64_t code_obj_id) const; const kernel_symbol_info* get_kernel_symbol(uint64_t kernel_id) const; + const host_function_info* get_host_function(uint64_t host_function_id) const; const tool_counter_info* get_counter_info(uint64_t instance_id) const; const tool_counter_info* get_counter_info(rocprofiler_counter_id_t id) const; const counter_dimension_info_vec_t* get_counter_dimension_info(uint64_t instance_id) const; code_object_data_vec_t get_code_objects() const; kernel_symbol_data_vec_t get_kernel_symbols() const; + host_function_data_vec_t get_host_symbols() const; agent_info_ptr_vec_t get_gpu_agents() const; counter_info_vec_t get_counter_info() const; counter_dimension_vec_t get_counter_dimension_info() const; @@ -138,6 +142,7 @@ struct metadata bool add_marker_message(uint64_t corr_id, std::string&& msg); bool add_code_object(code_object_info obj); bool add_kernel_symbol(kernel_symbol_info&& sym); + bool add_host_function(host_function_info&& func); bool add_string_entry(size_t key, std::string_view str); bool add_external_correlation_id(uint64_t); diff --git a/source/lib/rocprofiler-sdk-tool/helper.hpp b/source/lib/rocprofiler-sdk-tool/helper.hpp index 08eed5f505..26778c4131 100644 --- a/source/lib/rocprofiler-sdk-tool/helper.hpp +++ b/source/lib/rocprofiler-sdk-tool/helper.hpp @@ -86,7 +86,10 @@ constexpr size_t WATERMARK = (BUFFER_SIZE_BYTES / 2); using marker_message_map_t = std::unordered_map; using tool_counter_info = ::rocprofiler::tool::tool_counter_info; using kernel_symbol_info = ::rocprofiler::tool::kernel_symbol_info; +using host_function_info = ::rocprofiler::tool::host_function_info; using rocprofiler_kernel_symbol_info_t = ::rocprofiler::tool::rocprofiler_kernel_symbol_info_t; +using rocprofiler_host_kernel_symbol_data_t = + ::rocprofiler::tool::rocprofiler_host_kernel_symbol_data_t; enum tracing_marker_kind { diff --git a/source/lib/rocprofiler-sdk-tool/tool.cpp b/source/lib/rocprofiler-sdk-tool/tool.cpp index 1746b7183a..e85031a16c 100644 --- a/source/lib/rocprofiler-sdk-tool/tool.cpp +++ b/source/lib/rocprofiler-sdk-tool/tool.cpp @@ -649,6 +649,23 @@ code_object_tracing_callback(rocprofiler_callback_tracing_record_t record, } } + if(record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT && + record.operation == ROCPROFILER_CODE_OBJECT_HOST_KERNEL_SYMBOL_REGISTER) + { + auto* hst_data = static_cast(record.payload); + if(record.phase == ROCPROFILER_CALLBACK_PHASE_LOAD) + { + auto success = CHECK_NOTNULL(tool_metadata) + ->add_host_function(host_function_info{ + get_dereference(hst_data), + [](const char* val) { return tool::format_name(val); }}); + ROCP_WARNING_IF(!success) + << "duplicate host function found for kernel_id=" << hst_data->kernel_id; + + // TODO : kernel filtering for host functions?! + } + } + (void) user_data; (void) data; } diff --git a/source/lib/rocprofiler-sdk/code_object/code_object.cpp b/source/lib/rocprofiler-sdk/code_object/code_object.cpp index b9ba17d4ff..ea18099733 100644 --- a/source/lib/rocprofiler-sdk/code_object/code_object.cpp +++ b/source/lib/rocprofiler-sdk/code_object/code_object.cpp @@ -21,12 +21,14 @@ // THE SOFTWARE. #include "lib/rocprofiler-sdk/code_object/code_object.hpp" +#include "lib/common/logging.hpp" #include "lib/common/scope_destructor.hpp" #include "lib/common/static_object.hpp" #include "lib/common/string_entry.hpp" #include "lib/common/synchronized.hpp" #include "lib/common/utility.hpp" #include "lib/rocprofiler-sdk/agent.hpp" +#include "lib/rocprofiler-sdk/code_object/hip/code_object.hpp" #include "lib/rocprofiler-sdk/code_object/hsa/code_object.hpp" #include "lib/rocprofiler-sdk/code_object/hsa/kernel_symbol.hpp" #include "lib/rocprofiler-sdk/context/context.hpp" @@ -41,10 +43,13 @@ #include #include +#include #include #include -#include +#include +#include #include +#include #include #include @@ -72,6 +77,7 @@ struct code_object_info; SPECIALIZE_CODE_OBJECT_INFO(NONE) SPECIALIZE_CODE_OBJECT_INFO(LOAD) SPECIALIZE_CODE_OBJECT_INFO(DEVICE_KERNEL_SYMBOL_REGISTER) +SPECIALIZE_CODE_OBJECT_INFO(HOST_KERNEL_SYMBOL_REGISTER) #undef SPECIALIZE_CODE_OBJECT_INFO @@ -356,6 +362,13 @@ get_kernel_symbol_id() return _v; } +auto& +get_host_function_id() +{ + static auto _v = std::atomic{}; + return _v; +} + using kernel_object_map_t = std::unordered_map; using executable_array_t = std::vector; using code_object_unload_array_t = std::vector; @@ -388,6 +401,14 @@ get_kernel_object_map() return _v; } +auto* +get_hip_register_data() +{ + static auto*& _v = + common::static_object>::construct(); + return _v; +} + hsa_status_t executable_iterate_agent_symbols_load_callback(hsa_executable_t executable, hsa_agent_t agent, @@ -723,14 +744,62 @@ get_destroy_function() return _v; } +auto& +get_hip_register_fatbinary_function() +{ + static decltype(::std::declval().__hipRegisterFatBinary_fn) _v = + nullptr; + return _v; +} + +auto& +get_hip_register_function_function() +{ + static decltype(::std::declval().__hipRegisterFunction_fn) _v = + nullptr; + return _v; +} + +bool +initialize_hip_binary_data() +{ + static bool is_initialized = + CHECK_NOTNULL(get_hip_register_data())->wlock([](hip::hip_register_data& data) { + ROCP_WARNING_IF(!data.fat_binary) << "No binary registered for HIP"; + if(!data.fat_binary) return false; + std::vector rocp_agents = rocprofiler::agent::get_agents(); + for(const auto* rocp_agent : rocp_agents) + { + if(rocp_agent->type != ROCPROFILER_AGENT_TYPE_GPU) continue; + auto hsa_agent = agent::get_hsa_agent(rocp_agent); + if(!hsa_agent.has_value()) continue; + for(auto& isa : hip::get_isa_offsets(hsa_agent.value(), data.fat_binary)) + { + auto kernel_symbols_name_map = + hip::get_kernel_symbol_device_name_map(isa, data.fat_binary); + // many to one mapping as the same kernel symbols can be found in multiple code + // objects + if(!kernel_symbols_name_map.empty()) + data.kernel_symbol_device_map.insert(kernel_symbols_name_map.begin(), + kernel_symbols_name_map.end()); + } + } + return true; + }); + return is_initialized; +} + hsa_status_t executable_freeze(hsa_executable_t executable, const char* options) { hsa_status_t status = CHECK_NOTNULL(get_freeze_function())(executable, options); if(status != HSA_STATUS_SUCCESS) return status; - ROCP_INFO << "running " << __FUNCTION__ << " (executable=" << executable.handle << ")..."; + // before iterating code-object populate the host function map from registered binary + bool is_initialized = initialize_hip_binary_data(); + ROCP_ERROR_IF(!is_initialized) << "hip mapping data not initialized"; + ROCP_INFO << "running " << __FUNCTION__ << " (executable=" << executable.handle << ")..."; CHECK_NOTNULL(get_executables())->wlock([executable](executable_array_t& data) { data.emplace_back(executable); }); @@ -745,6 +814,7 @@ executable_freeze(hsa_executable_t executable, const char* options) constexpr auto CODE_OBJECT_LOAD = ROCPROFILER_CODE_OBJECT_LOAD; constexpr auto CODE_OBJECT_KERNEL_SYMBOL = ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER; + constexpr auto CODE_OBJECT_HOST_SYMBOL = ROCPROFILER_CODE_OBJECT_HOST_KERNEL_SYMBOL_REGISTER; auto&& context_filter = [](const context_t* ctx) { return (ctx->callback_tracer && ctx->callback_tracer->domains(CODE_OBJECT_KIND) && @@ -811,6 +881,40 @@ executable_freeze(hsa_executable_t executable, const char* options) citr->callback_tracer->callback_data.at(CODE_OBJECT_KIND); auto& user_data = sitr->user_data[citr]; cb_data.callback(record, &user_data, cb_data.data); + + std::string device_name = + CHECK_NOTNULL(get_hip_register_data()) + ->rlock([sym_data]( + const hip::hip_register_data& register_data) { + const auto& sym_map = + register_data.kernel_symbol_device_map; + const auto it = sym_map.find(*CHECK_NOTNULL( + common::get_string_entry(sym_data.kernel_name))); + if(it != sym_map.end()) return it->second; + return std::string(); + }); + // Does not have a host function, skip + if(device_name.empty()) continue; + auto host_data = + CHECK_NOTNULL(get_hip_register_data()) + ->rlock([device_name]( + const hip::hip_register_data& register_data) { + return register_data.host_function_map.at(device_name); + }); + host_data.code_object_id = sym_data.code_object_id; + host_data.kernel_id = sym_data.kernel_id; + host_data.host_function_id = ++get_host_function_id(); + auto hip_record = rocprofiler_callback_tracing_record_t{ + .context_id = rocprofiler_context_id_t{citr->context_idx}, + .thread_id = tidx, + .correlation_id = rocprofiler_correlation_id_t{}, + .kind = CODE_OBJECT_KIND, + .operation = CODE_OBJECT_HOST_SYMBOL, + .phase = ROCPROFILER_CALLBACK_PHASE_LOAD, + .payload = static_cast(&host_data)}; + + // invoke callback + cb_data.callback(hip_record, &user_data, cb_data.data); } } } @@ -877,6 +981,63 @@ executable_destroy(hsa_executable_t executable) return CHECK_NOTNULL(get_destroy_function())(executable); } +void** +hip_register_fat_binary(const void* data) +{ + const hip::hip_fat_binary_wrapper* fbwrapper = + reinterpret_cast(data); + ROCP_ERROR_IF((fbwrapper->magic != hip::HIP_FAT_MAGIC || fbwrapper->version != 1)) + << "register fat binary failed"; + CHECK_NOTNULL(get_hip_register_data())->wlock([fbwrapper](hip::hip_register_data& reg_data) { + reg_data.fat_binary = fbwrapper->binary; + }); + return CHECK_NOTNULL(get_hip_register_fatbinary_function())(data); +} + +void +hip_register_function(void** modules, + const void* host_function, + char* device_function, + const char* device_name, + unsigned int thread_limit, + uint3* thread_id, + uint3* block_id, + dim3* block_dim, + dim3* grid_dim, + int* workgroup_size) +{ + auto convert_to_dim3 = [](auto* val) { + return (val) ? rocprofiler_dim3_t{.x = val->x, .y = val->y, .z = val->z} + : rocprofiler_dim3_t{0, 0, 0}; + }; + + CHECK_NOTNULL(get_hip_register_data())->wlock([&](hip::hip_register_data& data) { + const std::string* d_func = common::get_string_entry(device_function); + auto host_symbol = common::init_public_api_struct(hip::host_symbol_data_t{}); + host_symbol.host_function.ptr = const_cast(host_function); + host_symbol.modules.ptr = modules; + host_symbol.device_function = d_func->c_str(); + host_symbol.thread_limit = thread_limit; + host_symbol.thread_ids = convert_to_dim3(thread_id); + host_symbol.block_ids = convert_to_dim3(block_id); + host_symbol.block_dims = convert_to_dim3(block_dim); + host_symbol.grid_dims = convert_to_dim3(grid_dim); + host_symbol.workgroup_size = (workgroup_size) ? *workgroup_size : 0; + data.host_function_map.emplace(*CHECK_NOTNULL(d_func), host_symbol); + }); + CHECK_NOTNULL(get_hip_register_function_function()) + (modules, + host_function, + device_function, + device_name, + thread_limit, + thread_id, + block_id, + block_dim, + grid_dim, + workgroup_size); +} + std::vector shutdown(hsa_executable_t executable) { @@ -980,6 +1141,19 @@ initialize(HsaApiTable* table) } } +void +initialize(HipCompilerDispatchTable* table) +{ + get_hip_register_fatbinary_function() = CHECK_NOTNULL(table->__hipRegisterFatBinary_fn); + get_hip_register_function_function() = CHECK_NOTNULL(table->__hipRegisterFunction_fn); + table->__hipRegisterFatBinary_fn = hip_register_fat_binary; + table->__hipRegisterFunction_fn = hip_register_function; + ROCP_FATAL_IF(get_hip_register_fatbinary_function() == table->__hipRegisterFatBinary_fn) + << "infinite recursion"; + ROCP_FATAL_IF(get_hip_register_function_function() == table->__hipRegisterFunction_fn) + << "infinite recursion"; +} + uint64_t get_kernel_id(uint64_t kernel_object) { @@ -1013,7 +1187,6 @@ void iterate_loaded_code_objects(code_object_iterator_t&& func) { if(is_shutdown || !get_executables() || !get_code_objects()) return; - CHECK_NOTNULL(get_code_objects()) ->rlock( [](const code_object_array_t& data, code_object_iterator_t&& func_v) { diff --git a/source/lib/rocprofiler-sdk/code_object/code_object.hpp b/source/lib/rocprofiler-sdk/code_object/code_object.hpp index 0b31039ec7..5a5da6cfe2 100644 --- a/source/lib/rocprofiler-sdk/code_object/code_object.hpp +++ b/source/lib/rocprofiler-sdk/code_object/code_object.hpp @@ -59,6 +59,9 @@ iterate_loaded_code_objects(code_object_iterator_t&& func); void initialize(HsaApiTable* table); +void +initialize(HipCompilerDispatchTable* table); + void finalize(); } // namespace code_object diff --git a/source/lib/rocprofiler-sdk/code_object/hip/CMakeLists.txt b/source/lib/rocprofiler-sdk/code_object/hip/CMakeLists.txt index e6e9d7d78d..219edbfe54 100644 --- a/source/lib/rocprofiler-sdk/code_object/hip/CMakeLists.txt +++ b/source/lib/rocprofiler-sdk/code_object/hip/CMakeLists.txt @@ -1,8 +1,8 @@ # # code object data structures from HIP # -set(ROCPROFILER_LIB_CODE_OBJECT_HIP_SOURCES) -set(ROCPROFILER_LIB_CODE_OBJECT_HIP_HEADERS) +set(ROCPROFILER_LIB_CODE_OBJECT_HIP_SOURCES code_object.cpp) +set(ROCPROFILER_LIB_CODE_OBJECT_HIP_HEADERS code_object.hpp) target_sources( rocprofiler-sdk-object-library PRIVATE ${ROCPROFILER_LIB_CODE_OBJECT_HIP_SOURCES} diff --git a/source/lib/rocprofiler-sdk/code_object/hip/code_object.cpp b/source/lib/rocprofiler-sdk/code_object/hip/code_object.cpp new file mode 100644 index 0000000000..9aeb544258 --- /dev/null +++ b/source/lib/rocprofiler-sdk/code_object/hip/code_object.cpp @@ -0,0 +1,252 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "lib/rocprofiler-sdk/code_object/hip/code_object.hpp" +#include "lib/common/logging.hpp" +#include "lib/common/static_object.hpp" +#include "lib/common/string_entry.hpp" +#include "lib/common/synchronized.hpp" +#include "lib/common/utility.hpp" +#include "lib/rocprofiler-sdk/agent.hpp" +#include "lib/rocprofiler-sdk/hsa/hsa.hpp" + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace code_object +{ +namespace hip +{ +constexpr auto kernels_metadata_lookup = "amdhsa.kernels"; +constexpr auto kernel_name_metadata_lookup = ".name"; +constexpr auto kernel_symbol_metadata_lookup = ".symbol"; + +#define CHECK_RETURN_HSA(call) \ + { \ + if(hsa_status_t status = (call); status != HSA_STATUS_SUCCESS) \ + { \ + const char* reason = ""; \ + if(rocprofiler::hsa::get_core_table()) \ + rocprofiler::hsa::get_core_table()->hsa_status_string_fn(status, &reason); \ + ROCP_INFO << #call << " returned error code " << status << " :: " << reason; \ + return status; \ + } \ + } + +#define CHECK_FATAL_COMGR(call) \ + if(amd_comgr_status_s status = (call); status != AMD_COMGR_STATUS_SUCCESS) \ + { \ + const char* reason = ""; \ + amd_comgr_status_string(status, &reason); \ + ROCP_FATAL << #call << " failed with error code " << status << " :: " << reason; \ + } + +#define CHECK_FATAL_COMGR_EXT(call, ...) \ + if(amd_comgr_status_s status = (call); status != AMD_COMGR_STATUS_SUCCESS) \ + { \ + const char* reason = ""; \ + amd_comgr_status_string(status, &reason); \ + ROCP_FATAL << #call << " failed with error code " << status << " :: " << reason \ + << " :: " << __VA_ARGS__; \ + } + +#define CHECK_RETURN_COMGR(call) \ + if(amd_comgr_status_s status = (call); status != AMD_COMGR_STATUS_SUCCESS) \ + { \ + const char* reason = ""; \ + amd_comgr_status_string(status, &reason); \ + ROCP_INFO << #call << " returned error code " << status << " :: " << reason; \ + return AMD_COMGR_STATUS_ERROR; \ + } + +#define CHECK_RETURN_COMGR_EXT(call, ...) \ + if(amd_comgr_status_s status = (call); status != AMD_COMGR_STATUS_SUCCESS) \ + { \ + const char* reason = ""; \ + amd_comgr_status_string(status, &reason); \ + ROCP_INFO << #call << " returned error code " << status << " :: " << reason \ + << " :: " << __VA_ARGS__; \ + return AMD_COMGR_STATUS_ERROR; \ + } + +hsa_status_t +get_isa_info(hsa_isa_t isa, void* data) +{ + size_t name_len = 0; + CHECK_RETURN_HSA(rocprofiler::hsa::get_core_table()->hsa_isa_get_info_alt_fn( + isa, HSA_ISA_INFO_NAME_LENGTH, &name_len)); + + ROCP_INFO << "isa name length: " << name_len; + + if(name_len > 0) + { + auto name = std::string(name_len, '\0'); + CHECK_RETURN_HSA(rocprofiler::hsa::get_core_table()->hsa_isa_get_info_alt_fn( + isa, HSA_ISA_INFO_NAME, name.data())); + name = name.substr(0, name.find_first_of('\0')); + + ROCP_INFO << "found isa: " << name; + + auto* info = static_cast(data); + CHECK_NOTNULL(info)->emplace_back(common::get_string_entry(name)); + } + + return HSA_STATUS_SUCCESS; +} + +comgr_code_object_vec_t +get_isa_offsets(hsa_agent_t hsa_agent, const void* fat_bin) +{ + auto isas = isa_names_t{}; + auto hsa_status = rocprofiler::hsa::get_core_table()->hsa_agent_iterate_isas_fn( + hsa_agent, get_isa_info, &isas); + + if(isas.empty()) + { + ROCP_INFO << "failed to get ISAs for agent-" + << CHECK_NOTNULL(agent::get_rocprofiler_agent(hsa_agent))->node_id + << " :: " << rocprofiler::hsa::get_hsa_status_string(hsa_status); + return comgr_code_object_vec_t{}; + } + + auto query_list = comgr_code_object_vec_t{}; + for(auto& isa : isas) + query_list.emplace_back(amd_comgr_code_object_info_t{isa->c_str(), 0, 0}); + + auto data_object = amd_comgr_data_t{0}; + CHECK_FATAL_COMGR(amd_comgr_create_data(AMD_COMGR_DATA_KIND_FATBIN, &data_object)); + CHECK_FATAL_COMGR( + amd_comgr_set_data(data_object, 4096, reinterpret_cast(fat_bin))); + CHECK_FATAL_COMGR( + amd_comgr_lookup_code_object(data_object, query_list.data(), query_list.size())); + CHECK_FATAL_COMGR(amd_comgr_release_data(data_object)); + + return query_list; +} + +amd_comgr_status_t +get_node_string(const amd_comgr_metadata_node_t& node, std::string* value) +{ + size_t size = 0; + CHECK_RETURN_COMGR(amd_comgr_get_metadata_string(node, &size, nullptr)); + CHECK_NOTNULL(value)->resize(size, '\0'); + CHECK_RETURN_COMGR(amd_comgr_get_metadata_string(node, &size, value->data())); + *value = value->substr(0, value->find_first_of('\0')); + ROCP_INFO << "found node string: " << *value; + return AMD_COMGR_STATUS_SUCCESS; +} + +amd_comgr_status_t +get_device_name_kernel_symbols_mapping(const amd_comgr_metadata_node_t key, + const amd_comgr_metadata_node_t value, + void* data) +{ + std::string key_str{}; + CHECK_RETURN_COMGR(get_node_string(key, &key_str)); + if(key_str != kernel_symbol_metadata_lookup) return AMD_COMGR_STATUS_SUCCESS; + + // More meta data information can be extracted from binary image here + std::string* kernel_symbol = static_cast(data); + CHECK_RETURN_COMGR(get_node_string(value, kernel_symbol)); + return AMD_COMGR_STATUS_SUCCESS; +} + +amd_comgr_status_t +get_kernels_meta_node(const amd_comgr_code_object_info_t& isa_offset, + const void* fat_bin, + amd_comgr_metadata_node_t* kernels_metadata) +{ + auto binary_data = amd_comgr_data_t{0}; + CHECK_FATAL_COMGR(amd_comgr_create_data(AMD_COMGR_DATA_KIND_EXECUTABLE, &binary_data)); + + void* bin_offset = static_cast(const_cast(fat_bin)) + isa_offset.offset; + CHECK_RETURN_COMGR_EXT( + amd_comgr_set_data(binary_data, isa_offset.size, static_cast(bin_offset)), + "binary_data=" << binary_data.handle << ", isa=(" << isa_offset.isa << ", " + << isa_offset.size << ", " << isa_offset.offset << "), fat_bin=" << fat_bin); + + auto binary_metadata = amd_comgr_metadata_node_t{}; + CHECK_FATAL_COMGR(amd_comgr_get_data_metadata(binary_data, &binary_metadata)); + CHECK_FATAL_COMGR( + amd_comgr_metadata_lookup(binary_metadata, kernels_metadata_lookup, kernels_metadata)); + + return AMD_COMGR_STATUS_SUCCESS; +} + +kernel_symbol_hip_device_map_t +get_kernel_symbol_device_name_map(const amd_comgr_code_object_info_t& isa_offset, + const void* fat_bin) +{ + auto kernel_sym_device_func_map = kernel_symbol_hip_device_map_t{}; + auto kernels_metadata = amd_comgr_metadata_node_t{0}; + size_t num_kernels{0}; + + if(get_kernels_meta_node(isa_offset, CHECK_NOTNULL(fat_bin), &kernels_metadata) != + AMD_COMGR_STATUS_SUCCESS) + return kernel_sym_device_func_map; + + CHECK_FATAL_COMGR(amd_comgr_get_metadata_list_size(kernels_metadata, &num_kernels)); + for(size_t i = 0; i < num_kernels; i++) + { + auto kernel_node = amd_comgr_metadata_node_t{}; + auto kernel_name_meta = amd_comgr_metadata_node_t{}; + + CHECK_FATAL_COMGR(amd_comgr_index_list_metadata(kernels_metadata, i, &kernel_node)); + CHECK_FATAL_COMGR( + amd_comgr_metadata_lookup(kernel_node, kernel_name_metadata_lookup, &kernel_name_meta)); + + auto kernel_meta_name = std::string{}; + if(get_node_string(kernel_name_meta, &kernel_meta_name) != AMD_COMGR_STATUS_SUCCESS || + kernel_meta_name.empty()) + continue; + + ROCP_INFO << "found kernel meta name: " << kernel_meta_name; + + auto kernel_symbol = std::string{}; + CHECK_FATAL_COMGR(amd_comgr_iterate_map_metadata( + kernel_node, get_device_name_kernel_symbols_mapping, &kernel_symbol)); + if(!kernel_symbol.empty()) + { + ROCP_INFO << "found kernel symbol mapping: " << kernel_symbol << " -> " + << kernel_meta_name; + kernel_sym_device_func_map.emplace(kernel_symbol, kernel_meta_name); + } + } + return kernel_sym_device_func_map; +} +} // namespace hip +} // namespace code_object +} // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk/code_object/hip/code_object.hpp b/source/lib/rocprofiler-sdk/code_object/hip/code_object.hpp new file mode 100644 index 0000000000..288717ec15 --- /dev/null +++ b/source/lib/rocprofiler-sdk/code_object/hip/code_object.hpp @@ -0,0 +1,89 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include "lib/common/logging.hpp" +#include "lib/common/scope_destructor.hpp" +#include "lib/common/static_object.hpp" +#include "lib/common/string_entry.hpp" +#include "lib/common/synchronized.hpp" +#include "lib/common/utility.hpp" +#include "lib/rocprofiler-sdk/agent.hpp" +#include "lib/rocprofiler-sdk/code_object/code_object.hpp" +#include "lib/rocprofiler-sdk/code_object/hip/code_object.hpp" + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace code_object +{ +namespace hip +{ +using host_symbol_data_t = + rocprofiler_callback_tracing_code_object_host_kernel_symbol_register_data_t; +using hip_host_function_map_t = std::unordered_map; +using isa_names_t = std::vector; +using kernel_symbol_hip_device_map_t = std::unordered_map; +using comgr_code_object_vec_t = std::vector; + +constexpr unsigned HIP_FAT_MAGIC = 0x48495046; // HIPF + +struct hip_register_data +{ + const void* fat_binary = nullptr; + hip_host_function_map_t host_function_map = {}; + kernel_symbol_hip_device_map_t kernel_symbol_device_map = {}; +}; + +struct hip_fat_binary_wrapper +{ + unsigned int magic = 0; + unsigned int version = 0; + void* binary = nullptr; + void* dummy1 = nullptr; +}; + +comgr_code_object_vec_t +get_isa_offsets(hsa_agent_t hsa_agent, const void* fat_bin); + +kernel_symbol_hip_device_map_t +get_kernel_symbol_device_name_map(const amd_comgr_code_object_info_t& isa_offset, + const void* fat_bin); + +} // namespace hip +} // namespace code_object +} // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk/registration.cpp b/source/lib/rocprofiler-sdk/registration.cpp index 1784f61ec3..76988c68ee 100644 --- a/source/lib/rocprofiler-sdk/registration.cpp +++ b/source/lib/rocprofiler-sdk/registration.cpp @@ -763,6 +763,8 @@ rocprofiler_set_api_table(const char* name, // the HIP API tracing invokes the function pointers from the copy below rocprofiler::hip::copy_table(hip_compiler_api_table, lib_instance); + rocprofiler::code_object::initialize(hip_compiler_api_table); + // install rocprofiler API wrappers rocprofiler::hip::update_table(hip_compiler_api_table); diff --git a/tests/async-copy-tracing/validate.py b/tests/async-copy-tracing/validate.py index bb703d49ab..1a77814552 100644 --- a/tests/async-copy-tracing/validate.py +++ b/tests/async-copy-tracing/validate.py @@ -34,6 +34,7 @@ def test_data_structure(input_data): node_exists("names", sdk_data["callback_records"]) node_exists("code_objects", sdk_data["callback_records"]) node_exists("kernel_symbols", sdk_data["callback_records"]) + node_exists("host_functions", sdk_data["callback_records"]) node_exists("hsa_api_traces", sdk_data["callback_records"]) node_exists("hip_api_traces", sdk_data["callback_records"], 0) node_exists("marker_api_traces", sdk_data["callback_records"]) diff --git a/tests/hip-graph-tracing/validate.py b/tests/hip-graph-tracing/validate.py index 0c2235c55f..aab643f244 100644 --- a/tests/hip-graph-tracing/validate.py +++ b/tests/hip-graph-tracing/validate.py @@ -34,6 +34,7 @@ def test_data_structure(input_data): node_exists("names", sdk_data["callback_records"]) node_exists("code_objects", sdk_data["callback_records"]) node_exists("kernel_symbols", sdk_data["callback_records"]) + node_exists("host_functions", sdk_data["callback_records"]) node_exists("hip_api_traces", sdk_data["callback_records"]) node_exists("kernel_dispatch", sdk_data["callback_records"]) diff --git a/tests/kernel-tracing/validate.py b/tests/kernel-tracing/validate.py index bb78b1a94f..78c0e05f68 100644 --- a/tests/kernel-tracing/validate.py +++ b/tests/kernel-tracing/validate.py @@ -34,6 +34,7 @@ def test_data_structure(input_data): node_exists("names", sdk_data["callback_records"]) node_exists("code_objects", sdk_data["callback_records"]) node_exists("kernel_symbols", sdk_data["callback_records"]) + node_exists("host_functions", sdk_data["callback_records"]) node_exists("hsa_api_traces", sdk_data["callback_records"]) node_exists("hip_api_traces", sdk_data["callback_records"], 0) node_exists("marker_api_traces", sdk_data["callback_records"]) diff --git a/tests/page-migration/validate.py b/tests/page-migration/validate.py index 4d859f693e..86825ff99d 100644 --- a/tests/page-migration/validate.py +++ b/tests/page-migration/validate.py @@ -82,6 +82,7 @@ def test_data_structure(input_data): node_exists("names", sdk_data["callback_records"]) node_exists("code_objects", sdk_data["callback_records"]) node_exists("kernel_symbols", sdk_data["callback_records"]) + node_exists("host_functions", sdk_data["callback_records"]) node_exists("hsa_api_traces", sdk_data["callback_records"]) node_exists("hip_api_traces", sdk_data["callback_records"], 0) node_exists("marker_api_traces", sdk_data["callback_records"], 0) diff --git a/tests/rocprofv3/tracing/validate.py b/tests/rocprofv3/tracing/validate.py index f107c39d34..0bbc87e136 100644 --- a/tests/rocprofv3/tracing/validate.py +++ b/tests/rocprofv3/tracing/validate.py @@ -121,6 +121,34 @@ def test_kernel_trace(kernel_input_data): assert int(row["End_Timestamp"]) >= int(row["Start_Timestamp"]) +def test_host_functions_json(json_data): + data = json_data["rocprofiler-sdk-tool"] + + def get_kernel_name(kernel_id): + return data["kernel_symbols"][kernel_id]["truncated_kernel_name"] + + def get_host_function_name(host_function_id): + return data["host_functions"][host_function_id]["truncated_host_function_name"] + + host_function_data = data.host_functions + kernel_symbols_data = data.kernel_symbols + code_objects_data = data.code_objects + assert len(host_function_data) > 0 + for host_function in host_function_data: + if host_function.host_function_id == 0: + continue + assert host_function.host_function_id > 0 + assert host_function.kernel_id > 0 and host_function.kernel_id <= len( + kernel_symbols_data + ) + assert host_function.code_object_id > 0 and host_function.code_object_id <= len( + code_objects_data + ) + assert get_host_function_name( + host_function["host_function_id"] + ) == get_kernel_name(host_function.kernel_id) + + def test_kernel_trace_json(json_data): data = json_data["rocprofiler-sdk-tool"] diff --git a/tests/scratch-memory-tracing/validate.py b/tests/scratch-memory-tracing/validate.py index 21f939e0fb..68be3b8dae 100755 --- a/tests/scratch-memory-tracing/validate.py +++ b/tests/scratch-memory-tracing/validate.py @@ -40,6 +40,7 @@ def test_data_structure(input_data): node_exists("names", sdk_data["callback_records"]) node_exists("code_objects", sdk_data["callback_records"]) node_exists("kernel_symbols", sdk_data["callback_records"]) + node_exists("host_functions", sdk_data["callback_records"]) node_exists("hsa_api_traces", sdk_data["callback_records"]) node_exists("hip_api_traces", sdk_data["callback_records"], 0) node_exists("scratch_memory_traces", sdk_data["callback_records"], min_len=8) diff --git a/tests/tools/json-tool.cpp b/tests/tools/json-tool.cpp index 8b9f93c74d..d43fe1e979 100644 --- a/tests/tools/json-tool.cpp +++ b/tests/tools/json-tool.cpp @@ -202,7 +202,10 @@ make_array(Tp&& arg, Args&&... args) using call_stack_t = std::vector; using kernel_symbol_data_t = rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t; +using host_function_data_t = + rocprofiler_callback_tracing_code_object_host_kernel_symbol_register_data_t; using kernel_symbol_map_t = std::unordered_map; +using host_functions_map_t = std::unordered_map; rocprofiler_client_id_t* client_id = nullptr; rocprofiler_client_finalize_t client_fini_func = nullptr; @@ -294,6 +297,21 @@ struct kernel_symbol_callback_record_t } }; +struct host_function_callback_record_t +{ + uint64_t timestamp = 0; + rocprofiler_callback_tracing_record_t record = {}; + rocprofiler_callback_tracing_code_object_host_kernel_symbol_register_data_t payload = {}; + + template + void save(ArchiveT& ar) const + { + ar(cereal::make_nvp("timestamp", timestamp)); + cereal::save(ar, record); + ar(cereal::make_nvp("payload", payload)); + } +}; + struct runtime_init_callback_record_t { uint64_t timestamp = 0; @@ -527,6 +545,7 @@ auto counter_info = std::deque{} auto runtime_init_cb_records = std::deque{}; auto code_object_records = std::deque{}; auto kernel_symbol_records = std::deque{}; +auto host_function_records = std::deque{}; auto hsa_api_cb_records = std::deque{}; auto marker_api_cb_records = std::deque{}; auto counter_collection_bf_records = std::deque{}; @@ -676,6 +695,13 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record, auto _lk = std::unique_lock{_mutex}; kernel_symbol_records.emplace_back(kernel_symbol_callback_record_t{ts, record, data_v}); } + else if(record.operation == ROCPROFILER_CODE_OBJECT_HOST_KERNEL_SYMBOL_REGISTER) + { + auto data_v = *static_cast(record.payload); + static auto _mutex = std::mutex{}; + auto _lk = std::unique_lock{_mutex}; + host_function_records.emplace_back(host_function_callback_record_t{ts, record, data_v}); + } } else if(record.kind == ROCPROFILER_CALLBACK_TRACING_HSA_CORE_API || record.kind == ROCPROFILER_CALLBACK_TRACING_HSA_AMD_EXT_API || @@ -1652,6 +1678,7 @@ tool_fini(void* tool_data) << ", runtime_init_callback_records=" << runtime_init_cb_records.size() << ", code_object_callback_records=" << code_object_records.size() << ", kernel_symbol_callback_records=" << kernel_symbol_records.size() + << ", host_function_callback_records=" << host_function_records.size() << ", hsa_api_callback_records=" << hsa_api_cb_records.size() << ", hip_api_callback_records=" << hip_api_cb_records.size() << ", marker_api_callback_records=" << marker_api_cb_records.size() @@ -1760,6 +1787,7 @@ write_json(call_stack_t* _call_stack) json_ar(cereal::make_nvp("runtime_init", runtime_init_cb_records)); json_ar(cereal::make_nvp("code_objects", code_object_records)); json_ar(cereal::make_nvp("kernel_symbols", kernel_symbol_records)); + json_ar(cereal::make_nvp("host_functions", host_function_records)); json_ar(cereal::make_nvp("hsa_api_traces", hsa_api_cb_records)); json_ar(cereal::make_nvp("hip_api_traces", hip_api_cb_records)); json_ar(cereal::make_nvp("marker_api_traces", marker_api_cb_records));