// MIT License // // Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. #include "buffered_output.hpp" #include "config.hpp" #include "csv.hpp" #include "domain_type.hpp" #include "generateCSV.hpp" #include "generateJSON.hpp" #include "generatePerfetto.hpp" #include "helper.hpp" #include "output_file.hpp" #include "tmp_file.hpp" #include "lib/common/environment.hpp" #include "lib/common/filesystem.hpp" #include "lib/common/logging.hpp" #include "lib/common/synchronized.hpp" #include "lib/common/utility.hpp" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(CODECOV) && CODECOV > 0 extern "C" { extern void __gcov_dump(void); } #endif namespace common = ::rocprofiler::common; namespace tool = ::rocprofiler::tool; namespace { constexpr uint32_t lds_block_size = 128 * 4; auto destructors = new std::vector>{}; template Tp& get_dereference(Tp* ptr) { return *CHECK_NOTNULL(ptr); } auto get_destructors_lock() { static auto _mutex = std::mutex{}; return std::unique_lock{_mutex}; } template Tp*& add_destructor(Tp*& ptr) { auto _lk = get_destructors_lock(); destructors->emplace_back([&ptr]() { delete ptr; ptr = nullptr; }); return ptr; } #define ADD_DESTRUCTOR(PTR) \ { \ static auto _once = std::once_flag{}; \ std::call_once(_once, []() { add_destructor(PTR); }); \ } tool::output_file*& get_list_basic_metrics_file() { static auto* _v = new tool::output_file{"basic_metrics", tool::csv::list_basic_metrics_csv_encoder{}, {"Agent_Id", "Name", "Description", "Block", "Dimensions"}}; ADD_DESTRUCTOR(_v); return _v; } tool::output_file*& get_list_derived_metrics_file() { static auto* _v = new tool::output_file{"derived_metrics", tool::csv::list_derived_metrics_csv_encoder{}, {"Agent_Id", "Name", "Description", "Expression", "Dimensions"}}; ADD_DESTRUCTOR(_v); return _v; } #undef ADD_DESTRUCTOR struct buffer_ids { rocprofiler_buffer_id_t hsa_api_trace = {}; rocprofiler_buffer_id_t hip_api_trace = {}; rocprofiler_buffer_id_t kernel_trace = {}; rocprofiler_buffer_id_t memory_copy_trace = {}; rocprofiler_buffer_id_t counter_collection = {}; rocprofiler_buffer_id_t scratch_memory = {}; auto as_array() const { return std::array{hsa_api_trace, hip_api_trace, kernel_trace, memory_copy_trace, counter_collection, scratch_memory}; } }; buffer_ids& get_buffers() { static auto _v = buffer_ids{}; return _v; } using rocprofiler_code_object_data_t = rocprofiler_callback_tracing_code_object_load_data_t; template Tp* as_pointer(Tp&& _val) { return new Tp{std::forward(_val)}; } template Tp* as_pointer() { return new Tp{}; } using code_object_data_map_t = std::unordered_map; using targeted_kernels_map_t = std::unordered_map>; using counter_dimension_info_map_t = std::unordered_map>; using agent_info_map_t = std::unordered_map; using kernel_iteration_t = std::unordered_map; auto code_obj_data = as_pointer>(); auto* kernel_data = as_pointer>(); auto* marker_msg_data = as_pointer>(); auto counter_dimension_data = common::Synchronized{}; auto target_kernels = common::Synchronized{}; auto* buffered_name_info = as_pointer(get_buffer_id_names()); auto* callback_name_info = as_pointer(get_callback_id_names()); auto* agent_info = as_pointer(agent_info_map_t{}); auto* tool_functions = as_pointer(tool_table{}); auto* stats_timestamp = as_pointer(timestamps_t{}); auto kernel_iteration = common::Synchronized{}; bool add_kernel_target(uint64_t _kern_id, const std::unordered_set& range) { return target_kernels .wlock( [](targeted_kernels_map_t& _targets_v, uint64_t _kern_id_v, std::unordered_set _range) { return _targets_v.emplace(_kern_id_v, _range); }, _kern_id, range) .second; } bool is_targeted_kernel(uint64_t _kern_id) { bool is_target_kernel = false; std::unordered_set range = {}; is_target_kernel = target_kernels.rlock( [&range](const auto& _targets_v, uint64_t _kern_id_v) { if(_targets_v.find(_kern_id_v) != _targets_v.end()) { range = _targets_v.at(_kern_id_v); return true; } return false; }, _kern_id); if(is_target_kernel) { kernel_iteration.rlock( [&](const auto& _kernel_iter, uint64_t _kernel_id, std::unordered_set _range) { auto itr = _kernel_iter.at(_kernel_id); // If the iteration range is not given then all iterations of the kernel is profiled if(_range.empty()) is_target_kernel = true; else if(_range.find(itr) != _range.end()) { is_target_kernel = true; } else is_target_kernel = false; }, _kern_id, range); } return is_target_kernel; } auto& get_client_ctx() { static rocprofiler_context_id_t context_id; return context_id; } void flush() { ROCP_INFO << "flushing buffers..."; for(auto itr : get_buffers().as_array()) { if(itr.handle > 0) { ROCP_INFO << "flushing buffer " << itr.handle; ROCPROFILER_CALL(rocprofiler_flush_buffer(itr), "buffer flush"); } } ROCP_INFO << "Buffers flushed"; } std::string_view get_callback_kind(rocprofiler_callback_tracing_kind_t kind) { return CHECK_NOTNULL(callback_name_info)->at(kind); } std::string_view get_callback_op_name(rocprofiler_callback_tracing_kind_t kind, uint32_t op) { return CHECK_NOTNULL(callback_name_info)->at(kind, op); } std::string_view get_roctx_msg(uint64_t cid) { return CHECK_NOTNULL(marker_msg_data) ->rlock( [](const auto& _data, uint64_t _cid_v) -> std::string_view { return _data.at(_cid_v); }, cid); } void cntrl_tracing_callback(rocprofiler_callback_tracing_record_t record, rocprofiler_user_data_t* user_data, void* cb_data) { auto* ctx = static_cast(cb_data); if(ctx && record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API) { if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER && record.operation == ROCPROFILER_MARKER_CONTROL_API_ID_roctxProfilerPause) { ROCPROFILER_CALL(rocprofiler_stop_context(*ctx), "pausing context"); } else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT && record.operation == ROCPROFILER_MARKER_CONTROL_API_ID_roctxProfilerResume) { ROCPROFILER_CALL(rocprofiler_start_context(*ctx), "resuming context"); } auto ts = rocprofiler_timestamp_t{}; rocprofiler_get_timestamp(&ts); if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER) { user_data->value = ts; } else { auto marker_record = rocprofiler_buffer_tracing_marker_api_record_t{}; marker_record.size = sizeof(rocprofiler_buffer_tracing_marker_api_record_t); marker_record.kind = convert_marker_tracing_kind(record.kind); marker_record.operation = record.operation; marker_record.thread_id = record.thread_id; marker_record.correlation_id = record.correlation_id; marker_record.start_timestamp = user_data->value; marker_record.end_timestamp = ts; write_ring_buffer(marker_record, domain_type::MARKER); } } } void callback_tracing_callback(rocprofiler_callback_tracing_record_t record, rocprofiler_user_data_t* user_data, void* data) { static thread_local auto stacked_range = std::vector{}; static auto global_range = common::Synchronized< std::unordered_map>{}; if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API) { auto* marker_data = static_cast(record.payload); auto ts = rocprofiler_timestamp_t{}; rocprofiler_get_timestamp(&ts); if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA) { if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT) { CHECK_NOTNULL(marker_msg_data) ->wlock( [](auto& _data, uint64_t _cid_v, std::string&& _msg) { _data.emplace(_cid_v, std::move(_msg)); }, record.correlation_id.internal, std::string{marker_data->args.roctxMarkA.message}); auto marker_record = rocprofiler_buffer_tracing_marker_api_record_t{}; marker_record.size = sizeof(rocprofiler_buffer_tracing_marker_api_record_t); marker_record.kind = convert_marker_tracing_kind(record.kind); marker_record.operation = record.operation; marker_record.thread_id = record.thread_id; marker_record.correlation_id = record.correlation_id; marker_record.start_timestamp = ts; marker_record.end_timestamp = ts; write_ring_buffer(marker_record, domain_type::MARKER); } } else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA) { if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT) { if(marker_data->args.roctxRangePushA.message) { CHECK_NOTNULL(marker_msg_data) ->wlock( [](auto& _data, uint64_t _cid_v, std::string&& _msg) { _data.emplace(_cid_v, std::move(_msg)); }, record.correlation_id.internal, std::string{marker_data->args.roctxRangePushA.message}); auto marker_record = rocprofiler_buffer_tracing_marker_api_record_t{}; marker_record.size = sizeof(rocprofiler_buffer_tracing_marker_api_record_t); marker_record.kind = convert_marker_tracing_kind(record.kind); marker_record.operation = record.operation; marker_record.thread_id = record.thread_id; marker_record.correlation_id = record.correlation_id; marker_record.start_timestamp = ts; marker_record.end_timestamp = 0; stacked_range.emplace_back(marker_record); } } } else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePop) { if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER) { ROCP_FATAL_IF(stacked_range.empty()) << "roctxRangePop invoked more times than roctxRangePush on thread " << rocprofiler::common::get_tid(); auto val = stacked_range.back(); stacked_range.pop_back(); val.end_timestamp = ts; write_ring_buffer(val, domain_type::MARKER); } } else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStartA) { if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT && marker_data->args.roctxRangeStartA.message) { CHECK_NOTNULL(marker_msg_data) ->wlock( [](auto& _data, uint64_t _cid_v, std::string&& _msg) { _data.emplace(_cid_v, std::move(_msg)); }, record.correlation_id.internal, std::string{marker_data->args.roctxRangeStartA.message}); auto marker_record = rocprofiler_buffer_tracing_marker_api_record_t{}; marker_record.size = sizeof(rocprofiler_buffer_tracing_marker_api_record_t); marker_record.kind = convert_marker_tracing_kind(record.kind); marker_record.operation = record.operation; marker_record.thread_id = record.thread_id; marker_record.correlation_id = record.correlation_id; marker_record.start_timestamp = ts; marker_record.end_timestamp = 0; auto _id = marker_data->retval.roctx_range_id_t_retval; global_range.wlock( [](auto& map, roctx_range_id_t _range_id, auto&& _record) { map.emplace(_range_id, std::move(_record)); }, _id, marker_record); } } else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStop) { if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER) { auto _id = marker_data->args.roctxRangeStop.id; auto&& _entry = global_range.rlock( [](const auto& map, auto _key) { return map.at(_key); }, _id); _entry.end_timestamp = ts; write_ring_buffer(_entry, domain_type::MARKER); global_range.wlock([](auto& map, auto _key) { return map.erase(_key); }, _id); } } else { if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER) { user_data->value = ts; } else { auto marker_record = rocprofiler_buffer_tracing_marker_api_record_t{}; marker_record.size = sizeof(rocprofiler_buffer_tracing_marker_api_record_t); marker_record.kind = convert_marker_tracing_kind(record.kind); marker_record.operation = record.operation; marker_record.thread_id = record.thread_id; marker_record.correlation_id = record.correlation_id; marker_record.start_timestamp = user_data->value; marker_record.end_timestamp = ts; write_ring_buffer(marker_record, domain_type::MARKER); } } } (void) record; (void) user_data; (void) data; } void code_object_tracing_callback(rocprofiler_callback_tracing_record_t record, rocprofiler_user_data_t* user_data, void* data) { auto ts = rocprofiler_timestamp_t{}; ROCPROFILER_CALL(rocprofiler_get_timestamp(&ts), "get timestamp"); if(record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT && record.operation == ROCPROFILER_CODE_OBJECT_LOAD) { if(record.phase == ROCPROFILER_CALLBACK_PHASE_LOAD) { auto* obj_data = static_cast(record.payload); code_obj_data->wlock( [](code_object_data_map_t& cdata, rocprofiler_code_object_data_t* obj_data_v) { cdata.emplace(obj_data_v->code_object_id, *obj_data_v); }, CHECK_NOTNULL(obj_data)); } else if(record.phase == ROCPROFILER_CALLBACK_PHASE_UNLOAD) { flush(); } } if(record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT && record.operation == ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER) { auto* sym_data = static_cast(record.payload); if(record.phase == ROCPROFILER_CALLBACK_PHASE_LOAD) { auto itr = kernel_data->wlock([sym_data](auto& _data) { return _data.emplace(sym_data->kernel_id, kernel_symbol_data{get_dereference(sym_data)}); }); ROCP_WARNING_IF(!itr.second) << "duplicate kernel symbol data for kernel_id=" << sym_data->kernel_id; // add the kernel to the kernel_targets if if(itr.second) { // if kernel name is provided by user then by default all kernels in the application // are targeted const auto& kernel_info = itr.first->second; auto kernel_filter_include = tool::get_config().kernel_filter_include; auto kernel_filter_exclude = tool::get_config().kernel_filter_exclude; auto kernel_filter_range = tool::get_config().kernel_filter_range; std::regex include_regex(kernel_filter_include); std::regex exclude_regex(kernel_filter_exclude); if(std::regex_search(kernel_info.formatted_kernel_name, include_regex)) { if(kernel_filter_exclude.empty()) add_kernel_target(sym_data->kernel_id, kernel_filter_range); else if(!std::regex_search(kernel_info.formatted_kernel_name, exclude_regex)) add_kernel_target(sym_data->kernel_id, kernel_filter_range); } } } } (void) user_data; (void) data; } std::string_view get_kernel_name(uint64_t kernel_id) { return CHECK_NOTNULL(kernel_data)->rlock([kernel_id](const auto& _data) -> std::string_view { return _data.at(kernel_id).formatted_kernel_name; }); } std::string_view get_domain_name(rocprofiler_buffer_tracing_kind_t record_kind) { return CHECK_NOTNULL(buffered_name_info)->at(record_kind); } uint64_t get_agent_node_id(rocprofiler_agent_id_t agent_id) { return agent_info->at(agent_id).logical_node_id; } std::string_view get_operation_name(rocprofiler_buffer_tracing_kind_t kind, rocprofiler_tracing_operation_t op) { return CHECK_NOTNULL(buffered_name_info)->at(kind, op); } void buffered_tracing_callback(rocprofiler_context_id_t /*context*/, rocprofiler_buffer_id_t /*buffer_id*/, rocprofiler_record_header_t** headers, size_t num_headers, void* /*user_data*/, uint64_t /*drop_count*/) { ROCP_INFO << "Executing buffered tracing callback for " << num_headers << " headers"; if(!headers) return; for(size_t i = 0; i < num_headers; ++i) { auto* header = headers[i]; if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING) { if(header->kind == ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH) { auto* record = static_cast( header->payload); write_ring_buffer(*record, domain_type::KERNEL_DISPATCH); } else if(header->kind == ROCPROFILER_BUFFER_TRACING_HSA_CORE_API || header->kind == ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API || header->kind == ROCPROFILER_BUFFER_TRACING_HSA_IMAGE_EXT_API || header->kind == ROCPROFILER_BUFFER_TRACING_HSA_FINALIZE_EXT_API) { auto* record = static_cast(header->payload); write_ring_buffer(*record, domain_type::HSA); } else if(header->kind == ROCPROFILER_BUFFER_TRACING_MEMORY_COPY) { auto* record = static_cast(header->payload); write_ring_buffer(*record, domain_type::MEMORY_COPY); } else if(header->kind == ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY) { auto* record = static_cast( header->payload); write_ring_buffer(*record, domain_type::SCRATCH_MEMORY); } else if(header->kind == ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API || header->kind == ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API) { auto* record = static_cast(header->payload); write_ring_buffer(*record, domain_type::HIP); } else { ROCP_FATAL << fmt::format( "unsupported category + kind: {} + {}", header->category, header->kind); } } } } using counter_vec_t = std::vector; using agent_counter_map_t = std::unordered_map>; rocprofiler_status_t dimensions_info_callback(rocprofiler_counter_id_t id, const rocprofiler_record_dimension_info_t* dim_info, long unsigned int num_dims, void* user_data) { if(user_data != nullptr) { auto* dimensions_info = static_cast*>(user_data); dimensions_info->reserve(num_dims); for(size_t j = 0; j < num_dims; j++) dimensions_info->emplace_back(dim_info[j]); } else { counter_dimension_data.wlock( [&id, &dim_info, &num_dims](counter_dimension_info_map_t& counter_dimension_data_v) { if(counter_dimension_data_v.find(id.handle) == counter_dimension_data_v.end()) { auto dimensions = std::vector{}; dimensions.reserve(num_dims); for(size_t dim = 0; dim < num_dims; ++dim) dimensions.emplace_back(dim_info[dim]); counter_dimension_data_v.emplace(id.handle, std::move(dimensions)); } }); } return ROCPROFILER_STATUS_SUCCESS; } struct tool_agent { int64_t device_id = 0; const rocprofiler_agent_v0_t* agent = nullptr; }; using tool_agent_vec_t = std::vector; auto get_gpu_agents() { auto _gpu_agents = tool_agent_vec_t{}; ROCPROFILER_CALL( rocprofiler_query_available_agents( ROCPROFILER_AGENT_INFO_VERSION_0, [](rocprofiler_agent_version_t, const void** agents, size_t num_agents, void* _data) { auto* _gpu_agents_v = static_cast(_data); for(size_t i = 0; i < num_agents; ++i) { auto* agent = static_cast(agents[i]); if(agent->type == ROCPROFILER_AGENT_TYPE_GPU) _gpu_agents_v->emplace_back(tool_agent{0, agent}); } return ROCPROFILER_STATUS_SUCCESS; }, sizeof(rocprofiler_agent_t), &_gpu_agents), "Iterate rocporfiler agents") // make sure they are sorted by node id std::sort(_gpu_agents.begin(), _gpu_agents.end(), [](const auto& lhs, const auto& rhs) { return CHECK_NOTNULL(lhs.agent)->node_id < CHECK_NOTNULL(rhs.agent)->node_id; }); int64_t _dev_id = 0; for(auto& itr : _gpu_agents) itr.device_id = _dev_id++; return _gpu_agents; } auto get_agent_counter_info(const tool_agent_vec_t& _agents) { using value_type = std::unordered_map>; auto _data = value_type{}; for(auto itr : _agents) { ROCPROFILER_CALL( rocprofiler_iterate_agent_supported_counters( itr.agent->id, [](rocprofiler_agent_id_t id, rocprofiler_counter_id_t* counters, size_t num_counters, void* user_data) { auto* data_v = static_cast(user_data); for(size_t i = 0; i < num_counters; ++i) { // populate global map ROCPROFILER_CALL(rocprofiler_iterate_counter_dimensions( counters[i], dimensions_info_callback, nullptr), "iterate_dimension_info"); auto _info = rocprofiler_counter_info_v0_t{}; auto _dim_ids = std::vector{}; auto _dim_info = std::vector{}; ROCPROFILER_CALL( rocprofiler_query_counter_info( counters[i], ROCPROFILER_COUNTER_INFO_VERSION_0, &_info), "Could not query counter_id"); // populate local vector ROCPROFILER_CALL(rocprofiler_iterate_counter_dimensions( counters[i], dimensions_info_callback, &_dim_info), "iterate_dimension_info"); _dim_ids.reserve(_dim_info.size()); for(auto ditr : _dim_info) _dim_ids.emplace_back(ditr.id); (*data_v)[id].emplace_back( id, _info, std::move(_dim_ids), std::move(_dim_info)); } return ROCPROFILER_STATUS_SUCCESS; }, &_data), "iterate agent supported counters"); std::sort(_data.at(itr.agent->id).begin(), _data.at(itr.agent->id).end(), [](const auto& lhs, const auto& rhs) { return (lhs.id.handle < rhs.id.handle); }); for(auto& citr : _data.at(itr.agent->id)) { std::sort(citr.dimension_ids.begin(), citr.dimension_ids.end()); std::sort(citr.dimension_info.begin(), citr.dimension_info.end(), [](const auto& lhs, const auto& rhs) { return (lhs.id < rhs.id); }); } } return _data; } const tool_agent* get_tool_agent(rocprofiler_agent_id_t id, const tool_agent_vec_t& data) { for(const auto& itr : data) { if(id == itr.agent->id) return &itr; } return nullptr; } // this function creates a rocprofiler profile config on the first entry auto get_agent_profile(rocprofiler_agent_id_t agent_id) { static auto data = common::Synchronized{}; static const auto gpu_agents = get_gpu_agents(); static const auto gpu_agents_counter_info = get_agent_counter_info(gpu_agents); auto profile = std::optional{}; data.ulock( [agent_id, &profile](const agent_counter_map_t& data_v) { auto itr = data_v.find(agent_id); if(itr != data_v.end()) { profile = itr->second; return true; } return false; }, [agent_id, &profile](agent_counter_map_t& data_v) { auto counters_v = counter_vec_t{}; auto found_v = std::vector{}; const auto* tool_agent_v = get_tool_agent(agent_id, gpu_agents); auto expected_v = tool::get_config().counters.size(); constexpr auto device_qualifier = std::string_view{":device="}; for(const auto& itr : tool::get_config().counters) { auto name_v = itr; if(auto pos = std::string::npos; (pos = itr.find(device_qualifier)) != std::string::npos) { name_v = itr.substr(0, pos); auto dev_id_s = itr.substr(pos + device_qualifier.length()); LOG_IF(FATAL, dev_id_s.empty() || dev_id_s.find_first_not_of("0123456789") != std::string::npos) << "invalid device qualifier format (':device=N) where N is the GPU id: " << itr; auto dev_id_v = std::stol(dev_id_s); // skip this counter if the counter is for a specific device id (which doesn't // this agent's device id) if(dev_id_v != tool_agent_v->device_id) { --expected_v; // is not expected continue; } } // search the gpu agent counter info for a counter with a matching name for(const auto& citr : gpu_agents_counter_info.at(agent_id)) { if(name_v == std::string_view{citr.name}) { counters_v.emplace_back(citr.id); found_v.emplace_back(itr); } } } if(expected_v != counters_v.size()) { auto requested_counters = fmt::format("{}", fmt::join(tool::get_config().counters.begin(), tool::get_config().counters.end(), ", ")); auto found_counters = fmt::format("{}", fmt::join(found_v.begin(), found_v.end(), ", ")); LOG(FATAL) << "Unable to find all counters for agent " << tool_agent_v->agent->node_id << " (gpu-" << tool_agent_v->device_id << ", " << tool_agent_v->agent->name << ") in [" << requested_counters << "]. Found: [" << found_counters << "]"; } if(!counters_v.empty()) { auto profile_v = rocprofiler_profile_config_id_t{}; ROCPROFILER_CALL(rocprofiler_create_profile_config( agent_id, counters_v.data(), counters_v.size(), &profile_v), "Could not construct profile cfg"); profile = profile_v; } data_v.emplace(agent_id, profile); return true; }); return profile; } void dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data, rocprofiler_profile_config_id_t* config, rocprofiler_user_data_t* user_data, void* /*callback_data_args*/) { auto kernel_id = dispatch_data.dispatch_info.kernel_id; auto agent_id = dispatch_data.dispatch_info.agent_id; kernel_iteration.wlock( [](auto& _kernel_iter, rocprofiler_kernel_id_t _kernel_id) { auto itr = _kernel_iter.find(_kernel_id); if(itr == _kernel_iter.end()) _kernel_iter.emplace(_kernel_id, 1); else { itr->second++; } }, kernel_id); if(!is_targeted_kernel(kernel_id)) { return; } else if(auto profile = get_agent_profile(agent_id)) { *config = *profile; user_data->value = common::get_tid(); } } std::string get_counter_info_name(uint64_t record_id) { auto info = rocprofiler_counter_info_v0_t{}; auto counter_id = rocprofiler_counter_id_t{}; ROCPROFILER_CALL(rocprofiler_query_record_counter_id(record_id, &counter_id), "query record counter id"); if(rocprofiler_query_counter_info(rocprofiler_counter_id_t{counter_id}, ROCPROFILER_COUNTER_INFO_VERSION_0, static_cast(&info)) != ROCPROFILER_STATUS_SUCCESS) { ROCP_FATAL << "Could not find name for record id: " << record_id; } return {info.name}; } void counter_record_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data, rocprofiler_record_counter_t* record_data, size_t record_count, rocprofiler_user_data_t user_data, void* /*callback_data_args*/) { static const auto gpu_agents = get_gpu_agents(); static const auto gpu_agents_counter_info = get_agent_counter_info(gpu_agents); auto counter_record = rocprofiler_tool_counter_collection_record_t{}; auto kernel_id = dispatch_data.dispatch_info.kernel_id; counter_record.dispatch_data = dispatch_data; counter_record.thread_id = user_data.value; const kernel_symbol_data* kernel_info = kernel_data->rlock([kernel_id](const auto& _data) { return &_data.at(kernel_id); }); auto lds_block_size_v = (kernel_info->group_segment_size + (lds_block_size - 1)) & ~(lds_block_size - 1); counter_record.arch_vgpr_count = kernel_info->arch_vgpr_count; counter_record.sgpr_count = kernel_info->sgpr_count; counter_record.lds_block_size_v = lds_block_size_v; ROCP_FATAL_IF(!kernel_info) << "missing kernel information for kernel_id=" << kernel_id; ROCP_ERROR_IF(record_count == 0) << "zero record count for kernel_id=" << kernel_id << " (name=" << kernel_info->kernel_name << ")"; for(size_t count = 0; count < record_count; count++) { // Unlikely to trigger, temporary until we move to buffered callbacks if(count >= counter_record.records.size()) { ROCP_WARNING << "Exceeded maximum counter capacity, skipping remaining"; break; } auto _counter_id = rocprofiler_counter_id_t{}; ROCPROFILER_CALL(rocprofiler_query_record_counter_id(record_data[count].id, &_counter_id), "query record counter id"); counter_record.records[count] = rocprofiler_tool_record_counter_t{_counter_id, record_data[count]}; counter_record.counter_count++; } write_ring_buffer(counter_record, domain_type::COUNTER_COLLECTION); } rocprofiler_status_t list_metrics_iterate_agents(rocprofiler_agent_version_t, const void** agents, size_t num_agents, void*) { for(size_t idx = 0; idx < num_agents; idx++) { const auto* agent = static_cast(agents[idx]); auto counters_v = counter_vec_t{}; // TODO(aelwazir): To be changed back to use node id once ROCR fixes // the hsa_agents to use the real node id uint32_t node_id = agent->logical_node_id; ROCPROFILER_CALL( rocprofiler_iterate_agent_supported_counters( agent->id, [](rocprofiler_agent_id_t, rocprofiler_counter_id_t* counters, size_t num_counters, void* user_data) { auto* agent_node_id = static_cast(user_data); for(size_t i = 0; i < num_counters; i++) { rocprofiler_counter_info_v0_t counter_info; auto dimensions = std::vector{}; ROCPROFILER_CALL( rocprofiler_iterate_counter_dimensions(counters[i], dimensions_info_callback, static_cast(&dimensions)), "iterate_dimension_info"); ROCPROFILER_CALL( rocprofiler_query_counter_info(counters[i], ROCPROFILER_COUNTER_INFO_VERSION_0, static_cast(&counter_info)), "Could not query counter_id"); auto dimensions_info = std::stringstream{}; for(size_t j = 0; j != dimensions.size(); j++) { dimensions_info << dimensions[j].name << "[0:" << dimensions[j].instance_size - 1 << "]"; if(j != dimensions.size() - 1) dimensions_info << "\t"; } if(!counter_info.is_derived && tool::get_config().list_metrics && !std::string(counter_info.block).empty()) { auto counter_info_ss = std::stringstream{}; if(tool::get_config().list_metrics_output_file) { tool::csv::list_basic_metrics_csv_encoder::write_row( counter_info_ss, *agent_node_id, counter_info.name, counter_info.description, counter_info.block, dimensions_info.str()); get_dereference(get_list_basic_metrics_file()) << counter_info_ss.str(); } else { counter_info_ss << "gpu-agent" << *agent_node_id << ":" << "\t" << counter_info.name << "\n"; counter_info_ss << "Description:" << "\t" << counter_info.description << "\n"; counter_info_ss << "Block:" << "\t" << counter_info.block << "\n"; counter_info_ss << "Dimensions:" << "\t" << dimensions_info.str() << "\n"; counter_info_ss << "\n"; std::cout << counter_info_ss.str(); } } else if(counter_info.is_derived && tool::get_config().list_metrics) { auto counter_info_ss = std::stringstream{}; if(tool::get_config().list_metrics_output_file) { tool::csv::list_derived_metrics_csv_encoder::write_row( counter_info_ss, *agent_node_id, counter_info.name, counter_info.description, counter_info.expression, dimensions_info.str()); get_dereference(get_list_derived_metrics_file()) << counter_info_ss.str(); } else { counter_info_ss << "gpu-agent" << *agent_node_id << ":" << "\t" << counter_info.name << "\n" << "Description: " << counter_info.description << "\n"; counter_info_ss << "Expression: " << counter_info.expression << "\n"; counter_info_ss << "Dimensions: " << dimensions_info.str() << "\n"; counter_info_ss << "\n"; std::cout << counter_info_ss.str(); } } } return ROCPROFILER_STATUS_SUCCESS; }, reinterpret_cast(&node_id)), "Iterate rocprofiler counters"); } return ROCPROFILER_STATUS_SUCCESS; } rocprofiler_client_finalize_t client_finalizer = nullptr; rocprofiler_client_id_t* client_identifier = nullptr; void initialize_logging() { auto logging_cfg = rocprofiler::common::logging_config{.install_failure_handler = true}; common::init_logging("ROCPROF", logging_cfg); FLAGS_colorlogtostderr = true; } void initialize_rocprofv3() { ROCP_INFO << "initializing rocprofv3..."; if(int status = 0; rocprofiler_is_initialized(&status) == ROCPROFILER_STATUS_SUCCESS && status == 0) { ROCPROFILER_CALL(rocprofiler_force_configure(&rocprofiler_configure), "force configuration"); } LOG_IF(FATAL, !client_identifier) << "nullptr to client identifier!"; LOG_IF(FATAL, !client_finalizer && !tool::get_config().list_metrics) << "nullptr to client finalizer!"; // exception for listing metrics } void finalize_rocprofv3() { ROCP_INFO << "finalizing rocprofv3..."; if(client_finalizer && client_identifier) { client_finalizer(*client_identifier); client_finalizer = nullptr; client_identifier = nullptr; } } timestamps_t* get_app_timestamps() { return stats_timestamp; } void init_tool_table() { // agent and timestamp functions tool_functions->tool_get_agent_node_id_fn = get_agent_node_id; tool_functions->tool_get_app_timestamps_fn = get_app_timestamps; // name functions tool_functions->tool_get_domain_name_fn = get_domain_name; tool_functions->tool_get_kernel_name_fn = get_kernel_name; tool_functions->tool_get_operation_name_fn = get_operation_name; tool_functions->tool_get_counter_info_name_fn = get_counter_info_name; tool_functions->tool_get_callback_kind_fn = get_callback_kind; tool_functions->tool_get_callback_op_name_fn = get_callback_op_name; tool_functions->tool_get_roctx_msg_fn = get_roctx_msg; } void fini_tool_table() { *tool_functions = tool_table{}; } int tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) { client_finalizer = fini_func; constexpr uint64_t buffer_size = 4096; constexpr uint64_t buffer_watermark = 4096; rocprofiler_get_timestamp(&(stats_timestamp->app_start_time)); init_tool_table(); ROCPROFILER_CALL(rocprofiler_create_context(&get_client_ctx()), "create context failed"); auto code_obj_ctx = rocprofiler_context_id_t{}; ROCPROFILER_CALL(rocprofiler_create_context(&code_obj_ctx), "failed to create context"); ROCPROFILER_CALL( rocprofiler_configure_callback_tracing_service(code_obj_ctx, ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, nullptr, 0, code_object_tracing_callback, nullptr), "code object tracing configure failed"); ROCPROFILER_CALL(rocprofiler_start_context(code_obj_ctx), "start context failed"); if(tool::get_config().marker_api_trace) { ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service( get_client_ctx(), ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API, nullptr, 0, callback_tracing_callback, nullptr), "callback tracing service failed to configure"); auto pause_resume_ctx = rocprofiler_context_id_t{}; ROCPROFILER_CALL(rocprofiler_create_context(&pause_resume_ctx), "failed to create context"); ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service( pause_resume_ctx, ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API, nullptr, 0, cntrl_tracing_callback, static_cast(&get_client_ctx())), "callback tracing service failed to configure"); ROCPROFILER_CALL(rocprofiler_start_context(pause_resume_ctx), "start context failed"); } if(tool::get_config().kernel_trace) { ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(), buffer_size, buffer_watermark, ROCPROFILER_BUFFER_POLICY_LOSSLESS, buffered_tracing_callback, tool_data, &get_buffers().kernel_trace), "buffer creation"); ROCPROFILER_CALL( rocprofiler_configure_buffer_tracing_service(get_client_ctx(), ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH, nullptr, 0, get_buffers().kernel_trace), "buffer tracing service for kernel dispatch configure"); } if(tool::get_config().memory_copy_trace) { ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(), buffer_size, buffer_watermark, ROCPROFILER_BUFFER_POLICY_LOSSLESS, buffered_tracing_callback, nullptr, &get_buffers().memory_copy_trace), "create memory copy buffer"); ROCPROFILER_CALL( rocprofiler_configure_buffer_tracing_service(get_client_ctx(), ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, nullptr, 0, get_buffers().memory_copy_trace), "buffer tracing service for memory copy configure"); } if(tool::get_config().scratch_memory) { ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(), buffer_size, buffer_watermark, ROCPROFILER_BUFFER_POLICY_LOSSLESS, buffered_tracing_callback, tool_data, &get_buffers().scratch_memory), "buffer creation"); ROCPROFILER_CALL( rocprofiler_configure_buffer_tracing_service(get_client_ctx(), ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY, nullptr, 0, get_buffers().scratch_memory), "buffer tracing service for scratch memory configure"); } if(tool::get_config().hsa_core_api_trace || tool::get_config().hsa_amd_ext_api_trace || tool::get_config().hsa_image_ext_api_trace || tool::get_config().hsa_finalizer_ext_api_trace) { ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(), buffer_size, buffer_watermark, ROCPROFILER_BUFFER_POLICY_LOSSLESS, buffered_tracing_callback, tool_data, &get_buffers().hsa_api_trace), "buffer creation"); using optpair_t = std::pair; for(auto itr : {optpair_t{tool::get_config().hsa_core_api_trace, ROCPROFILER_BUFFER_TRACING_HSA_CORE_API}, optpair_t{tool::get_config().hsa_amd_ext_api_trace, ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API}, optpair_t{tool::get_config().hsa_image_ext_api_trace, ROCPROFILER_BUFFER_TRACING_HSA_IMAGE_EXT_API}, optpair_t{tool::get_config().hsa_finalizer_ext_api_trace, ROCPROFILER_BUFFER_TRACING_HSA_FINALIZE_EXT_API}}) { if(itr.first) { ROCPROFILER_CALL( rocprofiler_configure_buffer_tracing_service( get_client_ctx(), itr.second, nullptr, 0, get_buffers().hsa_api_trace), "buffer tracing service for hsa api configure"); } } } if(tool::get_config().hip_runtime_api_trace || tool::get_config().hip_compiler_api_trace) { ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(), buffer_size, buffer_watermark, ROCPROFILER_BUFFER_POLICY_LOSSLESS, buffered_tracing_callback, tool_data, &get_buffers().hip_api_trace), "buffer creation"); if(tool::get_config().hip_runtime_api_trace) { ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service( get_client_ctx(), ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API, nullptr, 0, get_buffers().hip_api_trace), "buffer tracing service for hip api configure"); } if(tool::get_config().hip_compiler_api_trace) { ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service( get_client_ctx(), ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API, nullptr, 0, get_buffers().hip_api_trace), "buffer tracing service for hip compiler api configure"); } } if(tool::get_config().counter_collection) { ROCPROFILER_CALL( rocprofiler_configure_callback_dispatch_profile_counting_service( get_client_ctx(), dispatch_callback, nullptr, counter_record_callback, nullptr), "Could not setup counting service"); } for(auto itr : get_buffers().as_array()) { if(itr.handle > 0) { auto cb_thread = rocprofiler_callback_thread_t{}; ROCP_INFO << "creating dedicated callback thread for buffer " << itr.handle; ROCPROFILER_CALL(rocprofiler_create_callback_thread(&cb_thread), "creating callback thread"); ROCP_INFO << "assigning buffer " << itr.handle << " to callback thread " << cb_thread.handle; ROCPROFILER_CALL(rocprofiler_assign_callback_thread(itr, cb_thread), "assigning callback thread"); } } ROCPROFILER_CALL(rocprofiler_start_context(get_client_ctx()), "start context failed"); return 0; } void api_registration_callback(rocprofiler_intercept_table_t, uint64_t, uint64_t, void**, uint64_t, void*) { ROCPROFILER_CALL(rocprofiler_query_available_agents(ROCPROFILER_AGENT_INFO_VERSION_0, list_metrics_iterate_agents, sizeof(rocprofiler_agent_t), nullptr), "Iterate rocporfiler agents") } using stats_data_t = ::rocprofiler::tool::stats_data_t; template void generate_output(rocprofiler::tool::buffered_output& output_v, std::unordered_map& contributions_v) { if(!output_v) return; output_v.read(); if(tool::get_config().csv_output) { output_v.stats = rocprofiler::tool::generate_csv(tool_functions, output_v.element_data); contributions_v.emplace(output_v.buffer_type_v, output_v.stats); } } void tool_fini(void* /*tool_data*/) { client_identifier = nullptr; client_finalizer = nullptr; rocprofiler_get_timestamp(&(stats_timestamp->app_end_time)); rocprofiler_stop_context(get_client_ctx()); flush(); auto kernel_dispatch_output = kernel_dispatch_buffered_output_t{tool::get_config().kernel_trace}; auto hsa_output = hsa_buffered_output_t{tool::get_config().hsa_core_api_trace || tool::get_config().hsa_amd_ext_api_trace || tool::get_config().hsa_image_ext_api_trace || tool::get_config().hsa_finalizer_ext_api_trace}; auto hip_output = hip_buffered_output_t{tool::get_config().hip_runtime_api_trace || tool::get_config().hip_compiler_api_trace}; auto memory_copy_output = memory_copy_buffered_output_t{tool::get_config().memory_copy_trace}; auto marker_output = marker_buffered_output_t{tool::get_config().marker_api_trace}; auto counters_output = counter_collection_buffered_output_t{tool::get_config().counter_collection}; auto scratch_memory_output = scratch_memory_buffered_output_t{tool::get_config().scratch_memory}; auto node_id_sort = [](const auto& lhs, const auto& rhs) { return lhs.node_id < rhs.node_id; }; auto _agents = std::vector{}; _agents.reserve(agent_info->size()); for(auto& itr : *agent_info) _agents.emplace_back(itr.second); auto _counters = get_tool_counter_info(); std::sort(_agents.begin(), _agents.end(), node_id_sort); if(tool::get_config().csv_output) { rocprofiler::tool::generate_csv(tool_functions, _agents); } auto contributions = std::unordered_map{}; generate_output(kernel_dispatch_output, contributions); generate_output(hsa_output, contributions); generate_output(hip_output, contributions); generate_output(memory_copy_output, contributions); generate_output(marker_output, contributions); generate_output(counters_output, contributions); generate_output(scratch_memory_output, contributions); if(tool::get_config().stats && tool::get_config().csv_output) { rocprofiler::tool::generate_csv(tool_functions, contributions); } if(tool::get_config().json_output) { rocprofiler::tool::write_json(tool_functions, getpid(), _agents, _counters, &hip_output.element_data, &hsa_output.element_data, &kernel_dispatch_output.element_data, &memory_copy_output.element_data, &counters_output.element_data, &marker_output.element_data, &scratch_memory_output.element_data); } if(tool::get_config().pftrace_output) { rocprofiler::tool::write_perfetto(tool_functions, getpid(), _agents, &hip_output.element_data, &hsa_output.element_data, &kernel_dispatch_output.element_data, &memory_copy_output.element_data, &marker_output.element_data, &scratch_memory_output.element_data); } auto destroy_output = [](auto& _buffered_output_v) { _buffered_output_v.destroy(); }; destroy_output(kernel_dispatch_output); destroy_output(hsa_output); destroy_output(hip_output); destroy_output(memory_copy_output); destroy_output(marker_output); destroy_output(counters_output); destroy_output(scratch_memory_output); fini_tool_table(); if(destructors) { for(const auto& itr : *destructors) itr(); delete destructors; destructors = nullptr; } #if defined(CODECOV) && CODECOV > 0 __gcov_dump(); #endif } } // namespace std::map get_callback_roctx_msg() { auto _data = marker_msg_data->rlock([](const auto& _data_v) { return _data_v; }); auto _ret = std::map{}; for(const auto& itr : _data) _ret.emplace(itr.first, itr.second); return _ret; } std::vector get_kernel_symbol_data() { auto _data = kernel_data->rlock([](const auto& _data_v) { auto _info = std::vector{}; _info.reserve(_data_v.size()); for(const auto& itr : _data_v) _info.emplace_back(itr.second); return _info; }); uint64_t kernel_data_size = 0; for(const auto& itr : _data) kernel_data_size = std::max(kernel_data_size, itr.kernel_id); auto _symbol_data = std::vector{}; _symbol_data.resize(kernel_data_size + 1, kernel_symbol_data{}); // index by the kernel id for(auto& itr : _data) _symbol_data.at(itr.kernel_id) = std::move(itr); return _symbol_data; } std::vector get_code_object_data() { auto _data = code_obj_data->rlock([](const auto& _data_v) { auto _info = std::vector{}; _info.reserve(_data_v.size()); for(const auto& itr : _data_v) _info.emplace_back(itr.second); return _info; }); uint64_t _sz = 0; for(const auto& itr : _data) _sz = std::max(_sz, itr.code_object_id); auto _code_obj_data = std::vector{}; _code_obj_data.resize(_sz + 1, rocprofiler_code_object_data_t{}); // index by the code object id for(auto& itr : _data) _code_obj_data.at(itr.code_object_id) = itr; return _code_obj_data; } std::vector get_tool_counter_info() { auto _data = get_agent_counter_info(get_gpu_agents()); auto _ret = std::vector{}; for(const auto& itr : _data) { for(const auto& iitr : itr.second) _ret.emplace_back(iitr); } return _ret; } std::vector get_tool_counter_dimension_info() { auto _data = get_agent_counter_info(get_gpu_agents()); auto _ret = std::vector{}; for(const auto& itr : _data) { for(const auto& iitr : itr.second) for(const auto& ditr : iitr.dimension_info) _ret.emplace_back(ditr); } auto _sorter = [](const rocprofiler_record_dimension_info_t& lhs, const rocprofiler_record_dimension_info_t& rhs) { return std::tie(lhs.id, lhs.instance_size) < std::tie(rhs.id, rhs.instance_size); }; auto _equiv = [](const rocprofiler_record_dimension_info_t& lhs, const rocprofiler_record_dimension_info_t& rhs) { return std::tie(lhs.id, lhs.instance_size) == std::tie(rhs.id, rhs.instance_size); }; std::sort(_ret.begin(), _ret.end(), _sorter); _ret.erase(std::unique(_ret.begin(), _ret.end(), _equiv), _ret.end()); return _ret; } namespace { using main_func_t = int (*)(int, char**, char**); main_func_t& get_main_function() { static main_func_t user_main = nullptr; return user_main; } } // namespace #define ROCPROFV3_INTERNAL_API __attribute__((visibility("internal"))); extern "C" { void rocprofv3_set_main(main_func_t main_func) ROCPROFV3_INTERNAL_API; int rocprofv3_main(int argc, char** argv, char** envp) ROCPROFV3_INTERNAL_API; rocprofiler_tool_configure_result_t* rocprofiler_configure(uint32_t version, const char* runtime_version, uint32_t priority, rocprofiler_client_id_t* id) { initialize_logging(); // set the client name id->name = "rocprofv3"; // store client info client_identifier = id; // note that rocprofv3 is not the primary tool ROCP_WARNING_IF(priority > 0) << id->name << " has a priority of " << priority << " (not primary tool)"; // compute major/minor/patch version info uint32_t major = version / 10000; uint32_t minor = (version % 10000) / 100; uint32_t patch = version % 100; // ensure these pointers are not leaked add_destructor(buffered_name_info); add_destructor(callback_name_info); add_destructor(marker_msg_data); add_destructor(code_obj_data); add_destructor(kernel_data); add_destructor(tool_functions); add_destructor(agent_info); add_destructor(stats_timestamp); // in case main wrapper is not used ::atexit(finalize_rocprofv3); if(tool::get_config().list_metrics) { ROCPROFILER_CALL(rocprofiler_at_intercept_table_registration( api_registration_callback, ROCPROFILER_HSA_TABLE, nullptr), "api registration"); return nullptr; } ROCPROFILER_CALL( rocprofiler_query_available_agents( ROCPROFILER_AGENT_INFO_VERSION_0, [](rocprofiler_agent_version_t, const void** agents, size_t num_agents, void*) { for(size_t i = 0; i < num_agents; ++i) { auto* agent = static_cast(agents[i]); agent_info->emplace(agent->id, *agent); } return ROCPROFILER_STATUS_SUCCESS; }, sizeof(rocprofiler_agent_t), nullptr), "Iterate rocporfiler agents") ROCP_INFO << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch << " (" << runtime_version << ")"; // create configure data static auto cfg = rocprofiler_tool_configure_result_t{ sizeof(rocprofiler_tool_configure_result_t), &tool_init, &tool_fini, nullptr}; // return pointer to configure data return &cfg; // data passed around all the callbacks } void rocprofv3_set_main(main_func_t main_func) { get_main_function() = main_func; } int rocprofv3_main(int argc, char** argv, char** envp) { initialize_logging(); initialize_rocprofv3(); auto ret = CHECK_NOTNULL(get_main_function())(argc, argv, envp); finalize_rocprofv3(); ROCP_INFO << "rocprofv3 finished. exit code: " << ret; return ret; } }