From 998fce25e0fff4ed7d0048903e42eebabaaf5a7b Mon Sep 17 00:00:00 2001 From: Giovanni LB Date: Wed, 27 Mar 2024 00:21:52 -0300 Subject: [PATCH] Fixing XCC-based metrics Change-Id: I7451355b5b5eeefbd1c78b996d4ca5c4b770246a [ROCm/rocprofiler commit: bf749ba8d67f2f841f25486704b5a2a36bfd34ef] --- .../core/counters/metrics/eval_metrics.cpp | 75 +++++++------------ .../src/core/counters/metrics/eval_metrics.h | 11 +-- 2 files changed, 28 insertions(+), 58 deletions(-) diff --git a/projects/rocprofiler/src/core/counters/metrics/eval_metrics.cpp b/projects/rocprofiler/src/core/counters/metrics/eval_metrics.cpp index 44139830a4..c1f5b427ff 100644 --- a/projects/rocprofiler/src/core/counters/metrics/eval_metrics.cpp +++ b/projects/rocprofiler/src/core/counters/metrics/eval_metrics.cpp @@ -30,8 +30,6 @@ struct block_status_t { typedef struct { std::vector* results; size_t index; - uint32_t single_xcc_buff_size; - uint32_t umc_buff_size; } callback_data_t; static inline bool IsEventMatch(const hsa_ven_amd_aqlprofile_event_t& event1, @@ -40,16 +38,6 @@ static inline bool IsEventMatch(const hsa_ven_amd_aqlprofile_event_t& event1, (event1.counter_id == event2.counter_id); } -uint32_t calculate_xcc_index(callback_data_t* passed_data) { - // xcc_0 is special case as it contains all umc event results - // after xcc_0, there are no umc event results - uint32_t xcc_zero_size = passed_data->umc_buff_size + passed_data->single_xcc_buff_size; - uint32_t xcc_index = 0; - if (passed_data->index >= xcc_zero_size) - xcc_index = 1 + floor((passed_data->index - xcc_zero_size) / passed_data->single_xcc_buff_size); - return xcc_index; -} - hsa_status_t pmcCallback(hsa_ven_amd_aqlprofile_info_type_t info_type, hsa_ven_amd_aqlprofile_info_data_t* info_data, void* data) { hsa_status_t status = HSA_STATUS_SUCCESS; @@ -61,9 +49,8 @@ hsa_status_t pmcCallback(hsa_ven_amd_aqlprofile_info_type_t info_type, if (info_type != HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA) continue; if (!IsEventMatch(info_data->pmc_data.event, (*data_it)->event)) continue; - uint32_t xcc_index = calculate_xcc_index(passed_data); - // stores event result from each xcc separately - (*data_it)->xcc_vals.at(xcc_index) += info_data->pmc_data.result; + // stores event result from each event separately + (*data_it)->xcc_vals.push_back(info_data->pmc_data.result); // stores accumulated event result from all xccs (*data_it)->val_double += info_data->pmc_data.result; } @@ -197,38 +184,30 @@ bool metrics::ExtractMetricEvents( return true; } +bool metrics::GetCounterData(hsa_ven_amd_aqlprofile_profile_t* profile, hsa_agent_t gpu_agent, + std::vector& results_list) { + size_t gpu_xcc_count = HSASupport_Singleton::GetInstance().GetHSAAgentInfo(gpu_agent.handle).GetDeviceInfo().getXccCount(); + callback_data_t callback_data{&results_list, 0}; + hsa_status_t status = hsa_ven_amd_aqlprofile_iterate_data(profile, pmcCallback, &callback_data); -std::pair get_umc_and_xcc_sample_count( - hsa_ven_amd_aqlprofile_profile_t* profile, uint32_t xcc_num) { - const uint32_t UMC_SAMPLE_BYTE_SIZE = 8; + for (auto& data : *(callback_data.results)) + { + size_t xcc_count = (data->event.block_name != HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_UMC) ? gpu_xcc_count : 1; + std::vector xcc_results = std::move(data->xcc_vals); + data->xcc_vals.clear(); + data->xcc_vals.reserve(xcc_count); + size_t split = (xcc_results.size()+xcc_count-1) / xcc_count; - uint32_t umc_sample_count = 0; - if (xcc_num > 1) { - // We count the UMC samples per XCC for MI300: for each event there are AID samples - for (const hsa_ven_amd_aqlprofile_event_t* p = profile->events; - p < profile->events + profile->event_count; ++p) { - if (p->block_name == HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_UMC) { - ++umc_sample_count; - } + // Exploit the fact AQLprofile calls counter in order per XCC + for (size_t xcc=0; xcc < xcc_count; xcc++) + { + double accumulated = 0; + for (size_t s=xcc*split; sxcc_vals.push_back(accumulated); } } - // per xcc sample count - uint32_t xcc_sample_count = - (profile->output_buffer.size - umc_sample_count * UMC_SAMPLE_BYTE_SIZE) / - (sizeof(uint64_t) * xcc_num); - - return std::make_pair(xcc_sample_count, umc_sample_count); -} - -bool metrics::GetCounterData(hsa_ven_amd_aqlprofile_profile_t* profile, hsa_agent_t gpu_agent, - std::vector& results_list) { - uint32_t xcc_count = HSASupport_Singleton::GetInstance().GetHSAAgentInfo(gpu_agent.handle).GetDeviceInfo().getXccCount(); - auto umc_count_and_xcc_sample_count = get_umc_and_xcc_sample_count(profile, xcc_count); - uint32_t single_xcc_buff_size = umc_count_and_xcc_sample_count.first; - uint32_t umc_buff_size = umc_count_and_xcc_sample_count.second; - callback_data_t callback_data{&results_list, 0, single_xcc_buff_size, umc_buff_size}; - hsa_status_t status = hsa_ven_amd_aqlprofile_iterate_data(profile, pmcCallback, &callback_data); return (status == HSA_STATUS_SUCCESS); } @@ -257,15 +236,11 @@ void metrics::GetCountersAndMetricResultsByXcc(uint32_t xcc_index, std::map& results_map, std::vector& metrics_list, uint64_t kernel_duration) { - for (auto it = results_list.begin(); it != results_list.end(); it++) { - (*it)->val_double = - (*it)->xcc_vals[xcc_index]; // set val_double to hold value for specific xcc - } + for (auto* it : results_list) // set val_double to hold value for specific xcc + it->val_double = it->xcc_vals[xcc_index]; - for (auto it = results_map.begin(); it != results_map.end(); it++) { - it->second->val_double = - it->second->xcc_vals[xcc_index]; // set val_double to hold value for specific xcc - } + for (auto& [str, it] : results_map) + it->val_double = it->xcc_vals[xcc_index]; GetMetricsData(results_map, metrics_list, kernel_duration); } diff --git a/projects/rocprofiler/src/core/counters/metrics/eval_metrics.h b/projects/rocprofiler/src/core/counters/metrics/eval_metrics.h index ec51b15834..6151d43265 100644 --- a/projects/rocprofiler/src/core/counters/metrics/eval_metrics.h +++ b/projects/rocprofiler/src/core/counters/metrics/eval_metrics.h @@ -33,18 +33,13 @@ THE SOFTWARE. namespace rocprofiler { -typedef std::vector xcc_results_t; - class results_t { public: - results_t(std::string in_name, event_t in_event, uint32_t xcc_count) - : name(in_name), val_double(0), event(in_event) { - xcc_vals.resize(xcc_count); - std::fill(xcc_vals.begin(), xcc_vals.end(), 0); - } + results_t(std::string in_name, event_t in_event, int xcc_count) + : name(in_name), val_double(0), event(in_event) { xcc_vals.reserve(xcc_count); } std::string name; double val_double; - xcc_results_t xcc_vals; + std::vector xcc_vals; event_t event; };