Fixing XCC-based metrics

Change-Id: I7451355b5b5eeefbd1c78b996d4ca5c4b770246a


[ROCm/rocprofiler commit: bf749ba8d6]
このコミットが含まれているのは:
Giovanni LB
2024-03-27 00:21:52 -03:00
committed by Giovanni Baraldi
コミット 998fce25e0
2個のファイルの変更28行の追加58行の削除
+25 -50
ファイルの表示
@@ -30,8 +30,6 @@ struct block_status_t {
// State handed to the aqlprofile data-iteration callback through its `void* data` argument.
struct callback_data_t {
  // List of result records that the callback matches incoming events against.
  std::vector<results_t*>* results;
  // Running position within the iterated data; compared against the buffer sizes below
  // to decide which XCC a sample belongs to.
  size_t index;
  // Number of entries that belong to a single XCC.
  uint32_t single_xcc_buff_size;
  // Number of UMC entries; these are all counted as part of xcc_0.
  uint32_t umc_buff_size;
};
static inline bool IsEventMatch(const hsa_ven_amd_aqlprofile_event_t& event1,
@@ -40,16 +38,6 @@ static inline bool IsEventMatch(const hsa_ven_amd_aqlprofile_event_t& event1,
(event1.counter_id == event2.counter_id);
}
/**
 * Maps the running position stored in passed_data->index to the index of the XCC whose
 * segment contains it.
 *
 * xcc_0 is a special case: its segment spans umc_buff_size + single_xcc_buff_size entries
 * because it contains all UMC event results. After xcc_0 there are no UMC event results,
 * so every following segment spans single_xcc_buff_size entries.
 *
 * @param passed_data Callback state; index, umc_buff_size and single_xcc_buff_size are read.
 * @return 0 while index lies inside the xcc_0 segment (or when single_xcc_buff_size is 0,
 *         i.e. there are no per-XCC segments to index into); otherwise 1 plus the number
 *         of whole single_xcc_buff_size segments between the end of xcc_0 and index.
 */
uint32_t calculate_xcc_index(callback_data_t* passed_data) {
  const uint32_t per_xcc_size = passed_data->single_xcc_buff_size;
  const uint32_t xcc_zero_size = passed_data->umc_buff_size + per_xcc_size;

  // Guarding per_xcc_size == 0 avoids an integer division by zero below.
  if (per_xcc_size == 0 || passed_data->index < xcc_zero_size) return 0;

  // Integer division already truncates toward zero, so no floor() is needed.
  return static_cast<uint32_t>(1 + (passed_data->index - xcc_zero_size) / per_xcc_size);
}
hsa_status_t pmcCallback(hsa_ven_amd_aqlprofile_info_type_t info_type,
hsa_ven_amd_aqlprofile_info_data_t* info_data, void* data) {
hsa_status_t status = HSA_STATUS_SUCCESS;
@@ -61,9 +49,8 @@ hsa_status_t pmcCallback(hsa_ven_amd_aqlprofile_info_type_t info_type,
if (info_type != HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA) continue;
if (!IsEventMatch(info_data->pmc_data.event, (*data_it)->event)) continue;
uint32_t xcc_index = calculate_xcc_index(passed_data);
// stores event result from each xcc separately
(*data_it)->xcc_vals.at(xcc_index) += info_data->pmc_data.result;
// stores event result from each event separately
(*data_it)->xcc_vals.push_back(info_data->pmc_data.result);
// stores accumulated event result from all xccs
(*data_it)->val_double += info_data->pmc_data.result;
}
@@ -197,38 +184,30 @@ bool metrics::ExtractMetricEvents(
return true;
}
bool metrics::GetCounterData(hsa_ven_amd_aqlprofile_profile_t* profile, hsa_agent_t gpu_agent,
std::vector<results_t*>& results_list) {
size_t gpu_xcc_count = HSASupport_Singleton::GetInstance().GetHSAAgentInfo(gpu_agent.handle).GetDeviceInfo().getXccCount();
callback_data_t callback_data{&results_list, 0};
hsa_status_t status = hsa_ven_amd_aqlprofile_iterate_data(profile, pmcCallback, &callback_data);
std::pair<uint32_t, uint32_t> get_umc_and_xcc_sample_count(
hsa_ven_amd_aqlprofile_profile_t* profile, uint32_t xcc_num) {
const uint32_t UMC_SAMPLE_BYTE_SIZE = 8;
for (auto& data : *(callback_data.results))
{
size_t xcc_count = (data->event.block_name != HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_UMC) ? gpu_xcc_count : 1;
std::vector<double> xcc_results = std::move(data->xcc_vals);
data->xcc_vals.clear();
data->xcc_vals.reserve(xcc_count);
size_t split = (xcc_results.size()+xcc_count-1) / xcc_count;
uint32_t umc_sample_count = 0;
if (xcc_num > 1) {
// We count the UMC samples per XCC for MI300: for each event there are AID samples
for (const hsa_ven_amd_aqlprofile_event_t* p = profile->events;
p < profile->events + profile->event_count; ++p) {
if (p->block_name == HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_UMC) {
++umc_sample_count;
}
// Exploit the fact AQLprofile calls counter in order per XCC
for (size_t xcc=0; xcc < xcc_count; xcc++)
{
double accumulated = 0;
for (size_t s=xcc*split; s<std::min(xcc_results.size(), (xcc+1)*split); s++)
accumulated += xcc_results.at(s);
data->xcc_vals.push_back(accumulated);
}
}
// per xcc sample count
uint32_t xcc_sample_count =
(profile->output_buffer.size - umc_sample_count * UMC_SAMPLE_BYTE_SIZE) /
(sizeof(uint64_t) * xcc_num);
return std::make_pair(xcc_sample_count, umc_sample_count);
}
// Collects the counter samples of `profile` into `results_list`.
//
// Looks up the XCC count of `gpu_agent`, derives the per-XCC and UMC sample counts for the
// profile, and hands both to pmcCallback (via callback_data_t) while iterating the profile data.
// Returns true only when the iteration reports HSA_STATUS_SUCCESS.
bool metrics::GetCounterData(hsa_ven_amd_aqlprofile_profile_t* profile, hsa_agent_t gpu_agent,
                             std::vector<results_t*>& results_list) {
  const uint32_t xcc_count = HSASupport_Singleton::GetInstance()
                                 .GetHSAAgentInfo(gpu_agent.handle)
                                 .GetDeviceInfo()
                                 .getXccCount();

  // The helper returns {per-XCC sample count, UMC sample count}, in that order.
  const auto [single_xcc_buff_size, umc_buff_size] =
      get_umc_and_xcc_sample_count(profile, xcc_count);

  callback_data_t callback_data{&results_list, 0, single_xcc_buff_size, umc_buff_size};
  const hsa_status_t status =
      hsa_ven_amd_aqlprofile_iterate_data(profile, pmcCallback, &callback_data);
  return status == HSA_STATUS_SUCCESS;
}
@@ -257,15 +236,11 @@ void metrics::GetCountersAndMetricResultsByXcc(uint32_t xcc_index,
std::map<std::string, results_t*>& results_map,
std::vector<const Metric*>& metrics_list,
uint64_t kernel_duration) {
for (auto it = results_list.begin(); it != results_list.end(); it++) {
(*it)->val_double =
(*it)->xcc_vals[xcc_index]; // set val_double to hold value for specific xcc
}
for (auto* it : results_list) // set val_double to hold value for specific xcc
it->val_double = it->xcc_vals[xcc_index];
for (auto it = results_map.begin(); it != results_map.end(); it++) {
it->second->val_double =
it->second->xcc_vals[xcc_index]; // set val_double to hold value for specific xcc
}
for (auto& [str, it] : results_map)
it->val_double = it->xcc_vals[xcc_index];
GetMetricsData(results_map, metrics_list, kernel_duration);
}
+3 -8
ファイルの表示
@@ -33,18 +33,13 @@ THE SOFTWARE.
namespace rocprofiler {
typedef std::vector<double> xcc_results_t;
// Result record for one profiled event: its name, the event descriptor, an accumulated
// value and a list of per-XCC values.
// NOTE(review): this class body carries two constructors that differ only in the integer
// type of xcc_count and two declarations of xcc_vals — it looks like an old and a new
// revision are both present; confirm which one is meant to remain.
class results_t {
public:
// Creates a record whose xcc_vals holds xcc_count entries, all set to 0.
results_t(std::string in_name, event_t in_event, uint32_t xcc_count)
: name(in_name), val_double(0), event(in_event) {
xcc_vals.resize(xcc_count);
std::fill(xcc_vals.begin(), xcc_vals.end(), 0);
}
// Creates a record with an empty xcc_vals; only capacity for xcc_count entries is reserved.
results_t(std::string in_name, event_t in_event, int xcc_count)
: name(in_name), val_double(0), event(in_event) { xcc_vals.reserve(xcc_count); }
// Name given to this result at construction.
std::string name;
// Scalar value of the result; starts at 0.
double val_double;
// Per-XCC values (declared through the xcc_results_t alias).
xcc_results_t xcc_vals;
// Per-XCC values (declared directly as a vector of double).
std::vector<double> xcc_vals;
// Event descriptor this result belongs to.
event_t event;
};