Profiler - Add all required metrics

Change-Id: Iea3938df9407789c061c3a6ead9167a69069d6e6
Signed-off-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>


[ROCm/rdc commit: c3a4c899d5]
Esse commit está contido em:
Galantsev, Dmitrii
2024-05-09 14:40:53 -05:00
commit 83cf97e280
5 arquivos alterados com 69 adições e 41 exclusões
+8 -2
Ver Arquivo
@@ -102,8 +102,14 @@ FLD_DESC_ENT(RDC_FI_XGMI_7_WRITE_KB, "XGMI7 accumulated data write size (KB)
// This doesn't map to rocprofiler counters directly
// See counter_map in rdc/include/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.h
// See metrics.xml in rocprofiler
FLD_DESC_ENT(RDC_FI_PROF_GPU_UTIL, "", "PROF_GPU_UTIL", false)
FLD_DESC_ENT(RDC_FI_PROF_TA_BUSY_AVR, "", "PROF_TA_BUSY_AVR", false)
FLD_DESC_ENT(RDC_FI_PROF_CU_UTILIZATION, "", "CU_UTILIZATION", false)
FLD_DESC_ENT(RDC_FI_PROF_CU_OCCUPANCY, "", "CU_OCCUPANCY", false)
FLD_DESC_ENT(RDC_FI_PROF_FLOPS_16, "", "FLOPS_16", false)
FLD_DESC_ENT(RDC_FI_PROF_FLOPS_32, "", "FLOPS_32", false)
FLD_DESC_ENT(RDC_FI_PROF_FLOPS_64, "", "FLOPS_64", false)
FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_CYCLES, "", "ACTIVE_CYCLES", false)
FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_WAVES, "", "ACTIVE_WAVES", false)
FLD_DESC_ENT(RDC_FI_PROF_ELAPSED_CYCLES, "", "ELAPSED_CYCLES", false)
// Events
FLD_DESC_ENT(RDC_EVNT_XGMI_0_NOP_TX, "NOPs sent to neighbor 0", "XGMI_NOP_0", false)
+15 -9
Ver Arquivo
@@ -230,13 +230,13 @@ typedef enum {
// In new ASICs, such as MI300, the XGMI events are not supported.
// Use the XGMI-related fields below to calculate the bandwidth.
RDC_FI_XGMI_0_READ_KB = 700, //!< XGMI_0 accumulated data read size (KB)
RDC_FI_XGMI_1_READ_KB, //!< XGMI_1 accumulated data read size (KB)
RDC_FI_XGMI_2_READ_KB, //!< XGMI_2 accumulated data read size (KB)
RDC_FI_XGMI_3_READ_KB, //!< XGMI_3 accumulated data read size (KB)
RDC_FI_XGMI_4_READ_KB, //!< XGMI_4 accumulated data read size (KB)
RDC_FI_XGMI_5_READ_KB, //!< XGMI_5 accumulated data read size (KB)
RDC_FI_XGMI_6_READ_KB, //!< XGMI_6 accumulated data read size (KB)
RDC_FI_XGMI_7_READ_KB, //!< XGMI_7 accumulated data read size (KB)
RDC_FI_XGMI_1_READ_KB, //!< XGMI_1 accumulated data read size (KB)
RDC_FI_XGMI_2_READ_KB, //!< XGMI_2 accumulated data read size (KB)
RDC_FI_XGMI_3_READ_KB, //!< XGMI_3 accumulated data read size (KB)
RDC_FI_XGMI_4_READ_KB, //!< XGMI_4 accumulated data read size (KB)
RDC_FI_XGMI_5_READ_KB, //!< XGMI_5 accumulated data read size (KB)
RDC_FI_XGMI_6_READ_KB, //!< XGMI_6 accumulated data read size (KB)
RDC_FI_XGMI_7_READ_KB, //!< XGMI_7 accumulated data read size (KB)
RDC_FI_XGMI_0_WRITE_KB, //!< XGMI_0 accumulated data write size (KB)
RDC_FI_XGMI_1_WRITE_KB, //!< XGMI_1 accumulated data write size (KB)
@@ -250,8 +250,14 @@ typedef enum {
/**
* @brief ROC-profiler related fields
*/
RDC_FI_PROF_GPU_UTIL = 800, //!<
RDC_FI_PROF_TA_BUSY_AVR, //!<
RDC_FI_PROF_CU_UTILIZATION = 800,
RDC_FI_PROF_CU_OCCUPANCY,
RDC_FI_PROF_FLOPS_16,
RDC_FI_PROF_FLOPS_32,
RDC_FI_PROF_FLOPS_64,
RDC_FI_PROF_ACTIVE_CYCLES,
RDC_FI_PROF_ACTIVE_WAVES,
RDC_FI_PROF_ELAPSED_CYCLES,
/*
* @brief Raw XGMI counter events
@@ -39,17 +39,6 @@ THE SOFTWARE.
namespace amd {
namespace rdc {
/**
 * @brief Map of RDC fields to rocmtools counters
 *
 * Each key is an rdc_field_t id and each value is the counter-name string
 * associated with that field.
 *
 * See metrics.xml in rocmtools for more info.
 * RDC_CALC fields are calculated over time by RDC.
 */
static const std::map<rdc_field_t, const char*> counter_map_k = {
{RDC_FI_PROF_GPU_UTIL, "GPU_UTIL"},
{RDC_FI_PROF_TA_BUSY_AVR, "TA_BUSY_avr"},
};
typedef struct {
hsa_agent_t* agents;
unsigned count;
@@ -79,17 +68,20 @@ class RdcRocpBase {
* successfully.
*/
rdc_status_t rocp_lookup(pair_gpu_field_t gpu_field, double* value);
/**
 * @brief Return the counter name mapped to the given RDC field.
 *
 * NOTE(review): the name reads as "name -> id", but the parameter is a field
 * id and the result is the associated counter-name string.
 */
const char* get_field_id_from_name(rdc_field_t);
/**
 * @brief Return the ids of all fields known to this object's counter map.
 */
const std::vector<rdc_field_t> get_field_ids();
protected:
private:
rocprofiler_t* contexts[dev_count] = {nullptr};
static const int features_count = 1;
std::map<const char*, double> metrics;
rocprofiler_feature_t features[dev_count][features_count];
std::map<const char*, double> metrics = {};
rocprofiler_feature_t features[dev_count][features_count] = {};
void read_features(rocprofiler_t* context, const unsigned feature_count);
int run_profiler(const char* feature_name, hsa_queue_t** queues);
int run_profiler(const char* feature_name);
hsa_queue_t* queues[dev_count] = {nullptr};
hsa_agent_arr_t agent_arr;
hsa_agent_arr_t agent_arr = {};
std::map<rdc_field_t, const char*> counter_map_k = {};
/**
* @brief Convert from rocmtools status into RDC status
@@ -26,7 +26,9 @@ THE SOFTWARE.
#include <unistd.h>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <utility>
// #include "hsa.h"
#include "rdc/rdc.h"
@@ -114,7 +116,7 @@ bool createHsaQueue(hsa_queue_t** queue, hsa_agent_t gpu_agent) {
return (status == HSA_STATUS_SUCCESS);
}
int RdcRocpBase::run_profiler(const char* feature_name, hsa_queue_t** queues) {
int RdcRocpBase::run_profiler(const char* feature_name) {
const char* events[features_count] = {feature_name};
// initialize hsa. hsa_init() will also load the profiler libs under the hood
@@ -176,14 +178,39 @@ int RdcRocpBase::run_profiler(const char* feature_name, hsa_queue_t** queues) {
return 0;
}
RdcRocpBase::RdcRocpBase() {
// populate monitored fields
const std::map<rdc_field_t, const char*> counter_map_k = {
{RDC_FI_PROF_TA_BUSY_AVR, "TA_BUSY_avr"},
};
std::cout << "Size of counter_map_k: " << counter_map_k.size() << "\n";
/**
 * @brief Look up the counter name registered for an RDC field.
 *
 * Despite the function's name, the lookup goes from field id to name: the
 * argument is used as the key into counter_map_k and the mapped string is
 * returned.
 *
 * @param field RDC field id used as the key into counter_map_k.
 * @return The C string stored in counter_map_k for @p field.
 * @throws std::out_of_range if @p field is not a key of counter_map_k
 *         (this is the behaviour of std::map::at).
 */
const char* RdcRocpBase::get_field_id_from_name(rdc_field_t field) {
  const char* const counter_name = counter_map_k.at(field);
  return counter_name;
}
/**
 * @brief Collect the ids of every field present in counter_map_k.
 *
 * Side effect: while walking the map, the metrics entry keyed by each
 * counter name is set to 0.0 (and created if it did not exist yet).
 * NOTE(review): this means calling the getter resets any value previously
 * stored in metrics for those counters - confirm this is intended.
 *
 * @return Field ids in counter_map_k iteration order.
 */
const std::vector<rdc_field_t> RdcRocpBase::get_field_ids() {
  std::vector<rdc_field_t> field_ids;
  for (auto entry = counter_map_k.begin(); entry != counter_map_k.end(); ++entry) {
    // entry->first is the field id, entry->second the counter name.
    metrics[entry->second] = 0.0;
    field_ids.push_back(entry->first);
  }
  return field_ids;
}
RdcRocpBase::RdcRocpBase() {
counter_map_k = {
{RDC_FI_PROF_CU_UTILIZATION, "CU_UTILIZATION"},
{RDC_FI_PROF_CU_OCCUPANCY, "CU_OCCUPANCY"},
{RDC_FI_PROF_FLOPS_16, "FLOPS_16"},
{RDC_FI_PROF_FLOPS_32, "FLOPS_32"},
{RDC_FI_PROF_FLOPS_64, "FLOPS_64"},
{RDC_FI_PROF_ACTIVE_CYCLES, "ACTIVE_CYCLES"},
{RDC_FI_PROF_ACTIVE_WAVES, "ACTIVE_WAVES"},
{RDC_FI_PROF_ELAPSED_CYCLES, "ELAPSED_CYCLES"},
};
// populate monitored fields
std::cout << "Size of counter_map_k: " << counter_map_k.size() << "\n";
for (auto& k : counter_map_k) {
printf("metric %d = %s\n", k.first, k.second);
}
for (auto& [k, v] : counter_map_k) {
const char* str = v;
metrics.emplace(std::make_pair(str, 0.0));
}
assert(metrics.size() == counter_map_k.size());
@@ -253,8 +280,7 @@ rdc_status_t RdcRocpBase::rocp_lookup(pair_gpu_field_t gpu_field, double* value)
}
switch (gpu_field.second) {
default:
run_profiler("TA_BUSY_avr", queues);
// read_features(contexts[gpu_field.first], features_count);
run_profiler(counter_map_k.at(gpu_field.second));
*value = metrics[counter_map_k.at(gpu_field.second)];
break;
}
@@ -22,6 +22,7 @@ THE SOFTWARE.
#include <sys/time.h>
#include <algorithm>
#include <cstring>
#include <map>
#include <memory>
@@ -42,11 +43,8 @@ rdc_status_t rdc_module_init(uint64_t flags) { return RDC_ST_OK; }
// TODO: Query fields with rocprofiler
rdc_status_t rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FIELDS], uint32_t* field_count) {
// extract all keys from counter_map
std::vector<uint32_t> counter_keys;
counter_keys.reserve(amd::rdc::counter_map_k.size());
for (auto it : amd::rdc::counter_map_k) {
counter_keys.push_back(it.first);
}
std::vector<rdc_field_t> fields = rocp.get_field_ids();
std::vector<uint32_t> counter_keys(fields.begin(), fields.end());
*field_count = counter_keys.size();
// copy from vector into array