diff --git a/common/rdc_field.data b/common/rdc_field.data index 2e584b3fba..bcdd83188b 100644 --- a/common/rdc_field.data +++ b/common/rdc_field.data @@ -102,8 +102,14 @@ FLD_DESC_ENT(RDC_FI_XGMI_7_WRITE_KB, "XGMI7 accumulated data write size (KB) // This doesn't map to rocprofiler counters directly // See counter_map in rdc/include/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.h // See metrics.xml in rocprofiler -FLD_DESC_ENT(RDC_FI_PROF_GPU_UTIL, "", "PROF_GPU_UTIL", false) -FLD_DESC_ENT(RDC_FI_PROF_TA_BUSY_AVR, "", "PROF_TA_BUSY_AVR", false) +FLD_DESC_ENT(RDC_FI_PROF_CU_UTILIZATION, "", "CU_UTILIZATION", false) +FLD_DESC_ENT(RDC_FI_PROF_CU_OCCUPANCY, "", "CU_OCCUPANCY", false) +FLD_DESC_ENT(RDC_FI_PROF_FLOPS_16, "", "FLOPS_16", false) +FLD_DESC_ENT(RDC_FI_PROF_FLOPS_32, "", "FLOPS_32", false) +FLD_DESC_ENT(RDC_FI_PROF_FLOPS_64, "", "FLOPS_64", false) +FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_CYCLES, "", "ACTIVE_CYCLES", false) +FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_WAVES, "", "ACTIVE_WAVES", false) +FLD_DESC_ENT(RDC_FI_PROF_ELAPSED_CYCLES, "", "ELAPSED_CYCLES", false) // Events FLD_DESC_ENT(RDC_EVNT_XGMI_0_NOP_TX, "NOPs sent to neighbor 0", "XGMI_NOP_0", false) diff --git a/include/rdc/rdc.h b/include/rdc/rdc.h index 74a14009e3..39ce404ee0 100644 --- a/include/rdc/rdc.h +++ b/include/rdc/rdc.h @@ -230,13 +230,13 @@ typedef enum { // In new ASCI, such as MI300, the XGMI events is not supported // Using below XGMI related fields to calculate the bandwidth. RDC_FI_XGMI_0_READ_KB = 700, //!< XGMI_0 accumulated data read size (KB) - RDC_FI_XGMI_1_READ_KB, //!< XGMI_1 accumulated data read size (KB) - RDC_FI_XGMI_2_READ_KB, //!< XGMI_2 accumulated data read size (KB) - RDC_FI_XGMI_3_READ_KB, //!< XGMI_3 accumulated data read size (KB) - RDC_FI_XGMI_4_READ_KB, //!< XGMI_4 accumulated data read size (KB) - RDC_FI_XGMI_5_READ_KB, //!< XGMI_5 accumulated data read size (KB) - RDC_FI_XGMI_6_READ_KB, //!< XGMI_6 accumulated data read size (KB) - RDC_FI_XGMI_7_READ_KB, //!< XGMI_7 accumulated data read size (KB) + RDC_FI_XGMI_1_READ_KB, //!< XGMI_1 accumulated data read size (KB) + RDC_FI_XGMI_2_READ_KB, //!< XGMI_2 accumulated data read size (KB) + RDC_FI_XGMI_3_READ_KB, //!< XGMI_3 accumulated data read size (KB) + RDC_FI_XGMI_4_READ_KB, //!< XGMI_4 accumulated data read size (KB) + RDC_FI_XGMI_5_READ_KB, //!< XGMI_5 accumulated data read size (KB) + RDC_FI_XGMI_6_READ_KB, //!< XGMI_6 accumulated data read size (KB) + RDC_FI_XGMI_7_READ_KB, //!< XGMI_7 accumulated data read size (KB) RDC_FI_XGMI_0_WRITE_KB, //!< XGMI_0 accumulated data write size (KB) RDC_FI_XGMI_1_WRITE_KB, //!< XGMI_1 accumulated data write size (KB) @@ -250,8 +250,14 @@ typedef enum { /** * @brief ROC-profiler related fields */ - RDC_FI_PROF_GPU_UTIL = 800, //!< - RDC_FI_PROF_TA_BUSY_AVR, //!< + RDC_FI_PROF_CU_UTILIZATION = 800, + RDC_FI_PROF_CU_OCCUPANCY, + RDC_FI_PROF_FLOPS_16, + RDC_FI_PROF_FLOPS_32, + RDC_FI_PROF_FLOPS_64, + RDC_FI_PROF_ACTIVE_CYCLES, + RDC_FI_PROF_ACTIVE_WAVES, + RDC_FI_PROF_ELAPSED_CYCLES, /* * @brief Raw XGMI counter events diff --git a/include/rdc_modules/rdc_rocp/RdcRocpBase.h b/include/rdc_modules/rdc_rocp/RdcRocpBase.h index c6a14f39de..c210278df6 100644 --- a/include/rdc_modules/rdc_rocp/RdcRocpBase.h +++ b/include/rdc_modules/rdc_rocp/RdcRocpBase.h @@ -39,17 +39,6 @@ THE SOFTWARE. namespace amd { namespace rdc { -/** - * @brief Map of RDC fields to rocmtools counters - * - * See metrics.xml in rocmtools for more info. - * RDC_CALC fields are calculated over time by RDC. - */ -static const std::map counter_map_k = { - {RDC_FI_PROF_GPU_UTIL, "GPU_UTIL"}, - {RDC_FI_PROF_TA_BUSY_AVR, "TA_BUSY_avr"}, -}; - typedef struct { hsa_agent_t* agents; unsigned count; @@ -79,17 +68,20 @@ class RdcRocpBase { * successfully. */ rdc_status_t rocp_lookup(pair_gpu_field_t gpu_field, double* value); + const char* get_field_id_from_name(rdc_field_t); + const std::vector get_field_ids(); protected: private: rocprofiler_t* contexts[dev_count] = {nullptr}; static const int features_count = 1; - std::map metrics; - rocprofiler_feature_t features[dev_count][features_count]; + std::map metrics = {}; + rocprofiler_feature_t features[dev_count][features_count] = {}; void read_features(rocprofiler_t* context, const unsigned feature_count); - int run_profiler(const char* feature_name, hsa_queue_t** queues); + int run_profiler(const char* feature_name); hsa_queue_t* queues[dev_count] = {nullptr}; - hsa_agent_arr_t agent_arr; + hsa_agent_arr_t agent_arr = {}; + std::map counter_map_k = {}; /** * @brief Convert from rocmtools status into RDC status diff --git a/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.cc b/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.cc index 1efb594416..14f91d26af 100644 --- a/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.cc +++ b/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.cc @@ -26,7 +26,9 @@ THE SOFTWARE. #include #include +#include #include +#include // #include "hsa.h" #include "rdc/rdc.h" @@ -114,7 +116,7 @@ bool createHsaQueue(hsa_queue_t** queue, hsa_agent_t gpu_agent) { return (status == HSA_STATUS_SUCCESS); } -int RdcRocpBase::run_profiler(const char* feature_name, hsa_queue_t** queues) { +int RdcRocpBase::run_profiler(const char* feature_name) { const char* events[features_count] = {feature_name}; // initialize hsa. hsa_init() will also load the profiler libs under the hood @@ -176,14 +178,39 @@ int RdcRocpBase::run_profiler(const char* feature_name, hsa_queue_t** queues) { return 0; } -RdcRocpBase::RdcRocpBase() { - // populate monitored fields - const std::map counter_map_k = { - {RDC_FI_PROF_TA_BUSY_AVR, "TA_BUSY_avr"}, - }; - std::cout << "Size of counter_map_k: " << counter_map_k.size() << "\n"; +const char* RdcRocpBase::get_field_id_from_name(rdc_field_t field) { + return counter_map_k.at(field); +} + +const std::vector RdcRocpBase::get_field_ids() { + std::vector field_ids; for (auto& [k, v] : counter_map_k) { - metrics[v] = 0.0; + field_ids.push_back(k); + } + return field_ids; +} + +RdcRocpBase::RdcRocpBase() { + counter_map_k = { + {RDC_FI_PROF_CU_UTILIZATION, "CU_UTILIZATION"}, + {RDC_FI_PROF_CU_OCCUPANCY, "CU_OCCUPANCY"}, + {RDC_FI_PROF_FLOPS_16, "FLOPS_16"}, + {RDC_FI_PROF_FLOPS_32, "FLOPS_32"}, + {RDC_FI_PROF_FLOPS_64, "FLOPS_64"}, + {RDC_FI_PROF_ACTIVE_CYCLES, "ACTIVE_CYCLES"}, + {RDC_FI_PROF_ACTIVE_WAVES, "ACTIVE_WAVES"}, + {RDC_FI_PROF_ELAPSED_CYCLES, "ELAPSED_CYCLES"}, + }; + + // populate monitored fields + std::cout << "Size of counter_map_k: " << counter_map_k.size() << "\n"; + + for (auto& k : counter_map_k) { + printf("metric %d = %s\n", k.first, k.second); + } + for (auto& [k, v] : counter_map_k) { + const char* str = v; + metrics.emplace(std::make_pair(str, 0.0)); } assert(metrics.size() == counter_map_k.size()); @@ -253,8 +280,7 @@ rdc_status_t RdcRocpBase::rocp_lookup(pair_gpu_field_t gpu_field, double* value) } switch (gpu_field.second) { default: - run_profiler("TA_BUSY_avr", queues); - // read_features(contexts[gpu_field.first], features_count); + run_profiler(counter_map_k.at(gpu_field.second)); *value = metrics[counter_map_k.at(gpu_field.second)]; break; } diff --git a/rdc_libs/rdc_modules/rdc_rocp/RdcTelemetryLib.cc b/rdc_libs/rdc_modules/rdc_rocp/RdcTelemetryLib.cc index b175a39634..8d17186d81 100644 --- a/rdc_libs/rdc_modules/rdc_rocp/RdcTelemetryLib.cc +++ b/rdc_libs/rdc_modules/rdc_rocp/RdcTelemetryLib.cc @@ -22,6 +22,7 @@ THE SOFTWARE. #include +#include #include #include #include @@ -42,11 +43,8 @@ rdc_status_t rdc_module_init(uint64_t flags) { return RDC_ST_OK; } // TODO: Query fields with rocprofiler rdc_status_t rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FIELDS], uint32_t* field_count) { // extract all keys from counter_map - std::vector counter_keys; - counter_keys.reserve(amd::rdc::counter_map_k.size()); - for (auto it : amd::rdc::counter_map_k) { - counter_keys.push_back(it.first); - } + std::vector fields = rocp.get_field_ids(); + std::vector counter_keys(fields.begin(), fields.end()); *field_count = counter_keys.size(); // copy from vector into array