Profiler - Add all required metrics
Change-Id: Iea3938df9407789c061c3a6ead9167a69069d6e6
Signed-off-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>
[ROCm/rdc commit: c3a4c899d5]
Esse commit está contido em:
@@ -102,8 +102,14 @@ FLD_DESC_ENT(RDC_FI_XGMI_7_WRITE_KB, "XGMI7 accumulated data write size (KB)
|
||||
// This doesn't map to rocprofiler counters directly
|
||||
// See counter_map in rdc/include/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.h
|
||||
// See metrics.xml in rocprofiler
|
||||
FLD_DESC_ENT(RDC_FI_PROF_GPU_UTIL, "", "PROF_GPU_UTIL", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_TA_BUSY_AVR, "", "PROF_TA_BUSY_AVR", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CU_UTILIZATION, "", "CU_UTILIZATION", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CU_OCCUPANCY, "", "CU_OCCUPANCY", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_FLOPS_16, "", "FLOPS_16", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_FLOPS_32, "", "FLOPS_32", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_FLOPS_64, "", "FLOPS_64", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_CYCLES, "", "ACTIVE_CYCLES", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_WAVES, "", "ACTIVE_WAVES", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_ELAPSED_CYCLES, "", "ELAPSED_CYCLES", false)
|
||||
|
||||
// Events
|
||||
FLD_DESC_ENT(RDC_EVNT_XGMI_0_NOP_TX, "NOPs sent to neighbor 0", "XGMI_NOP_0", false)
|
||||
|
||||
@@ -230,13 +230,13 @@ typedef enum {
|
||||
// On newer ASICs, such as MI300, XGMI events are not supported.
|
||||
// Use the XGMI-related fields below to calculate the bandwidth instead.
|
||||
RDC_FI_XGMI_0_READ_KB = 700, //!< XGMI_0 accumulated data read size (KB)
|
||||
RDC_FI_XGMI_1_READ_KB, //!< XGMI_1 accumulated data read size (KB)
|
||||
RDC_FI_XGMI_2_READ_KB, //!< XGMI_2 accumulated data read size (KB)
|
||||
RDC_FI_XGMI_3_READ_KB, //!< XGMI_3 accumulated data read size (KB)
|
||||
RDC_FI_XGMI_4_READ_KB, //!< XGMI_4 accumulated data read size (KB)
|
||||
RDC_FI_XGMI_5_READ_KB, //!< XGMI_5 accumulated data read size (KB)
|
||||
RDC_FI_XGMI_6_READ_KB, //!< XGMI_6 accumulated data read size (KB)
|
||||
RDC_FI_XGMI_7_READ_KB, //!< XGMI_7 accumulated data read size (KB)
|
||||
RDC_FI_XGMI_1_READ_KB, //!< XGMI_1 accumulated data read size (KB)
|
||||
RDC_FI_XGMI_2_READ_KB, //!< XGMI_2 accumulated data read size (KB)
|
||||
RDC_FI_XGMI_3_READ_KB, //!< XGMI_3 accumulated data read size (KB)
|
||||
RDC_FI_XGMI_4_READ_KB, //!< XGMI_4 accumulated data read size (KB)
|
||||
RDC_FI_XGMI_5_READ_KB, //!< XGMI_5 accumulated data read size (KB)
|
||||
RDC_FI_XGMI_6_READ_KB, //!< XGMI_6 accumulated data read size (KB)
|
||||
RDC_FI_XGMI_7_READ_KB, //!< XGMI_7 accumulated data read size (KB)
|
||||
|
||||
RDC_FI_XGMI_0_WRITE_KB, //!< XGMI_0 accumulated data write size (KB)
|
||||
RDC_FI_XGMI_1_WRITE_KB, //!< XGMI_1 accumulated data write size (KB)
|
||||
@@ -250,8 +250,14 @@ typedef enum {
|
||||
/**
|
||||
* @brief ROC-profiler related fields
|
||||
*/
|
||||
RDC_FI_PROF_GPU_UTIL = 800, //!<
|
||||
RDC_FI_PROF_TA_BUSY_AVR, //!<
|
||||
RDC_FI_PROF_CU_UTILIZATION = 800,
|
||||
RDC_FI_PROF_CU_OCCUPANCY,
|
||||
RDC_FI_PROF_FLOPS_16,
|
||||
RDC_FI_PROF_FLOPS_32,
|
||||
RDC_FI_PROF_FLOPS_64,
|
||||
RDC_FI_PROF_ACTIVE_CYCLES,
|
||||
RDC_FI_PROF_ACTIVE_WAVES,
|
||||
RDC_FI_PROF_ELAPSED_CYCLES,
|
||||
|
||||
/*
|
||||
* @brief Raw XGMI counter events
|
||||
|
||||
@@ -39,17 +39,6 @@ THE SOFTWARE.
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
/**
|
||||
* @brief Map of RDC fields to rocmtools counters
|
||||
*
|
||||
* See metrics.xml in rocmtools for more info.
|
||||
* RDC_CALC fields are calculated over time by RDC.
|
||||
*/
|
||||
static const std::map<rdc_field_t, const char*> counter_map_k = {
|
||||
{RDC_FI_PROF_GPU_UTIL, "GPU_UTIL"},
|
||||
{RDC_FI_PROF_TA_BUSY_AVR, "TA_BUSY_avr"},
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
hsa_agent_t* agents;
|
||||
unsigned count;
|
||||
@@ -79,17 +68,20 @@ class RdcRocpBase {
|
||||
* successfully.
|
||||
*/
|
||||
rdc_status_t rocp_lookup(pair_gpu_field_t gpu_field, double* value);
|
||||
const char* get_field_id_from_name(rdc_field_t);
|
||||
const std::vector<rdc_field_t> get_field_ids();
|
||||
|
||||
protected:
|
||||
private:
|
||||
rocprofiler_t* contexts[dev_count] = {nullptr};
|
||||
static const int features_count = 1;
|
||||
std::map<const char*, double> metrics;
|
||||
rocprofiler_feature_t features[dev_count][features_count];
|
||||
std::map<const char*, double> metrics = {};
|
||||
rocprofiler_feature_t features[dev_count][features_count] = {};
|
||||
void read_features(rocprofiler_t* context, const unsigned feature_count);
|
||||
int run_profiler(const char* feature_name, hsa_queue_t** queues);
|
||||
int run_profiler(const char* feature_name);
|
||||
hsa_queue_t* queues[dev_count] = {nullptr};
|
||||
hsa_agent_arr_t agent_arr;
|
||||
hsa_agent_arr_t agent_arr = {};
|
||||
std::map<rdc_field_t, const char*> counter_map_k = {};
|
||||
|
||||
/**
|
||||
* @brief Convert from rocmtools status into RDC status
|
||||
|
||||
@@ -26,7 +26,9 @@ THE SOFTWARE.
|
||||
#include <unistd.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <utility>
|
||||
|
||||
// #include "hsa.h"
|
||||
#include "rdc/rdc.h"
|
||||
@@ -114,7 +116,7 @@ bool createHsaQueue(hsa_queue_t** queue, hsa_agent_t gpu_agent) {
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
int RdcRocpBase::run_profiler(const char* feature_name, hsa_queue_t** queues) {
|
||||
int RdcRocpBase::run_profiler(const char* feature_name) {
|
||||
const char* events[features_count] = {feature_name};
|
||||
|
||||
// initialize hsa. hsa_init() will also load the profiler libs under the hood
|
||||
@@ -176,14 +178,39 @@ int RdcRocpBase::run_profiler(const char* feature_name, hsa_queue_t** queues) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
RdcRocpBase::RdcRocpBase() {
|
||||
// populate monitored fields
|
||||
const std::map<rdc_field_t, const char*> counter_map_k = {
|
||||
{RDC_FI_PROF_TA_BUSY_AVR, "TA_BUSY_avr"},
|
||||
};
|
||||
std::cout << "Size of counter_map_k: " << counter_map_k.size() << "\n";
|
||||
/**
 * @brief Look up the string that counter_map_k associates with an RDC field.
 *
 * Note that, despite the function's name, the argument is the field id and
 * the returned value is the mapped string.
 *
 * @param field RDC field id used as the key into counter_map_k.
 * @return The const char* stored in counter_map_k for @p field.
 */
const char* RdcRocpBase::get_field_id_from_name(rdc_field_t field) {
|
||||
// std::map::at is a checked lookup: it throws std::out_of_range when the
// field has no entry in counter_map_k.
return counter_map_k.at(field);
|
||||
}
|
||||
|
||||
/**
 * @brief Collect every RDC field id that is a key of counter_map_k.
 *
 * While walking the map this also writes 0.0 into the `metrics` entry keyed
 * by each mapped string, so the call is not a pure getter.
 *
 * @return Vector of the keys of counter_map_k, in the map's iteration order.
 */
const std::vector<rdc_field_t> RdcRocpBase::get_field_ids() {
|
||||
std::vector<rdc_field_t> field_ids;
|
||||
for (auto& [k, v] : counter_map_k) {
|
||||
// Side effect: operator[] creates the entry if it is absent and the stored
// value is overwritten with 0.0 on every call.
// NOTE(review): metrics is declared as std::map<const char*, double>, so keys
// are compared by pointer value, not by string contents - confirm intended.
metrics[v] = 0.0;
|
||||
field_ids.push_back(k);
|
||||
}
|
||||
return field_ids;
|
||||
}
|
||||
|
||||
RdcRocpBase::RdcRocpBase() {
|
||||
counter_map_k = {
|
||||
{RDC_FI_PROF_CU_UTILIZATION, "CU_UTILIZATION"},
|
||||
{RDC_FI_PROF_CU_OCCUPANCY, "CU_OCCUPANCY"},
|
||||
{RDC_FI_PROF_FLOPS_16, "FLOPS_16"},
|
||||
{RDC_FI_PROF_FLOPS_32, "FLOPS_32"},
|
||||
{RDC_FI_PROF_FLOPS_64, "FLOPS_64"},
|
||||
{RDC_FI_PROF_ACTIVE_CYCLES, "ACTIVE_CYCLES"},
|
||||
{RDC_FI_PROF_ACTIVE_WAVES, "ACTIVE_WAVES"},
|
||||
{RDC_FI_PROF_ELAPSED_CYCLES, "ELAPSED_CYCLES"},
|
||||
};
|
||||
|
||||
// populate monitored fields
|
||||
std::cout << "Size of counter_map_k: " << counter_map_k.size() << "\n";
|
||||
|
||||
for (auto& k : counter_map_k) {
|
||||
printf("metric %d = %s\n", k.first, k.second);
|
||||
}
|
||||
for (auto& [k, v] : counter_map_k) {
|
||||
const char* str = v;
|
||||
metrics.emplace(std::make_pair(str, 0.0));
|
||||
}
|
||||
assert(metrics.size() == counter_map_k.size());
|
||||
|
||||
@@ -253,8 +280,7 @@ rdc_status_t RdcRocpBase::rocp_lookup(pair_gpu_field_t gpu_field, double* value)
|
||||
}
|
||||
switch (gpu_field.second) {
|
||||
default:
|
||||
run_profiler("TA_BUSY_avr", queues);
|
||||
// read_features(contexts[gpu_field.first], features_count);
|
||||
run_profiler(counter_map_k.at(gpu_field.second));
|
||||
*value = metrics[counter_map_k.at(gpu_field.second)];
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -22,6 +22,7 @@ THE SOFTWARE.
|
||||
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
@@ -42,11 +43,8 @@ rdc_status_t rdc_module_init(uint64_t flags) { return RDC_ST_OK; }
|
||||
// TODO: Query fields with rocprofiler
|
||||
rdc_status_t rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FIELDS], uint32_t* field_count) {
|
||||
// extract all keys from counter_map
|
||||
std::vector<uint32_t> counter_keys;
|
||||
counter_keys.reserve(amd::rdc::counter_map_k.size());
|
||||
for (auto it : amd::rdc::counter_map_k) {
|
||||
counter_keys.push_back(it.first);
|
||||
}
|
||||
std::vector<rdc_field_t> fields = rocp.get_field_ids();
|
||||
std::vector<uint32_t> counter_keys(fields.begin(), fields.end());
|
||||
|
||||
*field_count = counter_keys.size();
|
||||
// copy from vector into array
|
||||
|
||||
Referência em uma Nova Issue
Bloquear um usuário