diff --git a/.gitignore b/.gitignore index 7b30e0519b..cda2cc3029 100644 --- a/.gitignore +++ b/.gitignore @@ -16,7 +16,7 @@ docBin/ docs/_doxygen/ # VisualStudioCode -.vscode/ +.vscode # do NOT ignore these files !.clang-format diff --git a/common/rdc_field.data b/common/rdc_field.data index eb24f3d4df..01d7fb71fc 100644 --- a/common/rdc_field.data +++ b/common/rdc_field.data @@ -102,16 +102,19 @@ FLD_DESC_ENT(RDC_FI_XGMI_7_WRITE_KB, "XGMI7 accumulated data write size (KB) // This doesn't map to rocprofiler counters directly // See counter_map in rdc/include/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.h // See metrics.xml in rocprofiler -FLD_DESC_ENT(RDC_FI_PROF_CU_UTILIZATION, "", "CU_UTILIZATION", false) -FLD_DESC_ENT(RDC_FI_PROF_CU_OCCUPANCY, "", "CU_OCCUPANCY", false) -FLD_DESC_ENT(RDC_FI_PROF_FLOPS_16, "", "FLOPS_16", false) -FLD_DESC_ENT(RDC_FI_PROF_FLOPS_32, "", "FLOPS_32", false) -FLD_DESC_ENT(RDC_FI_PROF_FLOPS_64, "", "FLOPS_64", false) -FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_CYCLES, "", "ACTIVE_CYCLES", false) -FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_WAVES, "", "ACTIVE_WAVES", false) -FLD_DESC_ENT(RDC_FI_PROF_ELAPSED_CYCLES, "", "ELAPSED_CYCLES", false) -FLD_DESC_ENT(RDC_FI_PROF_FETCH_SIZE, "", "FETCH_SIZE", false) -FLD_DESC_ENT(RDC_FI_PROF_WRITE_SIZE, "", "WRITE_SIZE", false) +FLD_DESC_ENT(RDC_FI_PROF_CU_UTILIZATION, "Active Cycles / total Elapsed Cycles", "CU_UTILIZATION", false) +FLD_DESC_ENT(RDC_FI_PROF_CU_OCCUPANCY, "Active Waves / maximum Active Waves per CU", "CU_OCCUPANCY", false) +FLD_DESC_ENT(RDC_FI_PROF_FLOPS_16, "Number of fp16 OPS / second", "FLOPS_16", false) +FLD_DESC_ENT(RDC_FI_PROF_FLOPS_32, "Number of fp32 OPS / second", "FLOPS_32", false) +FLD_DESC_ENT(RDC_FI_PROF_FLOPS_64, "Number of fp64 OPS / second", "FLOPS_64", false) +FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_CYCLES, "Number of Active Cycles", "ACTIVE_CYCLES", false) +FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_WAVES, "Number of Active Waves", "ACTIVE_WAVES", false) +FLD_DESC_ENT(RDC_FI_PROF_ELAPSED_CYCLES, "Number of Elapsed Cycles over all SMs", "ELAPSED_CYCLES", false) +FLD_DESC_ENT(RDC_FI_PROF_FETCH_SIZE, "kb fetched from video memory", "FETCH_SIZE", false) +FLD_DESC_ENT(RDC_FI_PROF_WRITE_SIZE, "kb written to video memory", "WRITE_SIZE", false) +FLD_DESC_ENT(RDC_FI_PROF_GRBM_COUNT, "", "GRBM_COUNT", false) +FLD_DESC_ENT(RDC_FI_PROF_SQ_WAVES, "", "SQ_WAVES", false) +FLD_DESC_ENT(RDC_FI_PROF_TA_BUSY_AVR, "", "TA_BUSY_avr", false) // Events FLD_DESC_ENT(RDC_EVNT_XGMI_0_NOP_TX, "NOPs sent to neighbor 0", "XGMI_NOP_0", false) diff --git a/example/rocprofiler_example.cc b/example/rocprofiler_example.cc index 561cda2a41..837a9dfa3d 100644 --- a/example/rocprofiler_example.cc +++ b/example/rocprofiler_example.cc @@ -105,8 +105,6 @@ int run() { } std::cout << "Created the GPU group " << group_id << std::endl; - // Only add one GPU - count = 1; for (uint32_t i = 0; i < count; i++) { result = rdc_group_gpu_add(rdc_handle, group_id, gpu_index_list[i]); // Add GPU 0 if (result != RDC_ST_OK) { @@ -129,8 +127,19 @@ int run() { field_ids.push_back(RDC_FI_GPU_MEMORY_USAGE); field_ids.push_back(RDC_FI_POWER_USAGE); - field_ids.push_back(RDC_FI_PROF_CU_OCCUPANCY); field_ids.push_back(RDC_FI_PROF_CU_UTILIZATION); + field_ids.push_back(RDC_FI_PROF_CU_OCCUPANCY); + field_ids.push_back(RDC_FI_PROF_FLOPS_16); + field_ids.push_back(RDC_FI_PROF_FLOPS_32); + field_ids.push_back(RDC_FI_PROF_FLOPS_64); + field_ids.push_back(RDC_FI_PROF_ACTIVE_CYCLES); + field_ids.push_back(RDC_FI_PROF_ACTIVE_WAVES); + field_ids.push_back(RDC_FI_PROF_ELAPSED_CYCLES); + field_ids.push_back(RDC_FI_PROF_FETCH_SIZE); + field_ids.push_back(RDC_FI_PROF_WRITE_SIZE); + field_ids.push_back(RDC_FI_PROF_GRBM_COUNT); + field_ids.push_back(RDC_FI_PROF_SQ_WAVES); + field_ids.push_back(RDC_FI_PROF_TA_BUSY_AVR); result = rdc_group_field_create(rdc_handle, field_ids.size(), field_ids.data(), field_group_name, &field_group_id); if (result != RDC_ST_OK) { @@ -146,7 +155,7 @@ int run() { // Let the RDC to watch the fields and groups. The fields will be updated // once per second, the max keep age is 1 minutes and only keep 10 samples. result = rdc_field_watch(rdc_handle, group_id, field_group_id, - static_cast(1) * 1000 * 1000, 60, 10); + static_cast(1) * 10 * 1000, 60, 10); if (result != RDC_ST_OK) { std::cout << "Error watch group fields. Return: " << rdc_status_string(result); return cleanup(); @@ -159,7 +168,7 @@ int run() { // all_fields() will be called periodically at background. If running as // RDC_OPERATION_MODE_MANUAL mode, we must call rdc_field_update_all() // periodically to take samples. - usleep(5 * 1000 * 1000); // sleep 5 seconds before fetch the stats + usleep(5 * 10 * 1000); // sleep 0.05 seconds before fetch the stats // Retreive the field and group information from RDC rdc_group_info_t group_info; @@ -209,8 +218,8 @@ int run() { } std::cout << "Stop watch group:" << group_id << ", field_group:" << field_group_id << std::endl; - // Get the history data last 10 seconds - std::cout << "Get last 10 seconds metrics for group:" << group_id + // Get the history data last 0.1 seconds + std::cout << "Get last 0.1 seconds metrics for group:" << group_id << " field_group:" << field_group_id << std::endl; std::cout << "time_stamp\t" << "GPU_index\t" diff --git a/include/rdc/rdc.h b/include/rdc/rdc.h index 56cfe0265a..d31708576e 100644 --- a/include/rdc/rdc.h +++ b/include/rdc/rdc.h @@ -260,6 +260,9 @@ typedef enum { RDC_FI_PROF_ELAPSED_CYCLES, RDC_FI_PROF_FETCH_SIZE, RDC_FI_PROF_WRITE_SIZE, + RDC_FI_PROF_GRBM_COUNT, + RDC_FI_PROF_SQ_WAVES, + RDC_FI_PROF_TA_BUSY_AVR, /* * @brief Raw XGMI counter events diff --git a/include/rdc_modules/rdc_rocp/RdcRocpBase.h b/include/rdc_modules/rdc_rocp/RdcRocpBase.h index c210278df6..8c87031d73 100644 --- a/include/rdc_modules/rdc_rocp/RdcRocpBase.h +++ b/include/rdc_modules/rdc_rocp/RdcRocpBase.h @@ -24,14 +24,8 @@ THE SOFTWARE. #define RDC_MODULES_RDC_ROCP_RDCROCPBASE_H_ #include -#include #include -#include -#include #include -#include -#include -#include #include #include "rdc/rdc.h" @@ -47,8 +41,7 @@ typedef struct { /// Common interface for RocP tests and samples class RdcRocpBase { - static const int dev_count = 1; - typedef std::pair pair_gpu_field_t; + // typedef const char* rocp_metric_name_t; public: RdcRocpBase(); @@ -67,26 +60,26 @@ class RdcRocpBase { * @retval ::ROCMTOOLS_STATUS_SUCCESS The function has been executed * successfully. */ - rdc_status_t rocp_lookup(pair_gpu_field_t gpu_field, double* value); + rdc_status_t rocp_lookup(uint32_t gpu_index, rdc_field_t field, double* value); const char* get_field_id_from_name(rdc_field_t); const std::vector get_field_ids(); protected: private: - rocprofiler_t* contexts[dev_count] = {nullptr}; - static const int features_count = 1; - std::map metrics = {}; - rocprofiler_feature_t features[dev_count][features_count] = {}; - void read_features(rocprofiler_t* context, const unsigned feature_count); - int run_profiler(const char* feature_name); - hsa_queue_t* queues[dev_count] = {nullptr}; + std::map metric_to_value = {}; + // array of features for each device + std::map feature; + // rocprofiler_feature_t features[dev_count][features_count] = {}; + void read_feature(rocprofiler_t* context, const unsigned feature_count); + int run_profiler(uint32_t gpu_index, rdc_field_t field); + std::vector queues; hsa_agent_arr_t agent_arr = {}; - std::map counter_map_k = {}; + std::map field_to_metric = {}; /** * @brief Convert from rocmtools status into RDC status */ - rdc_status_t Rocp2RdcError(hsa_status_t rocm_status); + rdc_status_t Rocp2RdcError(hsa_status_t status); }; } // namespace rdc diff --git a/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.cc b/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.cc index 34fbb4d447..2a76a199e3 100644 --- a/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.cc +++ b/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.cc @@ -23,12 +23,18 @@ THE SOFTWARE. #include "rdc_modules/rdc_rocp/RdcRocpBase.h" #include +#include #include +#include #include +#include +#include #include #include +#include #include +#include // #include "hsa.h" #include "rdc/rdc.h" @@ -40,11 +46,14 @@ namespace rdc { static hsa_status_t get_agent_handle_cb(hsa_agent_t agent, void* agent_arr) { hsa_device_type_t type; + + assert(agent_arr != nullptr); + hsa_agent_arr_t* agent_arr_ = (hsa_agent_arr_t*)agent_arr; - hsa_status_t hsa_errno = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type); - if (hsa_errno != HSA_STATUS_SUCCESS) { - return hsa_errno; + hsa_status_t status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type); + if (status != HSA_STATUS_SUCCESS) { + return status; } if (type == HSA_DEVICE_TYPE_GPU) { @@ -62,35 +71,39 @@ static hsa_status_t get_agent_handle_cb(hsa_agent_t agent, void* agent_arr) { return HSA_STATUS_SUCCESS; } -void RdcRocpBase::read_features(rocprofiler_t* context, const unsigned feature_count) { - hsa_status_t hsa_errno = rocprofiler_read(context, 0); - assert(hsa_errno == HSA_STATUS_SUCCESS); - hsa_errno = rocprofiler_get_data(context, 0); - assert(hsa_errno == HSA_STATUS_SUCCESS); - hsa_errno = rocprofiler_get_metrics(context); - assert(hsa_errno == HSA_STATUS_SUCCESS); - for (auto i = 0; i < feature_count; i++) { - switch (features[0][i].data.kind) { - case ROCPROFILER_DATA_KIND_DOUBLE: - metrics[features[0][i].name] = features[0][i].data.result_double; - break; - default: - RDC_LOG(RDC_ERROR, "ERROR: Unexpected feature kind: " << features[0][i].data.kind); - } +void RdcRocpBase::read_feature(rocprofiler_t* context, const unsigned feature_count) { + hsa_status_t status = rocprofiler_read(context, 0); + assert(status == HSA_STATUS_SUCCESS); + status = rocprofiler_get_data(context, 0); + assert(status == HSA_STATUS_SUCCESS); + status = rocprofiler_get_metrics(context); + assert(status == HSA_STATUS_SUCCESS); + switch (feature[0].data.kind) { + case ROCPROFILER_DATA_KIND_DOUBLE: + metric_to_value[feature[0].name] = feature[0].data.result_double; + break; + case ROCPROFILER_DATA_KIND_INT32: + metric_to_value[feature[0].name] = feature[0].data.result_int32; + break; + case ROCPROFILER_DATA_KIND_INT64: + metric_to_value[feature[0].name] = feature[0].data.result_int64; + break; + default: + RDC_LOG(RDC_ERROR, "ERROR: Unexpected feature kind: " << feature[0].data.kind); } } static int get_agents(hsa_agent_arr_t* agent_arr) { int errcode = 0; - hsa_status_t hsa_errno = HSA_STATUS_SUCCESS; + hsa_status_t status = HSA_STATUS_SUCCESS; agent_arr->capacity = 1; agent_arr->count = 0; agent_arr->agents = (hsa_agent_t*)calloc(agent_arr->capacity, sizeof(hsa_agent_t)); assert(agent_arr->agents); - hsa_errno = hsa_iterate_agents(get_agent_handle_cb, agent_arr); - if (hsa_errno != HSA_STATUS_SUCCESS) { + status = hsa_iterate_agents(get_agent_handle_cb, agent_arr); + if (status != HSA_STATUS_SUCCESS) { errcode = -1; agent_arr->capacity = 0; @@ -103,15 +116,12 @@ static int get_agents(hsa_agent_arr_t* agent_arr) { bool createHsaQueue(hsa_queue_t** queue, hsa_agent_t gpu_agent) { // create a single-producer queue - // TODO: check if API args are correct, especially UINT32_MAX - hsa_status_t status; - status = hsa_queue_create(gpu_agent, 64, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, - UINT32_MAX, queue); + hsa_status_t status = hsa_queue_create(gpu_agent, 64, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, + UINT32_MAX, UINT32_MAX, queue); if (status != HSA_STATUS_SUCCESS) { RDC_LOG(RDC_ERROR, "Queue creation failed"); } - // TODO: warning: is it really required!! ?? status = hsa_amd_queue_set_priority(*queue, HSA_AMD_QUEUE_PRIORITY_HIGH); if (status != HSA_STATUS_SUCCESS) { RDC_LOG(RDC_ERROR, "HSA Queue Priority Set Failed"); @@ -120,80 +130,111 @@ bool createHsaQueue(hsa_queue_t** queue, hsa_agent_t gpu_agent) { return (status == HSA_STATUS_SUCCESS); } -int RdcRocpBase::run_profiler(const char* feature_name) { - const char* events[features_count] = {feature_name}; - +int RdcRocpBase::run_profiler(uint32_t gpu_index, rdc_field_t field) { // initialize hsa. hsa_init() will also load the profiler libs under the hood - hsa_status_t hsa_errno = HSA_STATUS_SUCCESS; + hsa_status_t status = HSA_STATUS_SUCCESS; - for (int i = 0; i < dev_count; ++i) { - for (int j = 0; j < features_count; ++j) { - features[i][j].kind = (rocprofiler_feature_kind_t)ROCPROFILER_FEATURE_KIND_METRIC; - features[i][j].name = events[j]; - } - } + feature[gpu_index].kind = (rocprofiler_feature_kind_t)ROCPROFILER_FEATURE_KIND_METRIC; + feature[gpu_index].name = field_to_metric[field]; - rocprofiler_t* contexts[dev_count] = {0}; - for (int i = 0; i < dev_count; ++i) { - rocprofiler_properties_t properties = { - queues[i], - 64, - NULL, - NULL, - }; - int mode = (ROCPROFILER_MODE_STANDALONE | ROCPROFILER_MODE_SINGLEGROUP); - hsa_errno = rocprofiler_open(agent_arr.agents[i], features[i], features_count, &contexts[i], - mode, &properties); - const char* error_string = nullptr; - rocprofiler_error_string(&error_string); - if (error_string != NULL) { + // rocprofiler_t* contexts[agent_arr.count] = {0}; + std::vector contexts; + contexts.reserve(agent_arr.count); + rocprofiler_properties_t properties = { + queues[gpu_index], + 64, + NULL, + NULL, + }; + int mode = (ROCPROFILER_MODE_STANDALONE | ROCPROFILER_MODE_SINGLEGROUP); + status = rocprofiler_open(agent_arr.agents[gpu_index], &feature[gpu_index], 1, + &contexts[gpu_index], mode, &properties); + const char* error_string = nullptr; + rocprofiler_error_string(&error_string); + if (error_string != nullptr) { + if (error_string[0] != '\0') { RDC_LOG(RDC_ERROR, error_string); } - assert(hsa_errno == HSA_STATUS_SUCCESS); } + assert(status == HSA_STATUS_SUCCESS); - for (int i = 0; i < dev_count; ++i) { - hsa_errno = rocprofiler_start(contexts[i], 0); - assert(hsa_errno == HSA_STATUS_SUCCESS); - } + status = rocprofiler_start(contexts[gpu_index], 0); + assert(status == HSA_STATUS_SUCCESS); // this is the duration for which the counter increments from zero. // TODO: Return error if sampling interval is lower than this value usleep(10000); - for (int i = 0; i < dev_count; ++i) { - hsa_errno = rocprofiler_stop(contexts[i], 0); - assert(hsa_errno == HSA_STATUS_SUCCESS); - } + status = rocprofiler_stop(contexts[gpu_index], 0); + assert(status == HSA_STATUS_SUCCESS); - for (int i = 0; i < dev_count; ++i) { - read_features(contexts[i], features_count); - } + read_feature(contexts[gpu_index], 1); usleep(100); - for (int i = 0; i < dev_count; ++i) { - hsa_errno = rocprofiler_close(contexts[i]); - assert(hsa_errno == HSA_STATUS_SUCCESS); - } + status = rocprofiler_close(contexts[gpu_index]); + assert(status == HSA_STATUS_SUCCESS); return 0; } const char* RdcRocpBase::get_field_id_from_name(rdc_field_t field) { - return counter_map_k.at(field); + return field_to_metric.at(field); +} + +// TODO - map RDC gpu_index to node_id +// use rocprofiler to check which metrics are supported +void check_metrics_supported(uint32_t node_id, std::vector& metrics_all, + std::vector& metrics_good) { + typedef struct { + std::vector* metrics_all_; + std::vector* metrics_good_; + uint32_t driver_node_id; + } payload_t; + // callback for rocprofiler to check which metrics are supported + auto info_callback = [](const rocprofiler_info_data_t info, void* data) { + payload_t* payload = reinterpret_cast(data); + if (info.agent_index == payload->driver_node_id) { + auto it = + std::find(payload->metrics_all_->begin(), payload->metrics_all_->end(), info.metric.name); + if (it != payload->metrics_all_->end()) { + payload->metrics_good_->push_back(info.metric.name); + RDC_LOG(RDC_DEBUG, " gpu-agent" << info.agent_index << " : " << info.metric.name << " : " + << info.metric.description); + if (info.metric.expr != NULL) // if it's a derived metric, print it's formula + RDC_LOG(RDC_DEBUG, " " << info.metric.name << " = " << info.metric.expr); + } + } + return HSA_STATUS_SUCCESS; + }; + + payload_t payload = {&metrics_all, &metrics_good, node_id}; + hsa_status_t status = + rocprofiler_iterate_info(NULL, ROCPROFILER_INFO_KIND_METRIC, info_callback, &payload); + + for (auto& iter : *(payload.metrics_good_)) { + RDC_LOG(RDC_DEBUG, iter << " : exists"); + } } const std::vector RdcRocpBase::get_field_ids() { std::vector field_ids; - for (auto& [k, v] : counter_map_k) { + for (auto& [k, v] : field_to_metric) { field_ids.push_back(k); } return field_ids; } RdcRocpBase::RdcRocpBase() { - counter_map_k = { + hsa_status_t status = hsa_init(); + if (status != HSA_STATUS_SUCCESS) { + const char* errstr = nullptr; + hsa_status_string(status, &errstr); + throw std::runtime_error("hsa error code: " + std::to_string(status) + " " + errstr); + } + + // all fields + static const std::map temp_field_map_k = { {RDC_FI_PROF_CU_UTILIZATION, "CU_UTILIZATION"}, {RDC_FI_PROF_CU_OCCUPANCY, "CU_OCCUPANCY"}, {RDC_FI_PROF_FLOPS_16, "FLOPS_16"}, @@ -204,22 +245,16 @@ RdcRocpBase::RdcRocpBase() { {RDC_FI_PROF_ELAPSED_CYCLES, "ELAPSED_CYCLES"}, {RDC_FI_PROF_FETCH_SIZE, "FETCH_SIZE"}, {RDC_FI_PROF_WRITE_SIZE, "WRITE_SIZE"}, + {RDC_FI_PROF_GRBM_COUNT, "GRBM_COUNT"}, + {RDC_FI_PROF_SQ_WAVES, "SQ_WAVES"}, + {RDC_FI_PROF_TA_BUSY_AVR, "TA_BUSY_avr"}, }; - // populate monitored fields - std::cout << "Size of counter_map_k: " << counter_map_k.size() << "\n"; + std::vector unchecked_fields; + std::vector checked_fields; - for (auto& [k, v] : counter_map_k) { - const char* str = v; - metrics.emplace(std::make_pair(str, 0.0)); - } - assert(metrics.size() == counter_map_k.size()); - - hsa_status_t err = hsa_init(); - if (err != HSA_STATUS_SUCCESS) { - const char* errstr = nullptr; - hsa_status_string(err, &errstr); - throw std::runtime_error("hsa error code: " + std::to_string(err) + " " + errstr); + for (auto& [k, v] : temp_field_map_k) { + unchecked_fields.push_back(v); } // populate list of agents @@ -227,42 +262,67 @@ RdcRocpBase::RdcRocpBase() { if (errcode != 0) { return; } + RDC_LOG(RDC_DEBUG, "Agent count: " << agent_arr.count); - for (int i = 0; i < dev_count; ++i) { - int j = 0; - for (auto& metric : metrics) { - features[i][j].kind = (rocprofiler_feature_kind_t)ROCPROFILER_FEATURE_KIND_METRIC; - features[i][j].name = metric.first; - j++; + uint32_t driver_node_id = 0; + for (uint32_t gpu_index = 0; gpu_index < agent_arr.count; gpu_index++) { + status = hsa_agent_get_info(agent_arr.agents[gpu_index], + static_cast(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID), + &driver_node_id); + if (status != HSA_STATUS_SUCCESS) { + const char* errstr = nullptr; + hsa_status_string(status, &errstr); + RDC_LOG(RDC_ERROR, "hsa error: " << std::to_string(status) << " " << errstr); + } else { + RDC_LOG(RDC_DEBUG, "gpu_index[" << gpu_index << "] = node_id[" << driver_node_id << "]"); + } + } + // only check metrics for the last GPU + // TODO: add support for different metrics per GPU + // currently it's assumed that all GPUs are the same + check_metrics_supported(driver_node_id, unchecked_fields, checked_fields); + + for (auto& [k, v] : temp_field_map_k) { + auto found = std::find(checked_fields.begin(), checked_fields.end(), v); + if (found != checked_fields.end()) { + field_to_metric.insert({k, v}); } } - for (int i = 0; i < dev_count; ++i) { - if (!createHsaQueue(&queues[i], agent_arr.agents[i])) { - RDC_LOG(RDC_ERROR, "can't create queues[" << i << "]\n"); + RDC_LOG(RDC_DEBUG, "Rocprofiler supports " << field_to_metric.size() << " fields"); + + for (auto& [k, v] : field_to_metric) { + const char* str = v; + metric_to_value.insert({str, 0.0}); + } + assert(metric_to_value.size() == field_to_metric.size()); + + for (uint32_t gpu_index = 0; gpu_index < agent_arr.count; gpu_index++) { + for (auto& metric : metric_to_value) { + rocprofiler_feature_t temp_feature; + temp_feature.kind = (rocprofiler_feature_kind_t)ROCPROFILER_FEATURE_KIND_METRIC; + temp_feature.name = metric.first; + feature.insert({gpu_index, temp_feature}); + } + } + + for (uint32_t gpu_index = 0; gpu_index < agent_arr.count; gpu_index++) { + queues.push_back(nullptr); + if (!createHsaQueue(&queues[gpu_index], agent_arr.agents[gpu_index])) { + RDC_LOG(RDC_ERROR, "can't create queues[" << gpu_index << "]\n"); } } } RdcRocpBase::~RdcRocpBase() { - hsa_status_t hsa_errno = HSA_STATUS_SUCCESS; - for (int i = 0; i < dev_count; ++i) { - hsa_errno = rocprofiler_stop(contexts[i], 0); - assert(hsa_errno == HSA_STATUS_SUCCESS); - } - - for (int i = 0; i < dev_count; ++i) { - hsa_errno = rocprofiler_close(contexts[i]); - assert(hsa_errno == HSA_STATUS_SUCCESS); - } - - hsa_errno = hsa_shut_down(); - assert(hsa_errno == HSA_STATUS_SUCCESS); - hsa_errno = hsa_shut_down(); - assert(hsa_errno == HSA_STATUS_ERROR_NOT_INITIALIZED); + hsa_status_t status = HSA_STATUS_SUCCESS; + status = hsa_shut_down(); + assert(status == HSA_STATUS_SUCCESS); + status = hsa_shut_down(); + assert(status == HSA_STATUS_ERROR_NOT_INITIALIZED); } -rdc_status_t RdcRocpBase::rocp_lookup(pair_gpu_field_t gpu_field, double* value) { +rdc_status_t RdcRocpBase::rocp_lookup(uint32_t gpu_index, rdc_field_t field, double* value) { if (value == nullptr) { return RDC_ST_BAD_PARAMETER; } @@ -271,17 +331,17 @@ rdc_status_t RdcRocpBase::rocp_lookup(pair_gpu_field_t gpu_field, double* value) if (status != HSA_STATUS_SUCCESS) { return Rocp2RdcError(status); } - switch (gpu_field.second) { + switch (field) { default: - run_profiler(counter_map_k.at(gpu_field.second)); - *value = metrics[counter_map_k.at(gpu_field.second)]; + run_profiler(gpu_index, field); + *value = metric_to_value[field_to_metric[field]]; break; } return Rocp2RdcError(status); } -rdc_status_t RdcRocpBase::Rocp2RdcError(hsa_status_t rocm_status) { - switch (rocm_status) { +rdc_status_t RdcRocpBase::Rocp2RdcError(hsa_status_t status) { + switch (status) { case HSA_STATUS_SUCCESS: return RDC_ST_OK; default: diff --git a/rdc_libs/rdc_modules/rdc_rocp/RdcTelemetryLib.cc b/rdc_libs/rdc_modules/rdc_rocp/RdcTelemetryLib.cc index 8d17186d81..efbf665d70 100644 --- a/rdc_libs/rdc_modules/rdc_rocp/RdcTelemetryLib.cc +++ b/rdc_libs/rdc_modules/rdc_rocp/RdcTelemetryLib.cc @@ -20,6 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include #include #include @@ -40,7 +41,6 @@ amd::rdc::RdcRocpBase rocp; rdc_status_t rdc_module_init(uint64_t flags) { return RDC_ST_OK; } // get supported field ids -// TODO: Query fields with rocprofiler rdc_status_t rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FIELDS], uint32_t* field_count) { // extract all keys from counter_map std::vector fields = rocp.get_field_ids(); @@ -69,7 +69,7 @@ rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, const uint3 rdc_gpu_field_value_t values[BULK_FIELDS_MAX]; uint32_t bulk_count = 0; rdc_status_t status = RDC_ST_UNKNOWN_ERROR; - double data; + double data = NAN; for (uint32_t i = 0; i < fields_count; i++) { if (bulk_count >= BULK_FIELDS_MAX) { @@ -81,7 +81,7 @@ rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, const uint3 bulk_count = 0; } - status = rocp.rocp_lookup(std::make_pair(fields[i].gpu_index, fields[i].field_id), &data); + status = rocp.rocp_lookup(fields[i].gpu_index, fields[i].field_id, &data); // get value values[bulk_count].gpu_index = fields[i].gpu_index; values[bulk_count].field_value.type = DOUBLE; @@ -92,7 +92,7 @@ rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, const uint3 bulk_count++; } if (bulk_count != 0) { - rdc_status_t status = callback(values, bulk_count, user_data); + status = callback(values, bulk_count, user_data); if (status != RDC_ST_OK) { return status; } diff --git a/tests/rdc_tests/CMakeLists.txt b/tests/rdc_tests/CMakeLists.txt index cac77fd7bd..59d2c5d618 100755 --- a/tests/rdc_tests/CMakeLists.txt +++ b/tests/rdc_tests/CMakeLists.txt @@ -13,15 +13,13 @@ option(INSTALL_GTEST "Install GTest (only useful if GTest is not already install # Hack to find libraries after installation # /opt/rocm/share/rdc/rdctst_tests/../../../ = /opt/rocm set(RDCTST_RPATH + "\$ORIGIN" "\$ORIGIN/../../../lib" "\$ORIGIN/../../../lib/rdc") -# replace lib with lib64 -list(TRANSFORM RDCTST_RPATH REPLACE "lib" "lib64" OUTPUT_VARIABLE RDCTST_RPATH64) # combine lists set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_RPATH} - ${RDCTST_RPATH} - ${RDCTST_RPATH64}) + ${RDCTST_RPATH}) # # Print out the build configuration being used: