Add GPU indexing and fix check for fields in rocprof
- Fix RUNPATH for tests Change-Id: I79517592b49d27080a010a2e41e5878adf24a157 Signed-off-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>
Tento commit je obsažen v:
+1
-1
@@ -16,7 +16,7 @@ docBin/
|
||||
docs/_doxygen/
|
||||
|
||||
# VisualStudioCode
|
||||
.vscode/
|
||||
.vscode
|
||||
|
||||
# do NOT ignore these files
|
||||
!.clang-format
|
||||
|
||||
+13
-10
@@ -102,16 +102,19 @@ FLD_DESC_ENT(RDC_FI_XGMI_7_WRITE_KB, "XGMI7 accumulated data write size (KB)
|
||||
// This doesn't map to rocprofiler counters directly
|
||||
// See counter_map in rdc/include/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.h
|
||||
// See metrics.xml in rocprofiler
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CU_UTILIZATION, "", "CU_UTILIZATION", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CU_OCCUPANCY, "", "CU_OCCUPANCY", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_FLOPS_16, "", "FLOPS_16", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_FLOPS_32, "", "FLOPS_32", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_FLOPS_64, "", "FLOPS_64", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_CYCLES, "", "ACTIVE_CYCLES", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_WAVES, "", "ACTIVE_WAVES", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_ELAPSED_CYCLES, "", "ELAPSED_CYCLES", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_FETCH_SIZE, "", "FETCH_SIZE", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_WRITE_SIZE, "", "WRITE_SIZE", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CU_UTILIZATION, "Active Cycles / total Elapsed Cycles", "CU_UTILIZATION", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CU_OCCUPANCY, "Active Waves / maximum Active Waves per CU", "CU_OCCUPANCY", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_FLOPS_16, "Number of fp16 OPS / second", "FLOPS_16", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_FLOPS_32, "Number of fp32 OPS / second", "FLOPS_32", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_FLOPS_64, "Number of fp64 OPS / second", "FLOPS_64", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_CYCLES, "Number of Active Cycles", "ACTIVE_CYCLES", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_WAVES, "Number of Active Waves", "ACTIVE_WAVES", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_ELAPSED_CYCLES, "Number of Elapsed Cycles over all SMs", "ELAPSED_CYCLES", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_FETCH_SIZE, "kb fetched from video memory", "FETCH_SIZE", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_WRITE_SIZE, "kb written to video memory", "WRITE_SIZE", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_GRBM_COUNT, "", "GRBM_COUNT", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_SQ_WAVES, "", "SQ_WAVES", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_TA_BUSY_AVR, "", "TA_BUSY_avr", false)
|
||||
|
||||
// Events
|
||||
FLD_DESC_ENT(RDC_EVNT_XGMI_0_NOP_TX, "NOPs sent to neighbor 0", "XGMI_NOP_0", false)
|
||||
|
||||
@@ -105,8 +105,6 @@ int run() {
|
||||
}
|
||||
std::cout << "Created the GPU group " << group_id << std::endl;
|
||||
|
||||
// Only add one GPU
|
||||
count = 1;
|
||||
for (uint32_t i = 0; i < count; i++) {
|
||||
result = rdc_group_gpu_add(rdc_handle, group_id, gpu_index_list[i]); // Add GPU 0
|
||||
if (result != RDC_ST_OK) {
|
||||
@@ -129,8 +127,19 @@ int run() {
|
||||
|
||||
field_ids.push_back(RDC_FI_GPU_MEMORY_USAGE);
|
||||
field_ids.push_back(RDC_FI_POWER_USAGE);
|
||||
field_ids.push_back(RDC_FI_PROF_CU_OCCUPANCY);
|
||||
field_ids.push_back(RDC_FI_PROF_CU_UTILIZATION);
|
||||
field_ids.push_back(RDC_FI_PROF_CU_OCCUPANCY);
|
||||
field_ids.push_back(RDC_FI_PROF_FLOPS_16);
|
||||
field_ids.push_back(RDC_FI_PROF_FLOPS_32);
|
||||
field_ids.push_back(RDC_FI_PROF_FLOPS_64);
|
||||
field_ids.push_back(RDC_FI_PROF_ACTIVE_CYCLES);
|
||||
field_ids.push_back(RDC_FI_PROF_ACTIVE_WAVES);
|
||||
field_ids.push_back(RDC_FI_PROF_ELAPSED_CYCLES);
|
||||
field_ids.push_back(RDC_FI_PROF_FETCH_SIZE);
|
||||
field_ids.push_back(RDC_FI_PROF_WRITE_SIZE);
|
||||
field_ids.push_back(RDC_FI_PROF_GRBM_COUNT);
|
||||
field_ids.push_back(RDC_FI_PROF_SQ_WAVES);
|
||||
field_ids.push_back(RDC_FI_PROF_TA_BUSY_AVR);
|
||||
result = rdc_group_field_create(rdc_handle, field_ids.size(), field_ids.data(), field_group_name,
|
||||
&field_group_id);
|
||||
if (result != RDC_ST_OK) {
|
||||
@@ -146,7 +155,7 @@ int run() {
|
||||
// Ask RDC to watch the fields and groups. The fields will be updated
|
||||
// every 10 milliseconds, the max keep age is 1 minute and only 10 samples are kept.
|
||||
result = rdc_field_watch(rdc_handle, group_id, field_group_id,
|
||||
static_cast<uint64_t>(1) * 1000 * 1000, 60, 10);
|
||||
static_cast<uint64_t>(1) * 10 * 1000, 60, 10);
|
||||
if (result != RDC_ST_OK) {
|
||||
std::cout << "Error watch group fields. Return: " << rdc_status_string(result);
|
||||
return cleanup();
|
||||
@@ -159,7 +168,7 @@ int run() {
|
||||
// all_fields() will be called periodically at background. If running as
|
||||
// RDC_OPERATION_MODE_MANUAL mode, we must call rdc_field_update_all()
|
||||
// periodically to take samples.
|
||||
usleep(5 * 1000 * 1000); // sleep 5 seconds before fetch the stats
|
||||
usleep(5 * 10 * 1000); // sleep 0.05 seconds before fetch the stats
|
||||
|
||||
// Retrieve the field and group information from RDC
|
||||
rdc_group_info_t group_info;
|
||||
@@ -209,8 +218,8 @@ int run() {
|
||||
}
|
||||
std::cout << "Stop watch group:" << group_id << ", field_group:" << field_group_id << std::endl;
|
||||
|
||||
// Get the history data last 10 seconds
|
||||
std::cout << "Get last 10 seconds metrics for group:" << group_id
|
||||
// Get the history data last 0.1 seconds
|
||||
std::cout << "Get last 0.1 seconds metrics for group:" << group_id
|
||||
<< " field_group:" << field_group_id << std::endl;
|
||||
std::cout << "time_stamp\t"
|
||||
<< "GPU_index\t"
|
||||
|
||||
@@ -260,6 +260,9 @@ typedef enum {
|
||||
RDC_FI_PROF_ELAPSED_CYCLES,
|
||||
RDC_FI_PROF_FETCH_SIZE,
|
||||
RDC_FI_PROF_WRITE_SIZE,
|
||||
RDC_FI_PROF_GRBM_COUNT,
|
||||
RDC_FI_PROF_SQ_WAVES,
|
||||
RDC_FI_PROF_TA_BUSY_AVR,
|
||||
|
||||
/*
|
||||
* @brief Raw XGMI counter events
|
||||
|
||||
@@ -24,14 +24,8 @@ THE SOFTWARE.
|
||||
#define RDC_MODULES_RDC_ROCP_RDCROCPBASE_H_
|
||||
#include <rocprofiler/rocprofiler.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <typeinfo>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "rdc/rdc.h"
|
||||
@@ -47,8 +41,7 @@ typedef struct {
|
||||
|
||||
/// Common interface for RocP tests and samples
|
||||
class RdcRocpBase {
|
||||
static const int dev_count = 1;
|
||||
typedef std::pair<uint32_t, rdc_field_t> pair_gpu_field_t;
|
||||
// typedef const char* rocp_metric_name_t;
|
||||
|
||||
public:
|
||||
RdcRocpBase();
|
||||
@@ -67,26 +60,26 @@ class RdcRocpBase {
|
||||
* @retval ::ROCMTOOLS_STATUS_SUCCESS The function has been executed
|
||||
* successfully.
|
||||
*/
|
||||
rdc_status_t rocp_lookup(pair_gpu_field_t gpu_field, double* value);
|
||||
rdc_status_t rocp_lookup(uint32_t gpu_index, rdc_field_t field, double* value);
|
||||
const char* get_field_id_from_name(rdc_field_t);
|
||||
const std::vector<rdc_field_t> get_field_ids();
|
||||
|
||||
protected:
|
||||
private:
|
||||
rocprofiler_t* contexts[dev_count] = {nullptr};
|
||||
static const int features_count = 1;
|
||||
std::map<const char*, double> metrics = {};
|
||||
rocprofiler_feature_t features[dev_count][features_count] = {};
|
||||
void read_features(rocprofiler_t* context, const unsigned feature_count);
|
||||
int run_profiler(const char* feature_name);
|
||||
hsa_queue_t* queues[dev_count] = {nullptr};
|
||||
std::map<const char*, double> metric_to_value = {};
|
||||
// array of features for each device
|
||||
std::map<uint32_t, rocprofiler_feature_t> feature;
|
||||
// rocprofiler_feature_t features[dev_count][features_count] = {};
|
||||
void read_feature(rocprofiler_t* context, const unsigned feature_count);
|
||||
int run_profiler(uint32_t gpu_index, rdc_field_t field);
|
||||
std::vector<hsa_queue_t*> queues;
|
||||
hsa_agent_arr_t agent_arr = {};
|
||||
std::map<rdc_field_t, const char*> counter_map_k = {};
|
||||
std::map<rdc_field_t, const char*> field_to_metric = {};
|
||||
|
||||
/**
|
||||
* @brief Convert from rocmtools status into RDC status
|
||||
*/
|
||||
rdc_status_t Rocp2RdcError(hsa_status_t rocm_status);
|
||||
rdc_status_t Rocp2RdcError(hsa_status_t status);
|
||||
};
|
||||
|
||||
} // namespace rdc
|
||||
|
||||
@@ -23,12 +23,18 @@ THE SOFTWARE.
|
||||
#include "rdc_modules/rdc_rocp/RdcRocpBase.h"
|
||||
|
||||
#include <rocprofiler/rocprofiler.h>
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <csignal>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <exception>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
// #include "hsa.h"
|
||||
#include "rdc/rdc.h"
|
||||
@@ -40,11 +46,14 @@ namespace rdc {
|
||||
|
||||
static hsa_status_t get_agent_handle_cb(hsa_agent_t agent, void* agent_arr) {
|
||||
hsa_device_type_t type;
|
||||
|
||||
assert(agent_arr != nullptr);
|
||||
|
||||
hsa_agent_arr_t* agent_arr_ = (hsa_agent_arr_t*)agent_arr;
|
||||
|
||||
hsa_status_t hsa_errno = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
|
||||
if (hsa_errno != HSA_STATUS_SUCCESS) {
|
||||
return hsa_errno;
|
||||
hsa_status_t status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
if (type == HSA_DEVICE_TYPE_GPU) {
|
||||
@@ -62,35 +71,39 @@ static hsa_status_t get_agent_handle_cb(hsa_agent_t agent, void* agent_arr) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
void RdcRocpBase::read_features(rocprofiler_t* context, const unsigned feature_count) {
|
||||
hsa_status_t hsa_errno = rocprofiler_read(context, 0);
|
||||
assert(hsa_errno == HSA_STATUS_SUCCESS);
|
||||
hsa_errno = rocprofiler_get_data(context, 0);
|
||||
assert(hsa_errno == HSA_STATUS_SUCCESS);
|
||||
hsa_errno = rocprofiler_get_metrics(context);
|
||||
assert(hsa_errno == HSA_STATUS_SUCCESS);
|
||||
for (auto i = 0; i < feature_count; i++) {
|
||||
switch (features[0][i].data.kind) {
|
||||
case ROCPROFILER_DATA_KIND_DOUBLE:
|
||||
metrics[features[0][i].name] = features[0][i].data.result_double;
|
||||
break;
|
||||
default:
|
||||
RDC_LOG(RDC_ERROR, "ERROR: Unexpected feature kind: " << features[0][i].data.kind);
|
||||
}
|
||||
void RdcRocpBase::read_feature(rocprofiler_t* context, const unsigned feature_count) {
|
||||
hsa_status_t status = rocprofiler_read(context, 0);
|
||||
assert(status == HSA_STATUS_SUCCESS);
|
||||
status = rocprofiler_get_data(context, 0);
|
||||
assert(status == HSA_STATUS_SUCCESS);
|
||||
status = rocprofiler_get_metrics(context);
|
||||
assert(status == HSA_STATUS_SUCCESS);
|
||||
switch (feature[0].data.kind) {
|
||||
case ROCPROFILER_DATA_KIND_DOUBLE:
|
||||
metric_to_value[feature[0].name] = feature[0].data.result_double;
|
||||
break;
|
||||
case ROCPROFILER_DATA_KIND_INT32:
|
||||
metric_to_value[feature[0].name] = feature[0].data.result_int32;
|
||||
break;
|
||||
case ROCPROFILER_DATA_KIND_INT64:
|
||||
metric_to_value[feature[0].name] = feature[0].data.result_int64;
|
||||
break;
|
||||
default:
|
||||
RDC_LOG(RDC_ERROR, "ERROR: Unexpected feature kind: " << feature[0].data.kind);
|
||||
}
|
||||
}
|
||||
|
||||
static int get_agents(hsa_agent_arr_t* agent_arr) {
|
||||
int errcode = 0;
|
||||
hsa_status_t hsa_errno = HSA_STATUS_SUCCESS;
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
|
||||
agent_arr->capacity = 1;
|
||||
agent_arr->count = 0;
|
||||
agent_arr->agents = (hsa_agent_t*)calloc(agent_arr->capacity, sizeof(hsa_agent_t));
|
||||
assert(agent_arr->agents);
|
||||
|
||||
hsa_errno = hsa_iterate_agents(get_agent_handle_cb, agent_arr);
|
||||
if (hsa_errno != HSA_STATUS_SUCCESS) {
|
||||
status = hsa_iterate_agents(get_agent_handle_cb, agent_arr);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
errcode = -1;
|
||||
|
||||
agent_arr->capacity = 0;
|
||||
@@ -103,15 +116,12 @@ static int get_agents(hsa_agent_arr_t* agent_arr) {
|
||||
|
||||
bool createHsaQueue(hsa_queue_t** queue, hsa_agent_t gpu_agent) {
|
||||
// create a single-producer queue
|
||||
// TODO: check if API args are correct, especially UINT32_MAX
|
||||
hsa_status_t status;
|
||||
status = hsa_queue_create(gpu_agent, 64, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX,
|
||||
UINT32_MAX, queue);
|
||||
hsa_status_t status = hsa_queue_create(gpu_agent, 64, HSA_QUEUE_TYPE_SINGLE, NULL, NULL,
|
||||
UINT32_MAX, UINT32_MAX, queue);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
RDC_LOG(RDC_ERROR, "Queue creation failed");
|
||||
}
|
||||
|
||||
// TODO: warning: is it really required!! ??
|
||||
status = hsa_amd_queue_set_priority(*queue, HSA_AMD_QUEUE_PRIORITY_HIGH);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
RDC_LOG(RDC_ERROR, "HSA Queue Priority Set Failed");
|
||||
@@ -120,80 +130,111 @@ bool createHsaQueue(hsa_queue_t** queue, hsa_agent_t gpu_agent) {
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
int RdcRocpBase::run_profiler(const char* feature_name) {
|
||||
const char* events[features_count] = {feature_name};
|
||||
|
||||
int RdcRocpBase::run_profiler(uint32_t gpu_index, rdc_field_t field) {
|
||||
// initialize hsa. hsa_init() will also load the profiler libs under the hood
|
||||
hsa_status_t hsa_errno = HSA_STATUS_SUCCESS;
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
|
||||
for (int i = 0; i < dev_count; ++i) {
|
||||
for (int j = 0; j < features_count; ++j) {
|
||||
features[i][j].kind = (rocprofiler_feature_kind_t)ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
features[i][j].name = events[j];
|
||||
}
|
||||
}
|
||||
feature[gpu_index].kind = (rocprofiler_feature_kind_t)ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
feature[gpu_index].name = field_to_metric[field];
|
||||
|
||||
rocprofiler_t* contexts[dev_count] = {0};
|
||||
for (int i = 0; i < dev_count; ++i) {
|
||||
rocprofiler_properties_t properties = {
|
||||
queues[i],
|
||||
64,
|
||||
NULL,
|
||||
NULL,
|
||||
};
|
||||
int mode = (ROCPROFILER_MODE_STANDALONE | ROCPROFILER_MODE_SINGLEGROUP);
|
||||
hsa_errno = rocprofiler_open(agent_arr.agents[i], features[i], features_count, &contexts[i],
|
||||
mode, &properties);
|
||||
const char* error_string = nullptr;
|
||||
rocprofiler_error_string(&error_string);
|
||||
if (error_string != NULL) {
|
||||
// rocprofiler_t* contexts[agent_arr.count] = {0};
|
||||
std::vector<rocprofiler_t*> contexts;
|
||||
contexts.reserve(agent_arr.count);
|
||||
rocprofiler_properties_t properties = {
|
||||
queues[gpu_index],
|
||||
64,
|
||||
NULL,
|
||||
NULL,
|
||||
};
|
||||
int mode = (ROCPROFILER_MODE_STANDALONE | ROCPROFILER_MODE_SINGLEGROUP);
|
||||
status = rocprofiler_open(agent_arr.agents[gpu_index], &feature[gpu_index], 1,
|
||||
&contexts[gpu_index], mode, &properties);
|
||||
const char* error_string = nullptr;
|
||||
rocprofiler_error_string(&error_string);
|
||||
if (error_string != nullptr) {
|
||||
if (error_string[0] != '\0') {
|
||||
RDC_LOG(RDC_ERROR, error_string);
|
||||
}
|
||||
assert(hsa_errno == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
assert(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
for (int i = 0; i < dev_count; ++i) {
|
||||
hsa_errno = rocprofiler_start(contexts[i], 0);
|
||||
assert(hsa_errno == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
status = rocprofiler_start(contexts[gpu_index], 0);
|
||||
assert(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
// this is the duration for which the counter increments from zero.
|
||||
// TODO: Return error if sampling interval is lower than this value
|
||||
usleep(10000);
|
||||
|
||||
for (int i = 0; i < dev_count; ++i) {
|
||||
hsa_errno = rocprofiler_stop(contexts[i], 0);
|
||||
assert(hsa_errno == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
status = rocprofiler_stop(contexts[gpu_index], 0);
|
||||
assert(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
for (int i = 0; i < dev_count; ++i) {
|
||||
read_features(contexts[i], features_count);
|
||||
}
|
||||
read_feature(contexts[gpu_index], 1);
|
||||
|
||||
usleep(100);
|
||||
|
||||
for (int i = 0; i < dev_count; ++i) {
|
||||
hsa_errno = rocprofiler_close(contexts[i]);
|
||||
assert(hsa_errno == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
status = rocprofiler_close(contexts[gpu_index]);
|
||||
assert(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * @brief Return the rocprofiler metric name registered for an RDC field.
 *
 * Despite the function's name, the lookup goes from field id to metric name.
 *
 * @param field  RDC field id to look up in field_to_metric
 * @return the metric name stored for @p field
 * @throws std::out_of_range (from std::map::at) if @p field is not present
 */
const char* RdcRocpBase::get_field_id_from_name(rdc_field_t field) {
  return field_to_metric.at(field);
}
|
||||
|
||||
// TODO - map RDC gpu_index to node_id
|
||||
// use rocprofiler to check which metrics are supported
|
||||
void check_metrics_supported(uint32_t node_id, std::vector<std::string>& metrics_all,
|
||||
std::vector<std::string>& metrics_good) {
|
||||
typedef struct {
|
||||
std::vector<std::string>* metrics_all_;
|
||||
std::vector<std::string>* metrics_good_;
|
||||
uint32_t driver_node_id;
|
||||
} payload_t;
|
||||
// callback for rocprofiler to check which metrics are supported
|
||||
auto info_callback = [](const rocprofiler_info_data_t info, void* data) {
|
||||
payload_t* payload = reinterpret_cast<payload_t*>(data);
|
||||
if (info.agent_index == payload->driver_node_id) {
|
||||
auto it =
|
||||
std::find(payload->metrics_all_->begin(), payload->metrics_all_->end(), info.metric.name);
|
||||
if (it != payload->metrics_all_->end()) {
|
||||
payload->metrics_good_->push_back(info.metric.name);
|
||||
RDC_LOG(RDC_DEBUG, " gpu-agent" << info.agent_index << " : " << info.metric.name << " : "
|
||||
<< info.metric.description);
|
||||
if (info.metric.expr != NULL) // if it's a derived metric, print it's formula
|
||||
RDC_LOG(RDC_DEBUG, " " << info.metric.name << " = " << info.metric.expr);
|
||||
}
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
};
|
||||
|
||||
payload_t payload = {&metrics_all, &metrics_good, node_id};
|
||||
hsa_status_t status =
|
||||
rocprofiler_iterate_info(NULL, ROCPROFILER_INFO_KIND_METRIC, info_callback, &payload);
|
||||
|
||||
for (auto& iter : *(payload.metrics_good_)) {
|
||||
RDC_LOG(RDC_DEBUG, iter << " : exists");
|
||||
}
|
||||
}
|
||||
|
||||
const std::vector<rdc_field_t> RdcRocpBase::get_field_ids() {
|
||||
std::vector<rdc_field_t> field_ids;
|
||||
for (auto& [k, v] : counter_map_k) {
|
||||
for (auto& [k, v] : field_to_metric) {
|
||||
field_ids.push_back(k);
|
||||
}
|
||||
return field_ids;
|
||||
}
|
||||
|
||||
RdcRocpBase::RdcRocpBase() {
|
||||
counter_map_k = {
|
||||
hsa_status_t status = hsa_init();
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
const char* errstr = nullptr;
|
||||
hsa_status_string(status, &errstr);
|
||||
throw std::runtime_error("hsa error code: " + std::to_string(status) + " " + errstr);
|
||||
}
|
||||
|
||||
// all fields
|
||||
static const std::map<rdc_field_t, const char*> temp_field_map_k = {
|
||||
{RDC_FI_PROF_CU_UTILIZATION, "CU_UTILIZATION"},
|
||||
{RDC_FI_PROF_CU_OCCUPANCY, "CU_OCCUPANCY"},
|
||||
{RDC_FI_PROF_FLOPS_16, "FLOPS_16"},
|
||||
@@ -204,22 +245,16 @@ RdcRocpBase::RdcRocpBase() {
|
||||
{RDC_FI_PROF_ELAPSED_CYCLES, "ELAPSED_CYCLES"},
|
||||
{RDC_FI_PROF_FETCH_SIZE, "FETCH_SIZE"},
|
||||
{RDC_FI_PROF_WRITE_SIZE, "WRITE_SIZE"},
|
||||
{RDC_FI_PROF_GRBM_COUNT, "GRBM_COUNT"},
|
||||
{RDC_FI_PROF_SQ_WAVES, "SQ_WAVES"},
|
||||
{RDC_FI_PROF_TA_BUSY_AVR, "TA_BUSY_avr"},
|
||||
};
|
||||
|
||||
// populate monitored fields
|
||||
std::cout << "Size of counter_map_k: " << counter_map_k.size() << "\n";
|
||||
std::vector<std::string> unchecked_fields;
|
||||
std::vector<std::string> checked_fields;
|
||||
|
||||
for (auto& [k, v] : counter_map_k) {
|
||||
const char* str = v;
|
||||
metrics.emplace(std::make_pair(str, 0.0));
|
||||
}
|
||||
assert(metrics.size() == counter_map_k.size());
|
||||
|
||||
hsa_status_t err = hsa_init();
|
||||
if (err != HSA_STATUS_SUCCESS) {
|
||||
const char* errstr = nullptr;
|
||||
hsa_status_string(err, &errstr);
|
||||
throw std::runtime_error("hsa error code: " + std::to_string(err) + " " + errstr);
|
||||
for (auto& [k, v] : temp_field_map_k) {
|
||||
unchecked_fields.push_back(v);
|
||||
}
|
||||
|
||||
// populate list of agents
|
||||
@@ -227,42 +262,67 @@ RdcRocpBase::RdcRocpBase() {
|
||||
if (errcode != 0) {
|
||||
return;
|
||||
}
|
||||
RDC_LOG(RDC_DEBUG, "Agent count: " << agent_arr.count);
|
||||
|
||||
for (int i = 0; i < dev_count; ++i) {
|
||||
int j = 0;
|
||||
for (auto& metric : metrics) {
|
||||
features[i][j].kind = (rocprofiler_feature_kind_t)ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
features[i][j].name = metric.first;
|
||||
j++;
|
||||
uint32_t driver_node_id = 0;
|
||||
for (uint32_t gpu_index = 0; gpu_index < agent_arr.count; gpu_index++) {
|
||||
status = hsa_agent_get_info(agent_arr.agents[gpu_index],
|
||||
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID),
|
||||
&driver_node_id);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
const char* errstr = nullptr;
|
||||
hsa_status_string(status, &errstr);
|
||||
RDC_LOG(RDC_ERROR, "hsa error: " << std::to_string(status) << " " << errstr);
|
||||
} else {
|
||||
RDC_LOG(RDC_DEBUG, "gpu_index[" << gpu_index << "] = node_id[" << driver_node_id << "]");
|
||||
}
|
||||
}
|
||||
// only check metrics for the last GPU
|
||||
// TODO: add support for different metrics per GPU
|
||||
// currently it's assumed that all GPUs are the same
|
||||
check_metrics_supported(driver_node_id, unchecked_fields, checked_fields);
|
||||
|
||||
for (auto& [k, v] : temp_field_map_k) {
|
||||
auto found = std::find(checked_fields.begin(), checked_fields.end(), v);
|
||||
if (found != checked_fields.end()) {
|
||||
field_to_metric.insert({k, v});
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < dev_count; ++i) {
|
||||
if (!createHsaQueue(&queues[i], agent_arr.agents[i])) {
|
||||
RDC_LOG(RDC_ERROR, "can't create queues[" << i << "]\n");
|
||||
RDC_LOG(RDC_DEBUG, "Rocprofiler supports " << field_to_metric.size() << " fields");
|
||||
|
||||
for (auto& [k, v] : field_to_metric) {
|
||||
const char* str = v;
|
||||
metric_to_value.insert({str, 0.0});
|
||||
}
|
||||
assert(metric_to_value.size() == field_to_metric.size());
|
||||
|
||||
for (uint32_t gpu_index = 0; gpu_index < agent_arr.count; gpu_index++) {
|
||||
for (auto& metric : metric_to_value) {
|
||||
rocprofiler_feature_t temp_feature;
|
||||
temp_feature.kind = (rocprofiler_feature_kind_t)ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
temp_feature.name = metric.first;
|
||||
feature.insert({gpu_index, temp_feature});
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t gpu_index = 0; gpu_index < agent_arr.count; gpu_index++) {
|
||||
queues.push_back(nullptr);
|
||||
if (!createHsaQueue(&queues[gpu_index], agent_arr.agents[gpu_index])) {
|
||||
RDC_LOG(RDC_ERROR, "can't create queues[" << gpu_index << "]\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * @brief Shut down the HSA runtime.
 *
 * Calls hsa_shut_down() twice: the first call is expected to succeed and the
 * second is expected to report HSA_STATUS_ERROR_NOT_INITIALIZED. Profiler
 * contexts are not touched here; run_profiler() closes its context itself.
 */
RdcRocpBase::~RdcRocpBase() {
  // NOTE(review): the queues created in the constructor are not destroyed
  // explicitly here - presumably hsa_shut_down() releases them; confirm.
  hsa_status_t status = hsa_shut_down();
  assert(status == HSA_STATUS_SUCCESS);

  // NOTE(review): the second call looks like a check that the runtime is
  // fully released - confirm it cannot drop a reference held by another user.
  status = hsa_shut_down();
  assert(status == HSA_STATUS_ERROR_NOT_INITIALIZED);
  (void)status;  // only inspected by assert()
}
|
||||
|
||||
rdc_status_t RdcRocpBase::rocp_lookup(pair_gpu_field_t gpu_field, double* value) {
|
||||
rdc_status_t RdcRocpBase::rocp_lookup(uint32_t gpu_index, rdc_field_t field, double* value) {
|
||||
if (value == nullptr) {
|
||||
return RDC_ST_BAD_PARAMETER;
|
||||
}
|
||||
@@ -271,17 +331,17 @@ rdc_status_t RdcRocpBase::rocp_lookup(pair_gpu_field_t gpu_field, double* value)
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
return Rocp2RdcError(status);
|
||||
}
|
||||
switch (gpu_field.second) {
|
||||
switch (field) {
|
||||
default:
|
||||
run_profiler(counter_map_k.at(gpu_field.second));
|
||||
*value = metrics[counter_map_k.at(gpu_field.second)];
|
||||
run_profiler(gpu_index, field);
|
||||
*value = metric_to_value[field_to_metric[field]];
|
||||
break;
|
||||
}
|
||||
return Rocp2RdcError(status);
|
||||
}
|
||||
|
||||
rdc_status_t RdcRocpBase::Rocp2RdcError(hsa_status_t rocm_status) {
|
||||
switch (rocm_status) {
|
||||
rdc_status_t RdcRocpBase::Rocp2RdcError(hsa_status_t status) {
|
||||
switch (status) {
|
||||
case HSA_STATUS_SUCCESS:
|
||||
return RDC_ST_OK;
|
||||
default:
|
||||
|
||||
@@ -20,6 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <algorithm>
|
||||
@@ -40,7 +41,6 @@ amd::rdc::RdcRocpBase rocp;
|
||||
rdc_status_t rdc_module_init(uint64_t flags) { return RDC_ST_OK; }
|
||||
|
||||
// get supported field ids
|
||||
// TODO: Query fields with rocprofiler
|
||||
rdc_status_t rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FIELDS], uint32_t* field_count) {
|
||||
// extract all keys from counter_map
|
||||
std::vector<rdc_field_t> fields = rocp.get_field_ids();
|
||||
@@ -69,7 +69,7 @@ rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, const uint3
|
||||
rdc_gpu_field_value_t values[BULK_FIELDS_MAX];
|
||||
uint32_t bulk_count = 0;
|
||||
rdc_status_t status = RDC_ST_UNKNOWN_ERROR;
|
||||
double data;
|
||||
double data = NAN;
|
||||
|
||||
for (uint32_t i = 0; i < fields_count; i++) {
|
||||
if (bulk_count >= BULK_FIELDS_MAX) {
|
||||
@@ -81,7 +81,7 @@ rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, const uint3
|
||||
bulk_count = 0;
|
||||
}
|
||||
|
||||
status = rocp.rocp_lookup(std::make_pair(fields[i].gpu_index, fields[i].field_id), &data);
|
||||
status = rocp.rocp_lookup(fields[i].gpu_index, fields[i].field_id, &data);
|
||||
// get value
|
||||
values[bulk_count].gpu_index = fields[i].gpu_index;
|
||||
values[bulk_count].field_value.type = DOUBLE;
|
||||
@@ -92,7 +92,7 @@ rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, const uint3
|
||||
bulk_count++;
|
||||
}
|
||||
if (bulk_count != 0) {
|
||||
rdc_status_t status = callback(values, bulk_count, user_data);
|
||||
status = callback(values, bulk_count, user_data);
|
||||
if (status != RDC_ST_OK) {
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -13,15 +13,13 @@ option(INSTALL_GTEST "Install GTest (only useful if GTest is not already install
|
||||
# Hack to find libraries after installation
|
||||
# /opt/rocm/share/rdc/rdctst_tests/../../../ = /opt/rocm
|
||||
set(RDCTST_RPATH
|
||||
"\$ORIGIN"
|
||||
"\$ORIGIN/../../../lib"
|
||||
"\$ORIGIN/../../../lib/rdc")
|
||||
# replace lib with lib64
|
||||
list(TRANSFORM RDCTST_RPATH REPLACE "lib" "lib64" OUTPUT_VARIABLE RDCTST_RPATH64)
|
||||
# combine lists
|
||||
set(CMAKE_INSTALL_RPATH
|
||||
${CMAKE_INSTALL_RPATH}
|
||||
${RDCTST_RPATH}
|
||||
${RDCTST_RPATH64})
|
||||
${RDCTST_RPATH})
|
||||
|
||||
#
|
||||
# Print out the build configuration being used:
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele