Add GPU indexing and fix check for fields in rocprof

- Fix RUNPATH for tests

Change-Id: I79517592b49d27080a010a2e41e5878adf24a157
Signed-off-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>
Tento commit je obsažen v:
Galantsev, Dmitrii
2024-05-22 01:53:34 -05:00
rodič 7c6bd4dc1c
revize e11afbf60f
8 změnil soubory, kde provedl 222 přidání a 156 odebrání
+1 -1
Zobrazit soubor
@@ -16,7 +16,7 @@ docBin/
docs/_doxygen/
# VisualStudioCode
.vscode/
.vscode
# do NOT ignore these files
!.clang-format
+13 -10
Zobrazit soubor
@@ -102,16 +102,19 @@ FLD_DESC_ENT(RDC_FI_XGMI_7_WRITE_KB, "XGMI7 accumulated data write size (KB)
// This doesn't map to rocprofiler counters directly
// See counter_map in rdc/include/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.h
// See metrics.xml in rocprofiler
FLD_DESC_ENT(RDC_FI_PROF_CU_UTILIZATION, "", "CU_UTILIZATION", false)
FLD_DESC_ENT(RDC_FI_PROF_CU_OCCUPANCY, "", "CU_OCCUPANCY", false)
FLD_DESC_ENT(RDC_FI_PROF_FLOPS_16, "", "FLOPS_16", false)
FLD_DESC_ENT(RDC_FI_PROF_FLOPS_32, "", "FLOPS_32", false)
FLD_DESC_ENT(RDC_FI_PROF_FLOPS_64, "", "FLOPS_64", false)
FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_CYCLES, "", "ACTIVE_CYCLES", false)
FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_WAVES, "", "ACTIVE_WAVES", false)
FLD_DESC_ENT(RDC_FI_PROF_ELAPSED_CYCLES, "", "ELAPSED_CYCLES", false)
FLD_DESC_ENT(RDC_FI_PROF_FETCH_SIZE, "", "FETCH_SIZE", false)
FLD_DESC_ENT(RDC_FI_PROF_WRITE_SIZE, "", "WRITE_SIZE", false)
FLD_DESC_ENT(RDC_FI_PROF_CU_UTILIZATION, "Active Cycles / total Elapsed Cycles", "CU_UTILIZATION", false)
FLD_DESC_ENT(RDC_FI_PROF_CU_OCCUPANCY, "Active Waves / maximum Active Waves per CU", "CU_OCCUPANCY", false)
FLD_DESC_ENT(RDC_FI_PROF_FLOPS_16, "Number of fp16 OPS / second", "FLOPS_16", false)
FLD_DESC_ENT(RDC_FI_PROF_FLOPS_32, "Number of fp32 OPS / second", "FLOPS_32", false)
FLD_DESC_ENT(RDC_FI_PROF_FLOPS_64, "Number of fp64 OPS / second", "FLOPS_64", false)
FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_CYCLES, "Number of Active Cycles", "ACTIVE_CYCLES", false)
FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_WAVES, "Number of Active Waves", "ACTIVE_WAVES", false)
FLD_DESC_ENT(RDC_FI_PROF_ELAPSED_CYCLES, "Number of Elapsed Cycles over all SMs", "ELAPSED_CYCLES", false)
FLD_DESC_ENT(RDC_FI_PROF_FETCH_SIZE, "kb fetched from video memory", "FETCH_SIZE", false)
FLD_DESC_ENT(RDC_FI_PROF_WRITE_SIZE, "kb written to video memory", "WRITE_SIZE", false)
FLD_DESC_ENT(RDC_FI_PROF_GRBM_COUNT, "", "GRBM_COUNT", false)
FLD_DESC_ENT(RDC_FI_PROF_SQ_WAVES, "", "SQ_WAVES", false)
FLD_DESC_ENT(RDC_FI_PROF_TA_BUSY_AVR, "", "TA_BUSY_avr", false)
// Events
FLD_DESC_ENT(RDC_EVNT_XGMI_0_NOP_TX, "NOPs sent to neighbor 0", "XGMI_NOP_0", false)
+16 -7
Zobrazit soubor
@@ -105,8 +105,6 @@ int run() {
}
std::cout << "Created the GPU group " << group_id << std::endl;
// Only add one GPU
count = 1;
for (uint32_t i = 0; i < count; i++) {
result = rdc_group_gpu_add(rdc_handle, group_id, gpu_index_list[i]); // Add GPU 0
if (result != RDC_ST_OK) {
@@ -129,8 +127,19 @@ int run() {
field_ids.push_back(RDC_FI_GPU_MEMORY_USAGE);
field_ids.push_back(RDC_FI_POWER_USAGE);
field_ids.push_back(RDC_FI_PROF_CU_OCCUPANCY);
field_ids.push_back(RDC_FI_PROF_CU_UTILIZATION);
field_ids.push_back(RDC_FI_PROF_CU_OCCUPANCY);
field_ids.push_back(RDC_FI_PROF_FLOPS_16);
field_ids.push_back(RDC_FI_PROF_FLOPS_32);
field_ids.push_back(RDC_FI_PROF_FLOPS_64);
field_ids.push_back(RDC_FI_PROF_ACTIVE_CYCLES);
field_ids.push_back(RDC_FI_PROF_ACTIVE_WAVES);
field_ids.push_back(RDC_FI_PROF_ELAPSED_CYCLES);
field_ids.push_back(RDC_FI_PROF_FETCH_SIZE);
field_ids.push_back(RDC_FI_PROF_WRITE_SIZE);
field_ids.push_back(RDC_FI_PROF_GRBM_COUNT);
field_ids.push_back(RDC_FI_PROF_SQ_WAVES);
field_ids.push_back(RDC_FI_PROF_TA_BUSY_AVR);
result = rdc_group_field_create(rdc_handle, field_ids.size(), field_ids.data(), field_group_name,
&field_group_id);
if (result != RDC_ST_OK) {
@@ -146,7 +155,7 @@ int run() {
// Let the RDC to watch the fields and groups. The fields will be updated
// once per second, the max keep age is 1 minutes and only keep 10 samples.
result = rdc_field_watch(rdc_handle, group_id, field_group_id,
static_cast<uint64_t>(1) * 1000 * 1000, 60, 10);
static_cast<uint64_t>(1) * 10 * 1000, 60, 10);
if (result != RDC_ST_OK) {
std::cout << "Error watch group fields. Return: " << rdc_status_string(result);
return cleanup();
@@ -159,7 +168,7 @@ int run() {
// all_fields() will be called periodically at background. If running as
// RDC_OPERATION_MODE_MANUAL mode, we must call rdc_field_update_all()
// periodically to take samples.
usleep(5 * 1000 * 1000); // sleep 5 seconds before fetch the stats
usleep(5 * 10 * 1000); // sleep 0.05 seconds before fetch the stats
// Retrieve the field and group information from RDC
rdc_group_info_t group_info;
@@ -209,8 +218,8 @@ int run() {
}
std::cout << "Stop watch group:" << group_id << ", field_group:" << field_group_id << std::endl;
// Get the history data last 10 seconds
std::cout << "Get last 10 seconds metrics for group:" << group_id
// Get the history data last 0.1 seconds
std::cout << "Get last 0.1 seconds metrics for group:" << group_id
<< " field_group:" << field_group_id << std::endl;
std::cout << "time_stamp\t"
<< "GPU_index\t"
+3
Zobrazit soubor
@@ -260,6 +260,9 @@ typedef enum {
RDC_FI_PROF_ELAPSED_CYCLES,
RDC_FI_PROF_FETCH_SIZE,
RDC_FI_PROF_WRITE_SIZE,
RDC_FI_PROF_GRBM_COUNT,
RDC_FI_PROF_SQ_WAVES,
RDC_FI_PROF_TA_BUSY_AVR,
/*
* @brief Raw XGMI counter events
+11 -18
Zobrazit soubor
@@ -24,14 +24,8 @@ THE SOFTWARE.
#define RDC_MODULES_RDC_ROCP_RDCROCPBASE_H_
#include <rocprofiler/rocprofiler.h>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <list>
#include <map>
#include <string>
#include <typeinfo>
#include <unordered_map>
#include <vector>
#include "rdc/rdc.h"
@@ -47,8 +41,7 @@ typedef struct {
/// Common interface for RocP tests and samples
class RdcRocpBase {
static const int dev_count = 1;
typedef std::pair<uint32_t, rdc_field_t> pair_gpu_field_t;
// typedef const char* rocp_metric_name_t;
public:
RdcRocpBase();
@@ -67,26 +60,26 @@ class RdcRocpBase {
* @retval ::ROCMTOOLS_STATUS_SUCCESS The function has been executed
* successfully.
*/
rdc_status_t rocp_lookup(pair_gpu_field_t gpu_field, double* value);
rdc_status_t rocp_lookup(uint32_t gpu_index, rdc_field_t field, double* value);
const char* get_field_id_from_name(rdc_field_t);
const std::vector<rdc_field_t> get_field_ids();
protected:
private:
rocprofiler_t* contexts[dev_count] = {nullptr};
static const int features_count = 1;
std::map<const char*, double> metrics = {};
rocprofiler_feature_t features[dev_count][features_count] = {};
void read_features(rocprofiler_t* context, const unsigned feature_count);
int run_profiler(const char* feature_name);
hsa_queue_t* queues[dev_count] = {nullptr};
std::map<const char*, double> metric_to_value = {};
// array of features for each device
std::map<uint32_t, rocprofiler_feature_t> feature;
// rocprofiler_feature_t features[dev_count][features_count] = {};
void read_feature(rocprofiler_t* context, const unsigned feature_count);
int run_profiler(uint32_t gpu_index, rdc_field_t field);
std::vector<hsa_queue_t*> queues;
hsa_agent_arr_t agent_arr = {};
std::map<rdc_field_t, const char*> counter_map_k = {};
std::map<rdc_field_t, const char*> field_to_metric = {};
/**
* @brief Convert from rocmtools status into RDC status
*/
rdc_status_t Rocp2RdcError(hsa_status_t rocm_status);
rdc_status_t Rocp2RdcError(hsa_status_t status);
};
} // namespace rdc
+172 -112
Zobrazit soubor
@@ -23,12 +23,18 @@ THE SOFTWARE.
#include "rdc_modules/rdc_rocp/RdcRocpBase.h"
#include <rocprofiler/rocprofiler.h>
#include <sys/wait.h>
#include <unistd.h>
#include <algorithm>
#include <cassert>
#include <csignal>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <exception>
#include <utility>
#include <vector>
// #include "hsa.h"
#include "rdc/rdc.h"
@@ -40,11 +46,14 @@ namespace rdc {
static hsa_status_t get_agent_handle_cb(hsa_agent_t agent, void* agent_arr) {
hsa_device_type_t type;
assert(agent_arr != nullptr);
hsa_agent_arr_t* agent_arr_ = (hsa_agent_arr_t*)agent_arr;
hsa_status_t hsa_errno = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
if (hsa_errno != HSA_STATUS_SUCCESS) {
return hsa_errno;
hsa_status_t status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
if (status != HSA_STATUS_SUCCESS) {
return status;
}
if (type == HSA_DEVICE_TYPE_GPU) {
@@ -62,35 +71,39 @@ static hsa_status_t get_agent_handle_cb(hsa_agent_t agent, void* agent_arr) {
return HSA_STATUS_SUCCESS;
}
void RdcRocpBase::read_features(rocprofiler_t* context, const unsigned feature_count) {
hsa_status_t hsa_errno = rocprofiler_read(context, 0);
assert(hsa_errno == HSA_STATUS_SUCCESS);
hsa_errno = rocprofiler_get_data(context, 0);
assert(hsa_errno == HSA_STATUS_SUCCESS);
hsa_errno = rocprofiler_get_metrics(context);
assert(hsa_errno == HSA_STATUS_SUCCESS);
for (auto i = 0; i < feature_count; i++) {
switch (features[0][i].data.kind) {
case ROCPROFILER_DATA_KIND_DOUBLE:
metrics[features[0][i].name] = features[0][i].data.result_double;
break;
default:
RDC_LOG(RDC_ERROR, "ERROR: Unexpected feature kind: " << features[0][i].data.kind);
}
void RdcRocpBase::read_feature(rocprofiler_t* context, const unsigned feature_count) {
hsa_status_t status = rocprofiler_read(context, 0);
assert(status == HSA_STATUS_SUCCESS);
status = rocprofiler_get_data(context, 0);
assert(status == HSA_STATUS_SUCCESS);
status = rocprofiler_get_metrics(context);
assert(status == HSA_STATUS_SUCCESS);
switch (feature[0].data.kind) {
case ROCPROFILER_DATA_KIND_DOUBLE:
metric_to_value[feature[0].name] = feature[0].data.result_double;
break;
case ROCPROFILER_DATA_KIND_INT32:
metric_to_value[feature[0].name] = feature[0].data.result_int32;
break;
case ROCPROFILER_DATA_KIND_INT64:
metric_to_value[feature[0].name] = feature[0].data.result_int64;
break;
default:
RDC_LOG(RDC_ERROR, "ERROR: Unexpected feature kind: " << feature[0].data.kind);
}
}
static int get_agents(hsa_agent_arr_t* agent_arr) {
int errcode = 0;
hsa_status_t hsa_errno = HSA_STATUS_SUCCESS;
hsa_status_t status = HSA_STATUS_SUCCESS;
agent_arr->capacity = 1;
agent_arr->count = 0;
agent_arr->agents = (hsa_agent_t*)calloc(agent_arr->capacity, sizeof(hsa_agent_t));
assert(agent_arr->agents);
hsa_errno = hsa_iterate_agents(get_agent_handle_cb, agent_arr);
if (hsa_errno != HSA_STATUS_SUCCESS) {
status = hsa_iterate_agents(get_agent_handle_cb, agent_arr);
if (status != HSA_STATUS_SUCCESS) {
errcode = -1;
agent_arr->capacity = 0;
@@ -103,15 +116,12 @@ static int get_agents(hsa_agent_arr_t* agent_arr) {
bool createHsaQueue(hsa_queue_t** queue, hsa_agent_t gpu_agent) {
// create a single-producer queue
// TODO: check if API args are correct, especially UINT32_MAX
hsa_status_t status;
status = hsa_queue_create(gpu_agent, 64, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX,
UINT32_MAX, queue);
hsa_status_t status = hsa_queue_create(gpu_agent, 64, HSA_QUEUE_TYPE_SINGLE, NULL, NULL,
UINT32_MAX, UINT32_MAX, queue);
if (status != HSA_STATUS_SUCCESS) {
RDC_LOG(RDC_ERROR, "Queue creation failed");
}
// TODO: warning: is it really required!! ??
status = hsa_amd_queue_set_priority(*queue, HSA_AMD_QUEUE_PRIORITY_HIGH);
if (status != HSA_STATUS_SUCCESS) {
RDC_LOG(RDC_ERROR, "HSA Queue Priority Set Failed");
@@ -120,80 +130,111 @@ bool createHsaQueue(hsa_queue_t** queue, hsa_agent_t gpu_agent) {
return (status == HSA_STATUS_SUCCESS);
}
int RdcRocpBase::run_profiler(const char* feature_name) {
const char* events[features_count] = {feature_name};
int RdcRocpBase::run_profiler(uint32_t gpu_index, rdc_field_t field) {
// initialize hsa. hsa_init() will also load the profiler libs under the hood
hsa_status_t hsa_errno = HSA_STATUS_SUCCESS;
hsa_status_t status = HSA_STATUS_SUCCESS;
for (int i = 0; i < dev_count; ++i) {
for (int j = 0; j < features_count; ++j) {
features[i][j].kind = (rocprofiler_feature_kind_t)ROCPROFILER_FEATURE_KIND_METRIC;
features[i][j].name = events[j];
}
}
feature[gpu_index].kind = (rocprofiler_feature_kind_t)ROCPROFILER_FEATURE_KIND_METRIC;
feature[gpu_index].name = field_to_metric[field];
rocprofiler_t* contexts[dev_count] = {0};
for (int i = 0; i < dev_count; ++i) {
rocprofiler_properties_t properties = {
queues[i],
64,
NULL,
NULL,
};
int mode = (ROCPROFILER_MODE_STANDALONE | ROCPROFILER_MODE_SINGLEGROUP);
hsa_errno = rocprofiler_open(agent_arr.agents[i], features[i], features_count, &contexts[i],
mode, &properties);
const char* error_string = nullptr;
rocprofiler_error_string(&error_string);
if (error_string != NULL) {
// rocprofiler_t* contexts[agent_arr.count] = {0};
std::vector<rocprofiler_t*> contexts;
contexts.reserve(agent_arr.count);
rocprofiler_properties_t properties = {
queues[gpu_index],
64,
NULL,
NULL,
};
int mode = (ROCPROFILER_MODE_STANDALONE | ROCPROFILER_MODE_SINGLEGROUP);
status = rocprofiler_open(agent_arr.agents[gpu_index], &feature[gpu_index], 1,
&contexts[gpu_index], mode, &properties);
const char* error_string = nullptr;
rocprofiler_error_string(&error_string);
if (error_string != nullptr) {
if (error_string[0] != '\0') {
RDC_LOG(RDC_ERROR, error_string);
}
assert(hsa_errno == HSA_STATUS_SUCCESS);
}
assert(status == HSA_STATUS_SUCCESS);
for (int i = 0; i < dev_count; ++i) {
hsa_errno = rocprofiler_start(contexts[i], 0);
assert(hsa_errno == HSA_STATUS_SUCCESS);
}
status = rocprofiler_start(contexts[gpu_index], 0);
assert(status == HSA_STATUS_SUCCESS);
// this is the duration for which the counter increments from zero.
// TODO: Return error if sampling interval is lower than this value
usleep(10000);
for (int i = 0; i < dev_count; ++i) {
hsa_errno = rocprofiler_stop(contexts[i], 0);
assert(hsa_errno == HSA_STATUS_SUCCESS);
}
status = rocprofiler_stop(contexts[gpu_index], 0);
assert(status == HSA_STATUS_SUCCESS);
for (int i = 0; i < dev_count; ++i) {
read_features(contexts[i], features_count);
}
read_feature(contexts[gpu_index], 1);
usleep(100);
for (int i = 0; i < dev_count; ++i) {
hsa_errno = rocprofiler_close(contexts[i]);
assert(hsa_errno == HSA_STATUS_SUCCESS);
}
status = rocprofiler_close(contexts[gpu_index]);
assert(status == HSA_STATUS_SUCCESS);
return 0;
}
// Returns the rocprofiler metric name registered for `field`.
// std::map::at throws std::out_of_range when `field` is not present in
// field_to_metric.
const char* RdcRocpBase::get_field_id_from_name(rdc_field_t field) {
  return field_to_metric.at(field);
}
// TODO - map RDC gpu_index to node_id
// use rocprofiler to check which metrics are supported
void check_metrics_supported(uint32_t node_id, std::vector<std::string>& metrics_all,
std::vector<std::string>& metrics_good) {
typedef struct {
std::vector<std::string>* metrics_all_;
std::vector<std::string>* metrics_good_;
uint32_t driver_node_id;
} payload_t;
// callback for rocprofiler to check which metrics are supported
auto info_callback = [](const rocprofiler_info_data_t info, void* data) {
payload_t* payload = reinterpret_cast<payload_t*>(data);
if (info.agent_index == payload->driver_node_id) {
auto it =
std::find(payload->metrics_all_->begin(), payload->metrics_all_->end(), info.metric.name);
if (it != payload->metrics_all_->end()) {
payload->metrics_good_->push_back(info.metric.name);
RDC_LOG(RDC_DEBUG, " gpu-agent" << info.agent_index << " : " << info.metric.name << " : "
<< info.metric.description);
if (info.metric.expr != NULL) // if it's a derived metric, print it's formula
RDC_LOG(RDC_DEBUG, " " << info.metric.name << " = " << info.metric.expr);
}
}
return HSA_STATUS_SUCCESS;
};
payload_t payload = {&metrics_all, &metrics_good, node_id};
hsa_status_t status =
rocprofiler_iterate_info(NULL, ROCPROFILER_INFO_KIND_METRIC, info_callback, &payload);
for (auto& iter : *(payload.metrics_good_)) {
RDC_LOG(RDC_DEBUG, iter << " : exists");
}
}
const std::vector<rdc_field_t> RdcRocpBase::get_field_ids() {
std::vector<rdc_field_t> field_ids;
for (auto& [k, v] : counter_map_k) {
for (auto& [k, v] : field_to_metric) {
field_ids.push_back(k);
}
return field_ids;
}
RdcRocpBase::RdcRocpBase() {
counter_map_k = {
hsa_status_t status = hsa_init();
if (status != HSA_STATUS_SUCCESS) {
const char* errstr = nullptr;
hsa_status_string(status, &errstr);
throw std::runtime_error("hsa error code: " + std::to_string(status) + " " + errstr);
}
// all fields
static const std::map<rdc_field_t, const char*> temp_field_map_k = {
{RDC_FI_PROF_CU_UTILIZATION, "CU_UTILIZATION"},
{RDC_FI_PROF_CU_OCCUPANCY, "CU_OCCUPANCY"},
{RDC_FI_PROF_FLOPS_16, "FLOPS_16"},
@@ -204,22 +245,16 @@ RdcRocpBase::RdcRocpBase() {
{RDC_FI_PROF_ELAPSED_CYCLES, "ELAPSED_CYCLES"},
{RDC_FI_PROF_FETCH_SIZE, "FETCH_SIZE"},
{RDC_FI_PROF_WRITE_SIZE, "WRITE_SIZE"},
{RDC_FI_PROF_GRBM_COUNT, "GRBM_COUNT"},
{RDC_FI_PROF_SQ_WAVES, "SQ_WAVES"},
{RDC_FI_PROF_TA_BUSY_AVR, "TA_BUSY_avr"},
};
// populate monitored fields
std::cout << "Size of counter_map_k: " << counter_map_k.size() << "\n";
std::vector<std::string> unchecked_fields;
std::vector<std::string> checked_fields;
for (auto& [k, v] : counter_map_k) {
const char* str = v;
metrics.emplace(std::make_pair(str, 0.0));
}
assert(metrics.size() == counter_map_k.size());
hsa_status_t err = hsa_init();
if (err != HSA_STATUS_SUCCESS) {
const char* errstr = nullptr;
hsa_status_string(err, &errstr);
throw std::runtime_error("hsa error code: " + std::to_string(err) + " " + errstr);
for (auto& [k, v] : temp_field_map_k) {
unchecked_fields.push_back(v);
}
// populate list of agents
@@ -227,42 +262,67 @@ RdcRocpBase::RdcRocpBase() {
if (errcode != 0) {
return;
}
RDC_LOG(RDC_DEBUG, "Agent count: " << agent_arr.count);
for (int i = 0; i < dev_count; ++i) {
int j = 0;
for (auto& metric : metrics) {
features[i][j].kind = (rocprofiler_feature_kind_t)ROCPROFILER_FEATURE_KIND_METRIC;
features[i][j].name = metric.first;
j++;
uint32_t driver_node_id = 0;
for (uint32_t gpu_index = 0; gpu_index < agent_arr.count; gpu_index++) {
status = hsa_agent_get_info(agent_arr.agents[gpu_index],
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID),
&driver_node_id);
if (status != HSA_STATUS_SUCCESS) {
const char* errstr = nullptr;
hsa_status_string(status, &errstr);
RDC_LOG(RDC_ERROR, "hsa error: " << std::to_string(status) << " " << errstr);
} else {
RDC_LOG(RDC_DEBUG, "gpu_index[" << gpu_index << "] = node_id[" << driver_node_id << "]");
}
}
// only check metrics for the last GPU
// TODO: add support for different metrics per GPU
// currently it's assumed that all GPUs are the same
check_metrics_supported(driver_node_id, unchecked_fields, checked_fields);
for (auto& [k, v] : temp_field_map_k) {
auto found = std::find(checked_fields.begin(), checked_fields.end(), v);
if (found != checked_fields.end()) {
field_to_metric.insert({k, v});
}
}
for (int i = 0; i < dev_count; ++i) {
if (!createHsaQueue(&queues[i], agent_arr.agents[i])) {
RDC_LOG(RDC_ERROR, "can't create queues[" << i << "]\n");
RDC_LOG(RDC_DEBUG, "Rocprofiler supports " << field_to_metric.size() << " fields");
for (auto& [k, v] : field_to_metric) {
const char* str = v;
metric_to_value.insert({str, 0.0});
}
assert(metric_to_value.size() == field_to_metric.size());
for (uint32_t gpu_index = 0; gpu_index < agent_arr.count; gpu_index++) {
for (auto& metric : metric_to_value) {
rocprofiler_feature_t temp_feature;
temp_feature.kind = (rocprofiler_feature_kind_t)ROCPROFILER_FEATURE_KIND_METRIC;
temp_feature.name = metric.first;
feature.insert({gpu_index, temp_feature});
}
}
for (uint32_t gpu_index = 0; gpu_index < agent_arr.count; gpu_index++) {
queues.push_back(nullptr);
if (!createHsaQueue(&queues[gpu_index], agent_arr.agents[gpu_index])) {
RDC_LOG(RDC_ERROR, "can't create queues[" << gpu_index << "]\n");
}
}
}
// Shuts the HSA runtime down.
//
// Profiler contexts are opened and closed inside run_profiler(), so there is
// nothing profiler-related left to stop or close here.
//
// NOTE(review): the second hsa_shut_down() is expected to report
// HSA_STATUS_ERROR_NOT_INITIALIZED, i.e. this object is assumed to hold the
// only outstanding hsa_init() reference - confirm that no other component in
// the process initialises HSA. Both checks are asserts only and vanish under
// NDEBUG.
RdcRocpBase::~RdcRocpBase() {
  hsa_status_t status = hsa_shut_down();
  assert(status == HSA_STATUS_SUCCESS);

  status = hsa_shut_down();
  assert(status == HSA_STATUS_ERROR_NOT_INITIALIZED);
}
rdc_status_t RdcRocpBase::rocp_lookup(pair_gpu_field_t gpu_field, double* value) {
rdc_status_t RdcRocpBase::rocp_lookup(uint32_t gpu_index, rdc_field_t field, double* value) {
if (value == nullptr) {
return RDC_ST_BAD_PARAMETER;
}
@@ -271,17 +331,17 @@ rdc_status_t RdcRocpBase::rocp_lookup(pair_gpu_field_t gpu_field, double* value)
if (status != HSA_STATUS_SUCCESS) {
return Rocp2RdcError(status);
}
switch (gpu_field.second) {
switch (field) {
default:
run_profiler(counter_map_k.at(gpu_field.second));
*value = metrics[counter_map_k.at(gpu_field.second)];
run_profiler(gpu_index, field);
*value = metric_to_value[field_to_metric[field]];
break;
}
return Rocp2RdcError(status);
}
rdc_status_t RdcRocpBase::Rocp2RdcError(hsa_status_t rocm_status) {
switch (rocm_status) {
rdc_status_t RdcRocpBase::Rocp2RdcError(hsa_status_t status) {
switch (status) {
case HSA_STATUS_SUCCESS:
return RDC_ST_OK;
default:
+4 -4
Zobrazit soubor
@@ -20,6 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <math.h>
#include <sys/time.h>
#include <algorithm>
@@ -40,7 +41,6 @@ amd::rdc::RdcRocpBase rocp;
rdc_status_t rdc_module_init(uint64_t flags) { return RDC_ST_OK; }
// get supported field ids
// TODO: Query fields with rocprofiler
rdc_status_t rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FIELDS], uint32_t* field_count) {
// extract all keys from counter_map
std::vector<rdc_field_t> fields = rocp.get_field_ids();
@@ -69,7 +69,7 @@ rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, const uint3
rdc_gpu_field_value_t values[BULK_FIELDS_MAX];
uint32_t bulk_count = 0;
rdc_status_t status = RDC_ST_UNKNOWN_ERROR;
double data;
double data = NAN;
for (uint32_t i = 0; i < fields_count; i++) {
if (bulk_count >= BULK_FIELDS_MAX) {
@@ -81,7 +81,7 @@ rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, const uint3
bulk_count = 0;
}
status = rocp.rocp_lookup(std::make_pair(fields[i].gpu_index, fields[i].field_id), &data);
status = rocp.rocp_lookup(fields[i].gpu_index, fields[i].field_id, &data);
// get value
values[bulk_count].gpu_index = fields[i].gpu_index;
values[bulk_count].field_value.type = DOUBLE;
@@ -92,7 +92,7 @@ rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, const uint3
bulk_count++;
}
if (bulk_count != 0) {
rdc_status_t status = callback(values, bulk_count, user_data);
status = callback(values, bulk_count, user_data);
if (status != RDC_ST_OK) {
return status;
}
+2 -4
Zobrazit soubor
@@ -13,15 +13,13 @@ option(INSTALL_GTEST "Install GTest (only useful if GTest is not already install
# Hack to find libraries after installation
# /opt/rocm/share/rdc/rdctst_tests/../../../ = /opt/rocm
set(RDCTST_RPATH
"\$ORIGIN"
"\$ORIGIN/../../../lib"
"\$ORIGIN/../../../lib/rdc")
# replace lib with lib64
list(TRANSFORM RDCTST_RPATH REPLACE "lib" "lib64" OUTPUT_VARIABLE RDCTST_RPATH64)
# combine lists
set(CMAKE_INSTALL_RPATH
${CMAKE_INSTALL_RPATH}
${RDCTST_RPATH}
${RDCTST_RPATH64})
${RDCTST_RPATH})
#
# Print out the build configuration being used: