562 строки
21 KiB
C++
562 строки
21 KiB
C++
/*
|
|
Copyright (c) 2022 - present Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
in the Software without restriction, including without limitation the rights
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
THE SOFTWARE.
|
|
*/
|
|
|
|
#include "rdc_modules/rdc_rocp/RdcRocpBase.h"
|
|
|
|
#include <rocprofiler-sdk/fwd.h>
|
|
#include <rocprofiler-sdk/registration.h>
|
|
#include <rocprofiler-sdk/rocprofiler.h>
|
|
#include <sys/wait.h>
|
|
#include <unistd.h>
|
|
|
|
#include <algorithm>
|
|
#include <cassert>
|
|
#include <chrono>
|
|
#include <cmath>
|
|
#include <cstdint>
|
|
#include <cstdio>
|
|
#include <cstring>
|
|
#include <stdexcept>
|
|
#include <vector>
|
|
|
|
// #include "hsa.h"
|
|
#include "amd_smi/amdsmi.h"
|
|
#include "rdc/rdc.h"
|
|
#include "rdc_lib/RdcLogger.h"
|
|
#include "rdc_lib/RdcTelemetryLibInterface.h"
|
|
#include "rdc_lib/impl/SmiUtils.h"
|
|
#include "rdc_modules/rdc_rocp/RdcRocpCounterSampler.h"
|
|
|
|
namespace amd {
|
|
namespace rdc {
|
|
|
|
static const std::map<rdc_field_t, const char*> temp_field_map_k = {
|
|
{RDC_FI_PROF_OCCUPANCY_PERCENT, "OccupancyPercent"},
|
|
{RDC_FI_PROF_ACTIVE_CYCLES, "GRBM_GUI_ACTIVE"},
|
|
{RDC_FI_PROF_ACTIVE_WAVES, "SQ_WAVES"},
|
|
{RDC_FI_PROF_ELAPSED_CYCLES, "GRBM_COUNT"},
|
|
{RDC_FI_PROF_TENSOR_ACTIVE_PERCENT,
|
|
"MfmaUtil"}, // same as TENSOR_ACTIVE but available for more GPUs
|
|
{RDC_FI_PROF_GPU_UTIL_PERCENT, "GPU_UTIL"}, // metric is divided by 100 to get percent
|
|
// metrics below are divided by time passed
|
|
{RDC_FI_PROF_EVAL_MEM_R_BW, "FETCH_SIZE"},
|
|
{RDC_FI_PROF_EVAL_MEM_W_BW, "WRITE_SIZE"},
|
|
{RDC_FI_PROF_EVAL_FLOPS_16, "TOTAL_16_OPS"},
|
|
{RDC_FI_PROF_EVAL_FLOPS_32, "TOTAL_32_OPS"},
|
|
{RDC_FI_PROF_EVAL_FLOPS_64, "TOTAL_64_OPS"},
|
|
{RDC_FI_PROF_EVAL_FLOPS_16_PERCENT, "RDC_OPS_16_PER_SIMDCYCLE"},
|
|
{RDC_FI_PROF_EVAL_FLOPS_32_PERCENT, "RDC_OPS_32_PER_SIMDCYCLE"},
|
|
{RDC_FI_PROF_EVAL_FLOPS_64_PERCENT, "RDC_OPS_64_PER_SIMDCYCLE"},
|
|
// metrics below are not divided by time passed
|
|
{RDC_FI_PROF_VALU_PIPE_ISSUE_UTIL, "ValuPipeIssueUtil"},
|
|
{RDC_FI_PROF_SM_ACTIVE, "VALUBusy"},
|
|
{RDC_FI_PROF_OCC_PER_ACTIVE_CU, "MeanOccupancyPerActiveCU"},
|
|
{RDC_FI_PROF_OCC_ELAPSED,
|
|
"GRBM_GUI_ACTIVE"}, // this metric is derived from OCC_PER_ACTIVE_CU and ACTIVE_CYCLES
|
|
{RDC_FI_PROF_CPC_CPC_STAT_BUSY, "CPC_CPC_STAT_BUSY"},
|
|
{RDC_FI_PROF_CPC_CPC_STAT_IDLE, "CPC_CPC_STAT_IDLE"},
|
|
{RDC_FI_PROF_CPC_CPC_STAT_STALL, "CPC_CPC_STAT_STALL"},
|
|
{RDC_FI_PROF_CPC_CPC_TCIU_BUSY, "CPC_CPC_TCIU_BUSY"},
|
|
{RDC_FI_PROF_CPC_CPC_TCIU_IDLE, "CPC_CPC_TCIU_IDLE"},
|
|
{RDC_FI_PROF_CPC_CPC_UTCL2IU_BUSY, "CPC_CPC_UTCL2IU_BUSY"},
|
|
{RDC_FI_PROF_CPC_CPC_UTCL2IU_IDLE, "CPC_CPC_UTCL2IU_IDLE"},
|
|
{RDC_FI_PROF_CPC_CPC_UTCL2IU_STALL, "CPC_CPC_UTCL2IU_STALL"},
|
|
{RDC_FI_PROF_CPC_ME1_BUSY_FOR_PACKET_DECODE, "CPC_ME1_BUSY_FOR_PACKET_DECODE"},
|
|
{RDC_FI_PROF_CPC_ME1_DC0_SPI_BUSY, "CPC_ME1_DC0_SPI_BUSY"},
|
|
{RDC_FI_PROF_CPC_UTCL1_STALL_ON_TRANSLATION, "CPC_UTCL1_STALL_ON_TRANSLATION"},
|
|
{RDC_FI_PROF_CPC_ALWAYS_COUNT, "CPC_ALWAYS_COUNT"},
|
|
{RDC_FI_PROF_CPC_ADC_VALID_CHUNK_NOT_AVAIL, "CPC_ADC_VALID_CHUNK_NOT_AVAIL"},
|
|
{RDC_FI_PROF_CPC_ADC_DISPATCH_ALLOC_DONE, "CPC_ADC_DISPATCH_ALLOC_DONE"},
|
|
{RDC_FI_PROF_CPC_ADC_VALID_CHUNK_END, "CPC_ADC_VALID_CHUNK_END"},
|
|
{RDC_FI_PROF_CPC_SYNC_FIFO_FULL_LEVEL, "CPC_SYNC_FIFO_FULL_LEVEL"},
|
|
{RDC_FI_PROF_CPC_SYNC_FIFO_FULL, "CPC_SYNC_FIFO_FULL"},
|
|
{RDC_FI_PROF_CPC_GD_BUSY, "CPC_GD_BUSY"},
|
|
{RDC_FI_PROF_CPC_TG_SEND, "CPC_TG_SEND"},
|
|
{RDC_FI_PROF_CPC_WALK_NEXT_CHUNK, "CPC_WALK_NEXT_CHUNK"},
|
|
{RDC_FI_PROF_CPC_STALLED_BY_SE0_SPI, "CPC_STALLED_BY_SE0_SPI"},
|
|
{RDC_FI_PROF_CPC_STALLED_BY_SE1_SPI, "CPC_STALLED_BY_SE1_SPI"},
|
|
{RDC_FI_PROF_CPC_STALLED_BY_SE2_SPI, "CPC_STALLED_BY_SE2_SPI"},
|
|
{RDC_FI_PROF_CPC_STALLED_BY_SE3_SPI, "CPC_STALLED_BY_SE3_SPI"},
|
|
{RDC_FI_PROF_CPC_LTE_ALL, "CPC_LTE_ALL"},
|
|
{RDC_FI_PROF_CPC_SYNC_WRREQ_FIFO_BUSY, "CPC_SYNC_WRREQ_FIFO_BUSY"},
|
|
{RDC_FI_PROF_CPC_CANE_BUSY, "CPC_CANE_BUSY"},
|
|
{RDC_FI_PROF_CPC_CANE_STALL, "CPC_CANE_STALL"},
|
|
{RDC_FI_PROF_CPF_CMP_UTCL1_STALL_ON_TRANSLATION, "CPF_CMP_UTCL1_STALL_ON_TRANSLATION"},
|
|
{RDC_FI_PROF_CPF_CPF_STAT_BUSY, "CPF_CPF_STAT_BUSY"},
|
|
{RDC_FI_PROF_CPF_CPF_STAT_IDLE, "CPF_CPF_STAT_IDLE"},
|
|
{RDC_FI_PROF_CPF_CPF_STAT_STALL, "CPF_CPF_STAT_STALL"},
|
|
{RDC_FI_PROF_CPF_CPF_TCIU_BUSY, "CPF_CPF_TCIU_BUSY"},
|
|
{RDC_FI_PROF_CPF_CPF_TCIU_IDLE, "CPF_CPF_TCIU_IDLE"},
|
|
{RDC_FI_PROF_CPF_CPF_TCIU_STALL, "CPF_CPF_TCIU_STALL"},
|
|
{RDC_FI_PROF_SIMD_UTILIZATION, "SIMD_UTILIZATION"},
|
|
};
|
|
|
|
double RdcRocpBase::run_profiler(uint32_t agent_index, rdc_field_t field) {
|
|
thread_local std::vector<rocprofiler_record_counter_t> records;
|
|
|
|
auto counter_sampler = CounterSampler::get_samplers()[agent_index];
|
|
if (!counter_sampler) {
|
|
RDC_LOG(RDC_ERROR, "Error: Counter sampler not found for GPU index " << agent_index);
|
|
return RDC_ST_BAD_PARAMETER;
|
|
}
|
|
|
|
auto field_it = field_to_metric.find(field);
|
|
if (field_it == field_to_metric.end()) {
|
|
RDC_LOG(RDC_ERROR, "Error: Field " << field << " not found in field_to_metric map.");
|
|
return RDC_ST_BAD_PARAMETER;
|
|
}
|
|
const std::string& metric_id = field_it->second;
|
|
|
|
try {
|
|
counter_sampler->sample_counter_values({metric_id}, records, collection_duration_us_k);
|
|
} catch (const std::exception& e) {
|
|
RDC_LOG(RDC_ERROR, "Error while sampling counter values: " << e.what());
|
|
return RDC_ST_BAD_PARAMETER;
|
|
}
|
|
|
|
// Aggregate counter values. Rocprof v1/v2 summed values across dimensions.
|
|
double value = 0.0;
|
|
for (auto& record : records) {
|
|
value += record.counter_value; // Summing up values from all dimensions.
|
|
}
|
|
|
|
return value;
|
|
}
|
|
|
|
const char* RdcRocpBase::get_field_id_from_name(rdc_field_t field) {
|
|
auto it = field_to_metric.find(field);
|
|
if (it == field_to_metric.end()) {
|
|
RDC_LOG(RDC_ERROR, "Error: Field ID " << field << " not found in field_to_metric map.");
|
|
return "";
|
|
}
|
|
|
|
return field_to_metric.at(field);
|
|
}
|
|
|
|
const std::vector<rdc_field_t> RdcRocpBase::get_field_ids() {
|
|
std::vector<rdc_field_t> field_ids;
|
|
field_ids.reserve(field_to_metric.size());
|
|
for (auto& [k, v] : field_to_metric) {
|
|
field_ids.push_back(k);
|
|
}
|
|
return field_ids;
|
|
}
|
|
|
|
rdc_status_t RdcRocpBase::map_entity_to_profiler() {
|
|
// std::map<uint32_t, uint32_t> entity_to_index_map;
|
|
// kfd_id_t is only used inside this function
|
|
typedef uint64_t kfd_id_t;
|
|
std::map<uint32_t, kfd_id_t> prof_kfd_map;
|
|
|
|
// populate profiler map
|
|
for (uint32_t prof_gpu_index = 0; prof_gpu_index < agents.size(); prof_gpu_index++) {
|
|
prof_kfd_map.insert({prof_gpu_index, agents[prof_gpu_index].gpu_id});
|
|
}
|
|
|
|
std::vector<amdsmi_socket_handle> sockets;
|
|
auto amdsmi_status = get_socket_handles(sockets);
|
|
if (amdsmi_status != AMDSMI_STATUS_SUCCESS) {
|
|
RDC_LOG(RDC_ERROR, "Failed to get socket handles: " << amdsmi_status);
|
|
return Smi2RdcError(amdsmi_status);
|
|
}
|
|
|
|
for (int socket_index = 0; socket_index < sockets.size(); socket_index++) {
|
|
auto* socket = sockets[socket_index];
|
|
std::vector<amdsmi_processor_handle> processors;
|
|
amdsmi_status = get_processor_handles(socket, processors);
|
|
if (amdsmi_status != AMDSMI_STATUS_SUCCESS) {
|
|
RDC_LOG(RDC_ERROR, "Failed to get processor handles for socket " << socket_index << ": "
|
|
<< amdsmi_status);
|
|
return Smi2RdcError(amdsmi_status);
|
|
}
|
|
|
|
for (int processor_index = 0; processor_index < processors.size(); processor_index++) {
|
|
auto* processor = processors[processor_index];
|
|
processor_type_t processor_type = AMDSMI_PROCESSOR_TYPE_UNKNOWN;
|
|
amdsmi_status = amdsmi_get_processor_type(processor, &processor_type);
|
|
if (amdsmi_status != AMDSMI_STATUS_SUCCESS) {
|
|
RDC_LOG(RDC_ERROR, "Failed to get processor type for processor "
|
|
<< processor_index << " on socket " << socket_index << ": "
|
|
<< amdsmi_status);
|
|
return Smi2RdcError(amdsmi_status);
|
|
}
|
|
if (processor_type != AMDSMI_PROCESSOR_TYPE_AMD_GPU) {
|
|
continue;
|
|
}
|
|
|
|
amdsmi_kfd_info_t kfd_info;
|
|
amdsmi_status = amdsmi_get_gpu_kfd_info(processor, &kfd_info);
|
|
if (amdsmi_status != AMDSMI_STATUS_SUCCESS) {
|
|
RDC_LOG(RDC_ERROR, "Failed to get KFD info for processor "
|
|
<< processor_index << " on socket " << socket_index << ": "
|
|
<< amdsmi_status);
|
|
return Smi2RdcError(amdsmi_status);
|
|
}
|
|
|
|
rdc_entity_info_t entity_info = {
|
|
.device_index = static_cast<uint32_t>(socket_index),
|
|
.instance_index = static_cast<uint32_t>(processor_index),
|
|
.entity_role = RDC_DEVICE_ROLE_PHYSICAL,
|
|
.device_type = RDC_DEVICE_TYPE_GPU,
|
|
};
|
|
|
|
uint32_t entity_index = rdc_get_entity_index_from_info(entity_info);
|
|
|
|
for (const auto& [prof_index, prof_id] : prof_kfd_map) {
|
|
if (std::memcmp(&kfd_info.kfd_id, &prof_id, sizeof(kfd_id_t)) == 0) {
|
|
// match found
|
|
// clang-format off
|
|
RDC_LOG(RDC_DEBUG, "SMI[" << entity_index << "] <-> Profiler[" << prof_index << "] = KFD_ID[" << prof_id << "]");
|
|
// clang-format on
|
|
if (entity_info.entity_role == RDC_DEVICE_ROLE_PHYSICAL) {
|
|
entity_index = rdc_get_entity_index_from_info(entity_info);
|
|
entity_to_prof_map.insert({entity_index, prof_index});
|
|
}
|
|
if (processors.size() > 1) {
|
|
// if there are multiple processors, also add entity with partition instance type
|
|
entity_info.entity_role = RDC_DEVICE_ROLE_PARTITION_INSTANCE;
|
|
entity_index = rdc_get_entity_index_from_info(entity_info);
|
|
entity_to_prof_map.insert({entity_index, prof_index});
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return RDC_ST_OK;
|
|
}
|
|
|
|
void RdcRocpBase::init_rocp_if_not() {
|
|
if (m_is_initialized) {
|
|
return;
|
|
}
|
|
|
|
// ensure initialization is attempted only once, even if it fails
|
|
m_is_initialized = true;
|
|
|
|
hsa_status_t status = hsa_init();
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
const char* errstr = nullptr;
|
|
hsa_status_string(status, &errstr);
|
|
throw std::runtime_error("hsa error code: " + std::to_string(status) + " " + errstr);
|
|
}
|
|
|
|
// check rocprofiler
|
|
if (int rocp_status = 0;
|
|
rocprofiler_is_initialized(&rocp_status) == ROCPROFILER_STATUS_SUCCESS && rocp_status != 1) {
|
|
throw std::runtime_error("Rocprofiler is not initialized. status: " +
|
|
std::to_string(rocp_status));
|
|
}
|
|
|
|
std::vector<std::string> all_fields;
|
|
std::vector<std::string> checked_fields;
|
|
|
|
// populate list of agents
|
|
agents = CounterSampler::get_available_agents();
|
|
RDC_LOG(RDC_DEBUG, "Agent count: " << agents.size());
|
|
samplers = CounterSampler::get_samplers();
|
|
|
|
map_entity_to_profiler();
|
|
|
|
// find intersection of supported and requested fields
|
|
uint32_t agent_index = 0;
|
|
auto& cs = *samplers[agent_index];
|
|
RDC_LOG(RDC_DEBUG, "agent_index[" << agent_index << "] location_id["
|
|
<< agents[agent_index].location_id << "]");
|
|
|
|
for (auto& [str, id] : CounterSampler::get_supported_counters(cs.get_agent())) {
|
|
checked_fields.emplace_back(str);
|
|
}
|
|
|
|
for (const auto& [k, v] : temp_field_map_k) {
|
|
auto found = std::find(checked_fields.begin(), checked_fields.end(), v);
|
|
if (found != checked_fields.end()) {
|
|
field_to_metric.insert({k, v});
|
|
}
|
|
}
|
|
|
|
// populate fields
|
|
all_fields.reserve(temp_field_map_k.size());
|
|
for (const auto& [k, v] : temp_field_map_k) {
|
|
all_fields.emplace_back(v);
|
|
}
|
|
|
|
RDC_LOG(RDC_DEBUG, "Profiler supports " << field_to_metric.size() << " fields");
|
|
}
|
|
|
|
RdcRocpBase::RdcRocpBase() {
|
|
// To verify if a field is actually supported by rocprofiler,
|
|
// initialization and agent querying are required.
|
|
// This initialization is deferred until the first call to rocp_lookup.
|
|
// Here, we define the potential fields that rocprofiler may support,
|
|
// allowing get_field_ids() to return them.
|
|
for (const auto& [k, v] : temp_field_map_k) {
|
|
field_to_metric.insert({k, v});
|
|
}
|
|
|
|
RDC_LOG(RDC_DEBUG, "Rocprofiler by default supports " << field_to_metric.size() << " fields");
|
|
}
|
|
|
|
RdcRocpBase::~RdcRocpBase() {
|
|
hsa_status_t status = HSA_STATUS_SUCCESS;
|
|
status = hsa_shut_down();
|
|
assert(status == HSA_STATUS_SUCCESS);
|
|
status = hsa_shut_down();
|
|
assert(status == HSA_STATUS_ERROR_NOT_INITIALIZED);
|
|
}
|
|
|
|
rdc_status_t RdcRocpBase::rocp_lookup(rdc_gpu_field_t gpu_field, rdc_field_value_data* data,
|
|
rdc_field_type_t* type) {
|
|
// default type
|
|
*type = DOUBLE;
|
|
|
|
// convert from entity to flat index
|
|
uint32_t agent_index = entity_to_prof_map[gpu_field.gpu_index];
|
|
const auto& field = gpu_field.field_id;
|
|
|
|
if (data == nullptr) {
|
|
return RDC_ST_BAD_PARAMETER;
|
|
}
|
|
|
|
init_rocp_if_not();
|
|
|
|
const auto start_time = std::chrono::high_resolution_clock::now();
|
|
// direct read from rocprofiler
|
|
const double read_dbl = run_profiler(agent_index, field);
|
|
const auto stop_time = std::chrono::high_resolution_clock::now();
|
|
const double elapsed = std::chrono::duration<double, std::milli>(stop_time - start_time).count();
|
|
|
|
// For OCC_ELAPSED, we need to read the occupancy metric as well
|
|
std::map<std::string, double> sampled_values;
|
|
if (field == RDC_FI_PROF_OCC_ELAPSED) {
|
|
const double occupancy_val = run_profiler(agent_index, RDC_FI_PROF_OCC_PER_ACTIVE_CU);
|
|
auto occ_field_it = field_to_metric.find(RDC_FI_PROF_OCC_PER_ACTIVE_CU);
|
|
if (occ_field_it != field_to_metric.end()) {
|
|
sampled_values[occ_field_it->second] = occupancy_val;
|
|
}
|
|
}
|
|
|
|
// Apply field transformations using the helper function
|
|
return apply_field_transformation(field, agent_index, read_dbl, elapsed, sampled_values, data,
|
|
type);
|
|
}
|
|
|
|
rdc_status_t RdcRocpBase::rocp_lookup_bulk(const std::vector<rdc_gpu_field_t>& fields,
|
|
std::vector<rdc_field_value_data>& values,
|
|
std::vector<rdc_field_type_t>& types,
|
|
std::vector<rdc_status_t>& statuses) {
|
|
if (fields.empty()) {
|
|
return RDC_ST_OK;
|
|
}
|
|
|
|
init_rocp_if_not();
|
|
|
|
// Resize output vectors
|
|
values.resize(fields.size());
|
|
types.resize(fields.size());
|
|
statuses.resize(fields.size());
|
|
|
|
// All fields should be for the same GPU
|
|
uint32_t agent_index = entity_to_prof_map[fields[0].gpu_index];
|
|
|
|
// Collect all unique metric names needed for sampling
|
|
std::vector<std::string> metrics_to_sample;
|
|
std::map<rdc_field_t, size_t> field_to_metric_index; // Maps field to position in metrics_to_sample
|
|
|
|
for (size_t i = 0; i < fields.size(); i++) {
|
|
const auto& field = fields[i].field_id;
|
|
types[i] = DOUBLE; // Default type
|
|
statuses[i] = RDC_ST_OK;
|
|
|
|
// Get metric name for this field
|
|
auto field_it = field_to_metric.find(field);
|
|
if (field_it == field_to_metric.end()) {
|
|
RDC_LOG(RDC_ERROR, "Error: Field " << field << " not found in field_to_metric map.");
|
|
statuses[i] = RDC_ST_BAD_PARAMETER;
|
|
continue;
|
|
}
|
|
|
|
const std::string metric_name = field_it->second;
|
|
|
|
// Check if we've already added this metric
|
|
if (field_to_metric_index.find(field) == field_to_metric_index.end()) {
|
|
field_to_metric_index[field] = metrics_to_sample.size();
|
|
metrics_to_sample.push_back(metric_name);
|
|
}
|
|
|
|
// Special case: RDC_FI_PROF_OCC_ELAPSED needs two metrics
|
|
if (field == RDC_FI_PROF_OCC_ELAPSED) {
|
|
if (field_to_metric_index.find(RDC_FI_PROF_OCC_PER_ACTIVE_CU) == field_to_metric_index.end()) {
|
|
auto occ_field_it = field_to_metric.find(RDC_FI_PROF_OCC_PER_ACTIVE_CU);
|
|
if (occ_field_it != field_to_metric.end()) {
|
|
field_to_metric_index[RDC_FI_PROF_OCC_PER_ACTIVE_CU] = metrics_to_sample.size();
|
|
metrics_to_sample.push_back(occ_field_it->second);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Sample all counters at once using greedy packing
|
|
std::map<std::string, double> sampled_values;
|
|
auto counter_sampler = CounterSampler::get_samplers()[agent_index];
|
|
if (!counter_sampler) {
|
|
RDC_LOG(RDC_ERROR, "Error: Counter sampler not found for GPU index " << agent_index);
|
|
for (size_t i = 0; i < fields.size(); i++) {
|
|
statuses[i] = RDC_ST_BAD_PARAMETER;
|
|
}
|
|
return RDC_ST_BAD_PARAMETER;
|
|
}
|
|
|
|
const auto start_time = std::chrono::high_resolution_clock::now();
|
|
if (!metrics_to_sample.empty()) {
|
|
try {
|
|
counter_sampler->sample_counters_with_packing(metrics_to_sample, sampled_values,
|
|
collection_duration_us_k);
|
|
} catch (const std::exception& e) {
|
|
RDC_LOG(RDC_ERROR, "Error while sampling counter values: " << e.what());
|
|
for (size_t i = 0; i < fields.size(); i++) {
|
|
statuses[i] = RDC_ST_BAD_PARAMETER;
|
|
}
|
|
return RDC_ST_BAD_PARAMETER;
|
|
}
|
|
}
|
|
const auto stop_time = std::chrono::high_resolution_clock::now();
|
|
const double elapsed = std::chrono::duration<double, std::milli>(stop_time - start_time).count();
|
|
|
|
// Process results for each field
|
|
for (size_t i = 0; i < fields.size(); i++) {
|
|
const auto& field = fields[i].field_id;
|
|
|
|
// Skip fields that had errors earlier
|
|
if (statuses[i] != RDC_ST_OK) {
|
|
continue;
|
|
}
|
|
|
|
// Get the sampled value for this field
|
|
auto field_it = field_to_metric.find(field);
|
|
if (field_it == field_to_metric.end()) {
|
|
continue;
|
|
}
|
|
|
|
const std::string& metric_name = field_it->second;
|
|
auto sampled_it = sampled_values.find(metric_name);
|
|
if (sampled_it == sampled_values.end()) {
|
|
RDC_LOG(RDC_ERROR, "Error: Metric " << metric_name << " not found in sampled values.");
|
|
statuses[i] = RDC_ST_BAD_PARAMETER;
|
|
continue;
|
|
}
|
|
|
|
double read_dbl = sampled_it->second;
|
|
|
|
// Apply field transformation using the helper function
|
|
statuses[i] = apply_field_transformation(field, agent_index, read_dbl, elapsed,
|
|
sampled_values, &values[i], &types[i]);
|
|
}
|
|
|
|
return RDC_ST_OK;
|
|
}
|
|
|
|
rdc_status_t RdcRocpBase::apply_field_transformation(
|
|
rdc_field_t field, uint32_t agent_index, double raw_value, double elapsed_time_ms,
|
|
const std::map<std::string, double>& sampled_values, rdc_field_value_data* output,
|
|
rdc_field_type_t* type) {
|
|
|
|
// Default type is DOUBLE
|
|
*type = DOUBLE;
|
|
|
|
const bool is_eval_field = (eval_fields.find(field) != eval_fields.end());
|
|
|
|
// Calculate divided value for eval fields
|
|
double divided_dbl = NAN;
|
|
if (is_eval_field) {
|
|
if (elapsed_time_ms != 0.0) {
|
|
divided_dbl = raw_value / (elapsed_time_ms / 1000.0);
|
|
} else {
|
|
RDC_LOG(RDC_ERROR, "Error: Elapsed time is zero. Cannot divide by zero.");
|
|
return RDC_ST_BAD_PARAMETER;
|
|
}
|
|
}
|
|
|
|
switch (field) {
|
|
case RDC_FI_PROF_GPU_UTIL_PERCENT:
|
|
output->dbl = raw_value / 100.0F;
|
|
break;
|
|
|
|
case RDC_FI_PROF_OCC_ELAPSED: {
|
|
const double active_cycles_val = raw_value;
|
|
if (active_cycles_val != 0.0) {
|
|
// Look for the occupancy metric in sampled values
|
|
auto occ_field_it = field_to_metric.find(RDC_FI_PROF_OCC_PER_ACTIVE_CU);
|
|
if (occ_field_it != field_to_metric.end()) {
|
|
auto occ_sampled_it = sampled_values.find(occ_field_it->second);
|
|
if (occ_sampled_it != sampled_values.end()) {
|
|
const double occupancy_val = occ_sampled_it->second;
|
|
output->dbl = occupancy_val / active_cycles_val;
|
|
} else {
|
|
return RDC_ST_BAD_PARAMETER;
|
|
}
|
|
} else {
|
|
return RDC_ST_BAD_PARAMETER;
|
|
}
|
|
} else {
|
|
return RDC_ST_BAD_PARAMETER;
|
|
}
|
|
} break;
|
|
|
|
case RDC_FI_PROF_EVAL_FLOPS_16_PERCENT: {
|
|
if (!is_eval_field) {
|
|
RDC_LOG(RDC_ERROR, "Field expected to be in the eval_fields list but it isn't!");
|
|
return RDC_ST_BAD_PARAMETER;
|
|
}
|
|
const std::string target_version = agents[agent_index].name;
|
|
const bool isMI200 = (target_version.find("gfx90a") != std::string::npos);
|
|
if (isMI200) {
|
|
output->dbl = divided_dbl / (1024.0F / static_cast<double>(agents[agent_index].simd_per_cu));
|
|
} else { // Assume mi300
|
|
output->dbl = divided_dbl / (2048.0F / static_cast<double>(agents[agent_index].simd_per_cu));
|
|
}
|
|
} break;
|
|
|
|
case RDC_FI_PROF_EVAL_FLOPS_32_PERCENT:
|
|
case RDC_FI_PROF_EVAL_FLOPS_64_PERCENT:
|
|
if (!is_eval_field) {
|
|
RDC_LOG(RDC_ERROR, "Field expected to be in the eval_fields list but it isn't!");
|
|
return RDC_ST_BAD_PARAMETER;
|
|
}
|
|
output->dbl = divided_dbl / (256.0F / static_cast<double>(agents[agent_index].simd_per_cu));
|
|
break;
|
|
|
|
default:
|
|
if (is_eval_field) {
|
|
output->dbl = divided_dbl;
|
|
} else {
|
|
output->dbl = raw_value;
|
|
}
|
|
break;
|
|
}
|
|
|
|
return RDC_ST_OK;
|
|
}
|
|
|
|
} // namespace rdc
|
|
} // namespace amd
|