Show VCN and JPEG busy values where VCN/JPEG activity is not supported. (#232)

In AMD SMI as shipped with ROCm 7.0, vcn_activity and jpeg_activity are not reported when the per-XCP (partition) stats, vcn_busy and jpeg_busy, are available, which causes activity tracking to fail. The fix is to read the busy values when the activity values are not supported.

For issue: SWDEV-536439

---------

Co-authored-by: David Galiffi <David.Galiffi@amd.com>
此提交包含在:
Sajina PK
2025-06-19 16:23:30 -04:00
提交者 GitHub
父節點 20efb9641f
當前提交 e3741f678b
共有 3 個檔案被更改,包括 183 行新增74 行删除
+34 -37
查看文件
@@ -73,6 +73,17 @@ check_amdsmi_error(amdsmi_status_t _code, const char* _file, int _line)
_msg);
}
// Ensures initialization happens only once
std::once_flag amdsmi_once;
// Accessor for the flag recording whether AMD SMI initialization succeeded.
// Returns a mutable reference to a function-local static (initially false),
// so callers can both query the flag and assign to it through the same call.
bool&
_amdsmi_is_initialized()
{
    static bool _init_flag{ false };
    return _init_flag;
}
bool
amdsmi_init()
{
@@ -82,10 +93,12 @@ amdsmi_init()
// Currently, only AMDSMI_INIT_AMD_GPUS is supported
ROCPROFSYS_AMD_SMI_CALL(::amdsmi_init(AMDSMI_INIT_AMD_GPUS));
get_processor_handles();
_amdsmi_is_initialized() = true; // Mark as initialized
} catch(std::exception& _e)
{
ROCPROFSYS_BASIC_VERBOSE(1, "Exception thrown initializing amd-smi: %s\n",
_e.what());
_amdsmi_is_initialized() = false; // Mark as not initialized
return false;
}
return true;
@@ -143,7 +156,9 @@ bool
initialize_amdsmi()
{
#if ROCPROFSYS_USE_ROCM > 0
return (amdsmi_init()) ? true : false;
// Ensure initialization happens only once
std::call_once(amdsmi_once, amdsmi_init);
return _amdsmi_is_initialized();
#else
return false;
#endif
@@ -252,45 +267,27 @@ get_processor_handles()
processors::processors_list.push_back(processor);
amdsmi_gpu_metrics_t gpu_metrics;
bool vcn_supported = false;
bool jpeg_supported = false;
bool v_busy_supported = false;
bool j_busy_supported = false;
ret = amdsmi_get_gpu_metrics_info(processor, &gpu_metrics);
if(ret == AMDSMI_STATUS_SUCCESS)
bool vcn_supported = false, jpeg_supported = false;
bool v_busy_supported = false, j_busy_supported = false;
// AMD SMI will not report VCN_activity and JPEG_activity, if VCN_busy or
// JPEG_busy fields are available.
if(amdsmi_get_gpu_metrics_info(processor, &gpu_metrics) ==
AMDSMI_STATUS_SUCCESS)
{
for(const auto& vcn_activity : gpu_metrics.vcn_activity)
{
if(vcn_activity != UINT16_MAX)
{
vcn_supported = true;
break;
}
}
for(const auto& jpeg_activity : gpu_metrics.jpeg_activity)
{
if(jpeg_activity != UINT16_MAX)
{
jpeg_supported = true;
break;
}
}
// Helper lambda to check if any value in the array is valid
auto has_valid = [](const auto& arr) {
return std::any_of(std::begin(arr), std::end(arr),
[](auto val) { return val != UINT16_MAX; });
};
vcn_supported = has_valid(gpu_metrics.vcn_activity);
jpeg_supported = has_valid(gpu_metrics.jpeg_activity);
// Check if VCN and JPEG busy metrics are available
for(const auto& xcp : gpu_metrics.xcp_stats)
{
if(!v_busy_supported)
{
v_busy_supported =
std::any_of(std::begin(xcp.vcn_busy), std::end(xcp.vcn_busy),
[](uint16_t val) { return val != UINT16_MAX; });
}
if(!j_busy_supported)
{
j_busy_supported = std::any_of(
std::begin(xcp.jpeg_busy), std::end(xcp.jpeg_busy),
[](uint16_t val) { return val != UINT16_MAX; });
}
if(!v_busy_supported && has_valid(xcp.vcn_busy))
v_busy_supported = true;
if(!j_busy_supported && has_valid(xcp.jpeg_busy))
j_busy_supported = true;
if(v_busy_supported && j_busy_supported) break;
}
}
+138 -31
查看文件
@@ -143,6 +143,7 @@ data::sample(uint32_t _dev_id)
auto _ts = tim::get_clock_real_now<size_t, std::nano>();
assert(_ts < std::numeric_limits<int64_t>::max());
amdsmi_gpu_metrics_t _gpu_metrics;
bool _vcn_or_jpeg_activity_enabled = false;
auto _state = get_state().load();
@@ -184,18 +185,59 @@ data::sample(uint32_t _dev_id)
#endif
ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).mem_usage, amdsmi_get_gpu_memory_usage,
sample_handle, AMDSMI_MEM_TYPE_VRAM, &m_mem_usage);
ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).vcn_activity,
amdsmi_get_gpu_metrics_info, sample_handle, &_gpu_metrics);
ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).jpeg_activity,
amdsmi_get_gpu_metrics_info, sample_handle, &_gpu_metrics);
_vcn_or_jpeg_activity_enabled =
get_settings(m_dev_id).vcn_activity || get_settings(m_dev_id).jpeg_activity;
ROCPROFSYS_AMDSMI_GET(_vcn_or_jpeg_activity_enabled, amdsmi_get_gpu_metrics_info,
sample_handle, &_gpu_metrics);
for(const auto& v_activity : _gpu_metrics.vcn_activity)
// Process metrics if either VCN or JPEG activity is enabled
if(_vcn_or_jpeg_activity_enabled)
{
if(v_activity != UINT16_MAX) m_vcn_metrics.push_back(v_activity);
}
for(const auto& j_activity : _gpu_metrics.jpeg_activity)
{
if(j_activity != UINT16_MAX) m_jpeg_metrics.push_back(j_activity);
// Helper lambda to fill busy metrics from a source array
auto fill_busy_metrics = [](auto& dest, const auto& src) {
for(const auto& val : src)
{
if(val != UINT16_MAX) dest.push_back(val);
}
};
if(gpu::is_vcn_activity_supported(m_dev_id) &&
gpu::is_jpeg_activity_supported(m_dev_id))
{
// Both VCN and JPEG are supported - create one entry with both metrics
xcp_metrics_t metrics;
fill_busy_metrics(metrics.vcn_busy, _gpu_metrics.vcn_activity);
fill_busy_metrics(metrics.jpeg_busy, _gpu_metrics.jpeg_activity);
if(!metrics.vcn_busy.empty() || !metrics.jpeg_busy.empty())
m_xcp_metrics.push_back(metrics);
}
else if(gpu::is_vcn_activity_supported(m_dev_id))
{
// Only VCN is supported
xcp_metrics_t metrics;
fill_busy_metrics(metrics.vcn_busy, _gpu_metrics.vcn_activity);
if(!metrics.vcn_busy.empty()) m_xcp_metrics.push_back(metrics);
}
else if(gpu::is_jpeg_activity_supported(m_dev_id))
{
// Only JPEG is supported
xcp_metrics_t metrics;
fill_busy_metrics(metrics.jpeg_busy, _gpu_metrics.jpeg_activity);
if(!metrics.jpeg_busy.empty()) m_xcp_metrics.push_back(metrics);
}
else
{
// Neither is supported - use XCP stats
// Each XCP gets one entry with both its VCN and JPEG metrics
for(const auto& xcp : _gpu_metrics.xcp_stats)
{
xcp_metrics_t metrics;
fill_busy_metrics(metrics.vcn_busy, xcp.vcn_busy);
fill_busy_metrics(metrics.jpeg_busy, xcp.jpeg_busy);
if(!metrics.vcn_busy.empty() || !metrics.jpeg_busy.empty())
m_xcp_metrics.push_back(metrics);
}
}
}
#undef ROCPROFSYS_AMDSMI_GET
@@ -350,16 +392,19 @@ data::post_process(uint32_t _dev_id)
auto addendum = [&](const char* _v) {
return JOIN(" ", "GPU", _v, JOIN("", '[', _dev_id, ']'), "(S)");
};
auto addendum_blk = [&](std::size_t _i, const char* _metric) {
if(_i < 10)
auto addendum_blk = [&](std::size_t _i, const char* _metric,
std::size_t xcp_idx = SIZE_MAX) {
if(xcp_idx != SIZE_MAX)
{
return JOIN(" ", "GPU", JOIN("", '[', _dev_id, ']'), _metric,
JOIN("", "[0", _i, ']'), "(S)");
JOIN("", "XCP_", xcp_idx, ": [", (_i < 10 ? "0" : ""),
_i, ']'),
"(S)");
}
else
{
return JOIN(" ", "GPU", JOIN("", '[', _dev_id, ']'), _metric,
JOIN("", '[', _i, ']'), "(S)");
JOIN("", "[", (_i < 10 ? "0" : ""), _i, ']'), "(S)");
}
};
@@ -378,15 +423,61 @@ data::post_process(uint32_t _dev_id)
"megabytes");
if(_settings.vcn_activity)
{
for(std::size_t i = 0; i < std::size(itr.m_vcn_metrics); ++i)
counter_track::emplace(_dev_id, addendum_blk(i, " VCN Activity"),
"%");
if(itr.m_xcp_metrics.empty())
{
ROCPROFSYS_VERBOSE(
1, "No VCN activity data collected from device %u\n",
_dev_id);
}
else if(gpu::is_vcn_activity_supported(_dev_id))
{
// For VCN activity, use simple indexing
for(std::size_t i = 0;
i < std::size(itr.m_xcp_metrics[0].vcn_busy); ++i)
counter_track::emplace(_dev_id,
addendum_blk(i, "VCN Activity"), "%");
}
else
{
for(std::size_t xcp = 0; xcp < std::size(itr.m_xcp_metrics);
++xcp)
{
for(std::size_t i = 0;
i < std::size(itr.m_xcp_metrics[xcp].vcn_busy); ++i)
{
counter_track::emplace(
_dev_id, addendum_blk(i, "VCN Activity", xcp), "%");
}
}
}
}
if(_settings.jpeg_activity)
{
for(std::size_t i = 0; i < std::size(itr.m_jpeg_metrics); ++i)
counter_track::emplace(_dev_id, addendum_blk(i, "JPEG Activity"),
"%");
if(itr.m_xcp_metrics.empty())
{
ROCPROFSYS_VERBOSE(
1, "No JPEG activity data collected from device %u\n",
_dev_id);
}
else if(gpu::is_jpeg_activity_supported(_dev_id))
{
// For JPEG activity, use simple indexing
for(std::size_t i = 0;
i < std::size(itr.m_xcp_metrics[0].jpeg_busy); ++i)
counter_track::emplace(_dev_id,
addendum_blk(i, "JPEG Activity"), "%");
}
else
{
for(std::size_t xcp = 0; xcp < std::size(itr.m_xcp_metrics);
++xcp)
{
for(std::size_t i = 0;
i < std::size(itr.m_xcp_metrics[xcp].jpeg_busy); ++i)
counter_track::emplace(
_dev_id, addendum_blk(i, "JPEG Activity", xcp), "%");
}
}
}
}
uint64_t _ts = itr.m_ts;
@@ -417,25 +508,41 @@ data::post_process(uint32_t _dev_id)
if(_settings.mem_usage)
TRACE_COUNTER("device_memory_usage",
counter_track::at(_dev_id, _idx.at(5)), _ts, _usage);
if(_settings.vcn_activity)
if(_settings.vcn_activity && !itr.m_xcp_metrics.empty())
{
uint64_t idx = _idx.at(6);
for(const auto& temp : itr.m_vcn_metrics)
// Iterate over all XCPs and their VCN busy/activity values
for(const auto& metrics : itr.m_xcp_metrics)
{
TRACE_COUNTER("device_vcn_activity", counter_track::at(_dev_id, idx),
_ts, temp);
++idx;
for(const auto& vcn_val : metrics.vcn_busy)
{
TRACE_COUNTER("device_vcn_activity",
counter_track::at(_dev_id, idx), _ts, vcn_val);
++idx;
}
}
}
if(_settings.jpeg_activity)
if(_settings.jpeg_activity && !itr.m_xcp_metrics.empty())
{
uint64_t idx = _idx.at(7);
if(_settings.vcn_activity) idx += (itr.m_vcn_metrics.size() - 1);
for(const auto& temp : itr.m_jpeg_metrics)
// Calculate total VCN metrics to properly offset JPEG metrics index
if(_settings.vcn_activity)
{
TRACE_COUNTER("device_jpeg_activity", counter_track::at(_dev_id, idx),
_ts, temp);
++idx;
size_t total_vcn_metrics = 0;
for(const auto& metrics : itr.m_xcp_metrics)
total_vcn_metrics += metrics.vcn_busy.size();
if(total_vcn_metrics > 0) idx += (total_vcn_metrics - 1);
}
// Iterate over all XCPs and their JPEG busy/activity values
for(const auto& metrics : itr.m_xcp_metrics)
{
for(const auto& jpeg_val : metrics.jpeg_busy)
{
TRACE_COUNTER("device_jpeg_activity",
counter_track::at(_dev_id, idx), _ts, jpeg_val);
++idx;
}
}
}
}
+11 -6
查看文件
@@ -93,6 +93,12 @@ struct data
using mem_usage_t = uint64_t;
using temp_t = int64_t;
// One group of VCN/JPEG readings captured during a single sample.
// data::sample() stores either one entry built from the device-level
// vcn_activity/jpeg_activity arrays, or one entry per element of xcp_stats
// built from that element's vcn_busy/jpeg_busy arrays. Values equal to
// UINT16_MAX are dropped before being stored here.
struct xcp_metrics_t
{
    // VCN readings for this group, in source-array order.
    std::vector<uint16_t> vcn_busy = {};
    // JPEG readings for this group, in source-array order.
    std::vector<uint16_t> jpeg_busy = {};
};
ROCPROFSYS_DEFAULT_OBJECT(data)
explicit data(uint32_t _dev_id);
@@ -102,12 +108,11 @@ struct data
static void post_process(uint32_t _dev_id);
uint32_t m_dev_id = std::numeric_limits<uint32_t>::max();
timestamp_t m_ts = 0;
temp_t m_temp = 0;
mem_usage_t m_mem_usage = 0;
std::vector<uint16_t> m_vcn_metrics = {};
std::vector<uint16_t> m_jpeg_metrics = {};
uint32_t m_dev_id = std::numeric_limits<uint32_t>::max();
timestamp_t m_ts = 0;
temp_t m_temp = 0;
mem_usage_t m_mem_usage = 0;
std::vector<xcp_metrics_t> m_xcp_metrics = {};
#if ROCPROFSYS_USE_ROCM > 0
amdsmi_engine_usage_t m_busy_perc = {};
amdsmi_power_info_t m_power = {};