From e3741f678bc5d536b20fd7ed58d986df45e044f5 Mon Sep 17 00:00:00 2001 From: Sajina PK Date: Thu, 19 Jun 2025 16:23:30 -0400 Subject: [PATCH] Show VCN and JPEG busy values where VCN/JPEG activity is not supported. (#232) In ROCm 7.0, AMD-SMI does not report vcn_activity and jpeg_activity when the XCP (partition) stats, vcn_busy and jpeg_busy, are available. This causes the activity tracking to fail. The fix is to read the busy values when the activity values are not supported. For issue: SWDEV-536439 --------- Co-authored-by: David Galiffi --- source/lib/core/gpu.cpp | 71 +++++---- source/lib/rocprof-sys/library/amd_smi.cpp | 169 +++++++++++++++++---- source/lib/rocprof-sys/library/amd_smi.hpp | 17 ++- 3 files changed, 183 insertions(+), 74 deletions(-) diff --git a/source/lib/core/gpu.cpp b/source/lib/core/gpu.cpp index c46a9db40c..1070a50657 100644 --- a/source/lib/core/gpu.cpp +++ b/source/lib/core/gpu.cpp @@ -73,6 +73,17 @@ check_amdsmi_error(amdsmi_status_t _code, const char* _file, int _line) _msg); } +// Ensures initialization happens only once +std::once_flag amdsmi_once; + +// Tracks whether AMD SMI is initialized +bool& +_amdsmi_is_initialized() +{ + static bool initialized = false; + return initialized; +} + bool amdsmi_init() { @@ -82,10 +93,12 @@ amdsmi_init() // Currently, only AMDSMI_INIT_AMD_GPUS is supported ROCPROFSYS_AMD_SMI_CALL(::amdsmi_init(AMDSMI_INIT_AMD_GPUS)); get_processor_handles(); + _amdsmi_is_initialized() = true; // Mark as initialized } catch(std::exception& _e) { ROCPROFSYS_BASIC_VERBOSE(1, "Exception thrown initializing amd-smi: %s\n", _e.what()); + _amdsmi_is_initialized() = false; // Mark as not initialized return false; } return true; @@ -143,7 +156,9 @@ bool initialize_amdsmi() { #if ROCPROFSYS_USE_ROCM > 0 - return (amdsmi_init()) ? 
true : false; + // Ensure initialization happens only once + std::call_once(amdsmi_once, amdsmi_init); + return _amdsmi_is_initialized(); #else return false; #endif @@ -252,45 +267,27 @@ get_processor_handles() processors::processors_list.push_back(processor); amdsmi_gpu_metrics_t gpu_metrics; - bool vcn_supported = false; - bool jpeg_supported = false; - bool v_busy_supported = false; - bool j_busy_supported = false; - ret = amdsmi_get_gpu_metrics_info(processor, &gpu_metrics); - if(ret == AMDSMI_STATUS_SUCCESS) + bool vcn_supported = false, jpeg_supported = false; + bool v_busy_supported = false, j_busy_supported = false; + // AMD SMI will not report VCN_activity and JPEG_activity, if VCN_busy or + // JPEG_busy fields are available. + if(amdsmi_get_gpu_metrics_info(processor, &gpu_metrics) == + AMDSMI_STATUS_SUCCESS) { - for(const auto& vcn_activity : gpu_metrics.vcn_activity) - { - if(vcn_activity != UINT16_MAX) - { - vcn_supported = true; - break; - } - } - for(const auto& jpeg_activity : gpu_metrics.jpeg_activity) - { - if(jpeg_activity != UINT16_MAX) - { - jpeg_supported = true; - break; - } - } + // Helper lambda to check if any value in the array is valid + auto has_valid = [](const auto& arr) { + return std::any_of(std::begin(arr), std::end(arr), + [](auto val) { return val != UINT16_MAX; }); + }; + vcn_supported = has_valid(gpu_metrics.vcn_activity); + jpeg_supported = has_valid(gpu_metrics.jpeg_activity); + // Check if VCN and JPEG busy metrics are available for(const auto& xcp : gpu_metrics.xcp_stats) { - if(!v_busy_supported) - { - v_busy_supported = - std::any_of(std::begin(xcp.vcn_busy), std::end(xcp.vcn_busy), - [](uint16_t val) { return val != UINT16_MAX; }); - } - - if(!j_busy_supported) - { - j_busy_supported = std::any_of( - std::begin(xcp.jpeg_busy), std::end(xcp.jpeg_busy), - [](uint16_t val) { return val != UINT16_MAX; }); - } - + if(!v_busy_supported && has_valid(xcp.vcn_busy)) + v_busy_supported = true; + if(!j_busy_supported && 
has_valid(xcp.jpeg_busy)) + j_busy_supported = true; if(v_busy_supported && j_busy_supported) break; } } diff --git a/source/lib/rocprof-sys/library/amd_smi.cpp b/source/lib/rocprof-sys/library/amd_smi.cpp index f840f8972f..c699c84830 100644 --- a/source/lib/rocprof-sys/library/amd_smi.cpp +++ b/source/lib/rocprof-sys/library/amd_smi.cpp @@ -143,6 +143,7 @@ data::sample(uint32_t _dev_id) auto _ts = tim::get_clock_real_now(); assert(_ts < std::numeric_limits::max()); amdsmi_gpu_metrics_t _gpu_metrics; + bool _vcn_or_jpeg_activity_enabled = false; auto _state = get_state().load(); @@ -184,18 +185,59 @@ data::sample(uint32_t _dev_id) #endif ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).mem_usage, amdsmi_get_gpu_memory_usage, sample_handle, AMDSMI_MEM_TYPE_VRAM, &m_mem_usage); - ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).vcn_activity, - amdsmi_get_gpu_metrics_info, sample_handle, &_gpu_metrics); - ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).jpeg_activity, - amdsmi_get_gpu_metrics_info, sample_handle, &_gpu_metrics); + _vcn_or_jpeg_activity_enabled = + get_settings(m_dev_id).vcn_activity || get_settings(m_dev_id).jpeg_activity; + ROCPROFSYS_AMDSMI_GET(_vcn_or_jpeg_activity_enabled, amdsmi_get_gpu_metrics_info, + sample_handle, &_gpu_metrics); - for(const auto& v_activity : _gpu_metrics.vcn_activity) + // Process metrics if either VCN or JPEG activity is enabled + if(_vcn_or_jpeg_activity_enabled) { - if(v_activity != UINT16_MAX) m_vcn_metrics.push_back(v_activity); - } - for(const auto& j_activity : _gpu_metrics.jpeg_activity) - { - if(j_activity != UINT16_MAX) m_jpeg_metrics.push_back(j_activity); + // Helper lambda to fill busy metrics from a source array + auto fill_busy_metrics = [](auto& dest, const auto& src) { + for(const auto& val : src) + { + if(val != UINT16_MAX) dest.push_back(val); + } + }; + + if(gpu::is_vcn_activity_supported(m_dev_id) && + gpu::is_jpeg_activity_supported(m_dev_id)) + { + // Both VCN and JPEG are supported - create one entry with both 
metrics + xcp_metrics_t metrics; + fill_busy_metrics(metrics.vcn_busy, _gpu_metrics.vcn_activity); + fill_busy_metrics(metrics.jpeg_busy, _gpu_metrics.jpeg_activity); + if(!metrics.vcn_busy.empty() || !metrics.jpeg_busy.empty()) + m_xcp_metrics.push_back(metrics); + } + else if(gpu::is_vcn_activity_supported(m_dev_id)) + { + // Only VCN is supported + xcp_metrics_t metrics; + fill_busy_metrics(metrics.vcn_busy, _gpu_metrics.vcn_activity); + if(!metrics.vcn_busy.empty()) m_xcp_metrics.push_back(metrics); + } + else if(gpu::is_jpeg_activity_supported(m_dev_id)) + { + // Only JPEG is supported + xcp_metrics_t metrics; + fill_busy_metrics(metrics.jpeg_busy, _gpu_metrics.jpeg_activity); + if(!metrics.jpeg_busy.empty()) m_xcp_metrics.push_back(metrics); + } + else + { + // Neither is supported - use XCP stats + // Each XCP gets one entry with both its VCN and JPEG metrics + for(const auto& xcp : _gpu_metrics.xcp_stats) + { + xcp_metrics_t metrics; + fill_busy_metrics(metrics.vcn_busy, xcp.vcn_busy); + fill_busy_metrics(metrics.jpeg_busy, xcp.jpeg_busy); + if(!metrics.vcn_busy.empty() || !metrics.jpeg_busy.empty()) + m_xcp_metrics.push_back(metrics); + } + } } #undef ROCPROFSYS_AMDSMI_GET @@ -350,16 +392,19 @@ data::post_process(uint32_t _dev_id) auto addendum = [&](const char* _v) { return JOIN(" ", "GPU", _v, JOIN("", '[', _dev_id, ']'), "(S)"); }; - auto addendum_blk = [&](std::size_t _i, const char* _metric) { - if(_i < 10) + auto addendum_blk = [&](std::size_t _i, const char* _metric, + std::size_t xcp_idx = SIZE_MAX) { + if(xcp_idx != SIZE_MAX) { return JOIN(" ", "GPU", JOIN("", '[', _dev_id, ']'), _metric, - JOIN("", "[0", _i, ']'), "(S)"); + JOIN("", "XCP_", xcp_idx, ": [", (_i < 10 ? "0" : ""), + _i, ']'), + "(S)"); } else { return JOIN(" ", "GPU", JOIN("", '[', _dev_id, ']'), _metric, - JOIN("", '[', _i, ']'), "(S)"); + JOIN("", "[", (_i < 10 ? 
"0" : ""), _i, ']'), "(S)"); } }; @@ -378,15 +423,61 @@ data::post_process(uint32_t _dev_id) "megabytes"); if(_settings.vcn_activity) { - for(std::size_t i = 0; i < std::size(itr.m_vcn_metrics); ++i) - counter_track::emplace(_dev_id, addendum_blk(i, " VCN Activity"), - "%"); + if(itr.m_xcp_metrics.empty()) + { + ROCPROFSYS_VERBOSE( + 1, "No VCN activity data collected from device %u\n", + _dev_id); + } + else if(gpu::is_vcn_activity_supported(_dev_id)) + { + // For VCN activity, use simple indexing + for(std::size_t i = 0; + i < std::size(itr.m_xcp_metrics[0].vcn_busy); ++i) + counter_track::emplace(_dev_id, + addendum_blk(i, "VCN Activity"), "%"); + } + else + { + for(std::size_t xcp = 0; xcp < std::size(itr.m_xcp_metrics); + ++xcp) + { + for(std::size_t i = 0; + i < std::size(itr.m_xcp_metrics[xcp].vcn_busy); ++i) + { + counter_track::emplace( + _dev_id, addendum_blk(i, "VCN Activity", xcp), "%"); + } + } + } } if(_settings.jpeg_activity) { - for(std::size_t i = 0; i < std::size(itr.m_jpeg_metrics); ++i) - counter_track::emplace(_dev_id, addendum_blk(i, "JPEG Activity"), - "%"); + if(itr.m_xcp_metrics.empty()) + { + ROCPROFSYS_VERBOSE( + 1, "No JPEG activity data collected from device %u\n", + _dev_id); + } + else if(gpu::is_jpeg_activity_supported(_dev_id)) + { + // For JPEG activity, use simple indexing + for(std::size_t i = 0; + i < std::size(itr.m_xcp_metrics[0].jpeg_busy); ++i) + counter_track::emplace(_dev_id, + addendum_blk(i, "JPEG Activity"), "%"); + } + else + { + for(std::size_t xcp = 0; xcp < std::size(itr.m_xcp_metrics); + ++xcp) + { + for(std::size_t i = 0; + i < std::size(itr.m_xcp_metrics[xcp].jpeg_busy); ++i) + counter_track::emplace( + _dev_id, addendum_blk(i, "JPEG Activity", xcp), "%"); + } + } } } uint64_t _ts = itr.m_ts; @@ -417,25 +508,41 @@ data::post_process(uint32_t _dev_id) if(_settings.mem_usage) TRACE_COUNTER("device_memory_usage", counter_track::at(_dev_id, _idx.at(5)), _ts, _usage); - if(_settings.vcn_activity) + 
if(_settings.vcn_activity && !itr.m_xcp_metrics.empty()) { uint64_t idx = _idx.at(6); - for(const auto& temp : itr.m_vcn_metrics) + // Iterate over all XCPs and their VCN busy/activity values + for(const auto& metrics : itr.m_xcp_metrics) { - TRACE_COUNTER("device_vcn_activity", counter_track::at(_dev_id, idx), - _ts, temp); - ++idx; + for(const auto& vcn_val : metrics.vcn_busy) + { + TRACE_COUNTER("device_vcn_activity", + counter_track::at(_dev_id, idx), _ts, vcn_val); + ++idx; + } } } - if(_settings.jpeg_activity) + + if(_settings.jpeg_activity && !itr.m_xcp_metrics.empty()) { uint64_t idx = _idx.at(7); - if(_settings.vcn_activity) idx += (itr.m_vcn_metrics.size() - 1); - for(const auto& temp : itr.m_jpeg_metrics) + // Calculate total VCN metrics to properly offset JPEG metrics index + if(_settings.vcn_activity) { - TRACE_COUNTER("device_jpeg_activity", counter_track::at(_dev_id, idx), - _ts, temp); - ++idx; + size_t total_vcn_metrics = 0; + for(const auto& metrics : itr.m_xcp_metrics) + total_vcn_metrics += metrics.vcn_busy.size(); + if(total_vcn_metrics > 0) idx += (total_vcn_metrics - 1); + } + // Iterate over all XCPs and their JPEG busy/activity values + for(const auto& metrics : itr.m_xcp_metrics) + { + for(const auto& jpeg_val : metrics.jpeg_busy) + { + TRACE_COUNTER("device_jpeg_activity", + counter_track::at(_dev_id, idx), _ts, jpeg_val); + ++idx; + } } } } diff --git a/source/lib/rocprof-sys/library/amd_smi.hpp b/source/lib/rocprof-sys/library/amd_smi.hpp index e094b2fdc5..ec5619434c 100644 --- a/source/lib/rocprof-sys/library/amd_smi.hpp +++ b/source/lib/rocprof-sys/library/amd_smi.hpp @@ -93,6 +93,12 @@ struct data using mem_usage_t = uint64_t; using temp_t = int64_t; + struct xcp_metrics_t + { + std::vector vcn_busy; + std::vector jpeg_busy; + }; + ROCPROFSYS_DEFAULT_OBJECT(data) explicit data(uint32_t _dev_id); @@ -102,12 +108,11 @@ struct data static void post_process(uint32_t _dev_id); - uint32_t m_dev_id = std::numeric_limits::max(); - 
timestamp_t m_ts = 0; - temp_t m_temp = 0; - mem_usage_t m_mem_usage = 0; - std::vector m_vcn_metrics = {}; - std::vector m_jpeg_metrics = {}; + uint32_t m_dev_id = std::numeric_limits::max(); + timestamp_t m_ts = 0; + temp_t m_temp = 0; + mem_usage_t m_mem_usage = 0; + std::vector m_xcp_metrics = {}; #if ROCPROFSYS_USE_ROCM > 0 amdsmi_engine_usage_t m_busy_perc = {}; amdsmi_power_info_t m_power = {};