From 5ec7b213e417b9e5c0947ca3bbef97358c22b48b Mon Sep 17 00:00:00 2001 From: "Narlo, Joseph" Date: Fri, 17 Oct 2025 08:57:57 -0500 Subject: [PATCH] [SWDEV-555807] TestCudaMallocAsync test power draw failing (#755) * Clarified comments regarding power limit retrieval and its support on virtualized systems. * Change unsupported comment to UINT32_MAX --------- Signed-off-by: josnarlo Signed-off-by: Arif, Maisam [ROCm/amdsmi commit: 460cfcba1f5f679822bbb16a3465306f22c7f43a] --- projects/amdsmi/include/amd_smi/amdsmi.h | 5 ++++- projects/amdsmi/src/amd_smi/amd_smi.cc | 7 +++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/projects/amdsmi/include/amd_smi/amdsmi.h b/projects/amdsmi/include/amd_smi/amdsmi.h index 7d2ae580f8..7f636fd990 100644 --- a/projects/amdsmi/include/amd_smi/amdsmi.h +++ b/projects/amdsmi/include/amd_smi/amdsmi.h @@ -1085,6 +1085,8 @@ typedef struct { /** * @brief Power Information * + * Unsupported struct members are set to UINT32_MAX + * * @cond @tag{gpu_bm_linux} @endcond */ typedef struct { @@ -6259,9 +6261,10 @@ amdsmi_get_gpu_activity(amdsmi_processor_handle processor_handle, amdsmi_engine_ * * @ingroup tagGPUMonitor * - * @platform{gpu_bm_linux} @platform{host} @platform{guest_windows} + * @platform{gpu_bm_linux} @platform{host} @platform{guest_windows} @platform{guest_1vf} * * @note amdsmi_power_info_t::socket_power metric can rarely spike above the socket power limit in some cases + * @note unsupported struct members are set to UINT32_MAX * * @param[in] processor_handle PF of a processor for which to query * diff --git a/projects/amdsmi/src/amd_smi/amd_smi.cc b/projects/amdsmi/src/amd_smi/amd_smi.cc index 1bd160dc18..83a6310f5a 100644 --- a/projects/amdsmi/src/amd_smi/amd_smi.cc +++ b/projects/amdsmi/src/amd_smi/amd_smi.cc @@ -4468,11 +4468,14 @@ amdsmi_get_power_info(amdsmi_processor_handle processor_handle, amdsmi_power_inf } int power_limit = 0; - status = smi_amdgpu_get_power_cap(gpu_device, &power_limit); - if (status 
== AMDSMI_STATUS_SUCCESS) { + amdsmi_status_t status2 = smi_amdgpu_get_power_cap(gpu_device, &power_limit); + if (status2 == AMDSMI_STATUS_SUCCESS) { info->power_limit = power_limit; } + // Return the status from amdsmi_get_gpu_metrics_info(), which should be SUCCESS. + // Reading the power cap may not be supported on all virtualized systems, so a power cap + // failure should not be returned when the metrics values are ascertainable. return status; }