diff --git a/rdc_libs/rdc/src/RdcEmbeddedHandler.cc b/rdc_libs/rdc/src/RdcEmbeddedHandler.cc index 5765ea7f04..473795507d 100644 --- a/rdc_libs/rdc/src/RdcEmbeddedHandler.cc +++ b/rdc_libs/rdc/src/RdcEmbeddedHandler.cc @@ -117,16 +117,19 @@ rdc_status_t RdcEmbeddedHandler::get_gpu_gauges(rdc_gpu_gauges_t* gpu_gauges) { RDC_LOG(RDC_ERROR, "Fail to get total memory of GPU " << gpu_index_list[i]); return status; } - gpu_gauges->insert({{gpu_index_list[i], RDC_FI_GPU_MEMORY_TOTAL}, value.value.l_int}); + gpu_gauges->insert( + {{gpu_index_list[i], RDC_FI_GPU_MEMORY_TOTAL}, static_cast(value.value.l_int)}); status = metric_fetcher_->fetch_smi_field(gpu_index_list[i], RDC_FI_ECC_CORRECT_TOTAL, &value); if (status == RDC_ST_OK) { - gpu_gauges->insert({{gpu_index_list[i], RDC_FI_ECC_CORRECT_TOTAL}, value.value.l_int}); + gpu_gauges->insert({{gpu_index_list[i], RDC_FI_ECC_CORRECT_TOTAL}, + static_cast(value.value.l_int)}); } status = metric_fetcher_->fetch_smi_field(gpu_index_list[i], RDC_FI_ECC_UNCORRECT_TOTAL, &value); if (status == RDC_ST_OK) { - gpu_gauges->insert({{gpu_index_list[i], RDC_FI_ECC_UNCORRECT_TOTAL}, value.value.l_int}); + gpu_gauges->insert({{gpu_index_list[i], RDC_FI_ECC_UNCORRECT_TOTAL}, + static_cast(value.value.l_int)}); } } return RDC_ST_OK; diff --git a/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc b/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc index 560ded900a..a5f8dad214 100644 --- a/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc +++ b/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc @@ -32,6 +32,7 @@ THE SOFTWARE. #include "amd_smi/amdsmi.h" #include "common/rdc_capabilities.h" #include "common/rdc_fields_supported.h" +#include "rdc/rdc.h" #include "rdc_lib/RdcLogger.h" #include "rdc_lib/impl/SmiUtils.h" #include "rdc_lib/rdc_common.h" @@ -436,17 +437,26 @@ rdc_status_t RdcMetricFetcherImpl::fetch_smi_field(uint32_t gpu_index, rdc_field value->status = amdsmi_get_power_info(processor_handle, &power_info); value->type = INTEGER; if (value->status != AMDSMI_STATUS_SUCCESS) { + RDC_LOG(RDC_ERROR, "amdsmi_get_power_info failed!"); break; } // Use current_socket_power if average_socket_power is not available if (power_info.average_socket_power != 65535) { + RDC_LOG(RDC_DEBUG, "AMDSMI: using average_socket_power"); value->value.l_int = static_cast(power_info.average_socket_power) * 1000 * 1000; - } else { - value->value.l_int = static_cast(power_info.current_socket_power) * 1000 * 1000; + break; } - break; + if (power_info.current_socket_power != 65535) { + RDC_LOG(RDC_DEBUG, "AMDSMI: using current_socket_power"); + value->value.l_int = static_cast(power_info.current_socket_power) * 1000 * 1000; + break; + } + + value->status = AMDSMI_STATUS_NOT_SUPPORTED; + RDC_LOG(RDC_ERROR, "AMDSMI: cannot get POWER_USAGE"); + return RDC_ST_NO_DATA; } case RDC_FI_GPU_CLOCK: case RDC_FI_MEM_CLOCK: {