Fix breaking changes introduced with CPU support

Changes introduced in 3bdca8b8b6
broke RDC if it was compiled without ESMI support, or if the ESMI driver
is not loaded when RDC is in use.

Change-Id: Id54e1e9002d2e3cf09240081149eed84178700af
Signed-off-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>
This commit is contained in:
Galantsev, Dmitrii
2025-04-07 19:36:45 +00:00
committed by Galantsev, Dmitrii
parent 3bdca8b8b6
commit 0aeceefcb3
2 changed files with 22 additions and 10 deletions
+19 -10
View File
@@ -50,11 +50,18 @@ class smi_initializer {
// Make sure smi will not be initialized multiple times
amdsmi_shut_down();
amdsmi_status_t ret;
uint64_t init_flag_;
//initialize CPU and GPU instances
init_flag_ = AMDSMI_INIT_AMD_GPUS | AMDSMI_INIT_AMD_CPUS;
uint64_t init_flag_ = AMDSMI_INIT_AMD_GPUS;
#ifdef ENABLE_ESMI_LIB
init_flag_ = init_flag_ | AMDSMI_INIT_AMD_CPUS;
#endif
ret = amdsmi_init(init_flag_);
if (init_flag_ & AMDSMI_INIT_AMD_CPUS) {
RDC_LOG(RDC_ERROR,
"Failed to initalize amdsmi with CPUs and GPUs enabled.. trying GPUs only.");
ret = amdsmi_init(AMDSMI_INIT_AMD_GPUS);
}
if (ret != AMDSMI_STATUS_SUCCESS) {
RDC_LOG(RDC_ERROR, "SMI FAILED with" << ret);
throw amd::rdc::RdcException(RDC_ST_FAIL_LOAD_MODULE, "SMI initialize fail");
}
}
@@ -198,14 +205,15 @@ rdc_status_t RdcEmbeddedHandler::rdc_device_get_all(uint32_t gpu_index_list[RDC_
}
// Discovery API
rdc_status_t RdcEmbeddedHandler::rdc_device_get_all_cpu(uint32_t cpu_index_list[RDC_MAX_NUM_DEVICES],
uint32_t* count) {
rdc_status_t RdcEmbeddedHandler::rdc_device_get_all_cpu(
uint32_t cpu_index_list[RDC_MAX_NUM_DEVICES], uint32_t* count) {
if (!count) {
return RDC_ST_BAD_PARAMETER;
}
rdc_field_value device_count;
rdc_status_t status = metric_fetcher_->fetch_smi_cpu_field(0, RDC_FI_DEV_CPU_COUNT, &device_count);
rdc_status_t status =
metric_fetcher_->fetch_smi_cpu_field(0, RDC_FI_DEV_CPU_COUNT, &device_count);
if (status != RDC_ST_OK) {
std::cout << "rdc_device_get_all_cpu failed to get cpu count";
return status;
@@ -216,9 +224,9 @@ rdc_status_t RdcEmbeddedHandler::rdc_device_get_all_cpu(uint32_t cpu_index_list[
for (uint32_t i = 0; i < *count; i++) {
cpu_index_list[i] = i;
}
return RDC_ST_OK;
}
rdc_status_t RdcEmbeddedHandler::rdc_device_get_attributes(uint32_t gpu_index,
rdc_device_attributes_t* p_rdc_attr) {
if (!p_rdc_attr) {
@@ -230,14 +238,15 @@ rdc_status_t RdcEmbeddedHandler::rdc_device_get_attributes(uint32_t gpu_index,
return status;
}
rdc_status_t RdcEmbeddedHandler::rdc_device_get_cpu_attributes(uint32_t cpu_index,
rdc_device_attributes_t* p_rdc_attr) {
rdc_status_t RdcEmbeddedHandler::rdc_device_get_cpu_attributes(
uint32_t cpu_index, rdc_device_attributes_t* p_rdc_attr) {
if (!p_rdc_attr) {
return RDC_ST_BAD_PARAMETER;
}
rdc_field_value device_name;
rdc_status_t status = metric_fetcher_->fetch_smi_cpu_field(cpu_index, RDC_FI_DEV_CPU_MODEL, &device_name);
rdc_status_t status =
metric_fetcher_->fetch_smi_cpu_field(cpu_index, RDC_FI_DEV_CPU_MODEL, &device_name);
strncpy_with_null(p_rdc_attr->device_name, device_name.value.str, RDC_MAX_STR_LENGTH);
return status;
@@ -1338,6 +1338,8 @@ rdc_status_t RdcMetricFetcherImpl::fetch_smi_cpu_field(uint32_t cpu_index, rdc_f
value->status = Smi2RdcError(ret);
break;
}
#ifdef ENABLE_ESMI_LIB
// amdsmi_cpu_info_t is only defined if ENABLE_ESMI_LIB is set
case RDC_FI_DEV_CPU_MODEL: {
amdsmi_cpu_info_t cpu_info;
value->status = amdsmi_get_cpu_model_name(processor_handle, &cpu_info);
@@ -1347,6 +1349,7 @@ rdc_status_t RdcMetricFetcherImpl::fetch_smi_cpu_field(uint32_t cpu_index, rdc_f
}
break;
}
#endif
default: {
RDC_LOG(RDC_ERROR, "field_id is not supported: " << field_id);
break;