From 33bbb8efde57ca4b65907aa56b58bddcb1f3015b Mon Sep 17 00:00:00 2001 From: "Bill(Shuzhou) Liu" Date: Wed, 8 May 2024 13:14:39 -0500 Subject: [PATCH] Discover the amdgpu when card numbers are not consecutive. When discovering amdgpu devices, if the assigned card numbers are not consecutive, not all GPUs can be discovered. The code is changed to discover the GPUs based on the max card number. Change-Id: I8b6a8b49594d6a54c7feb2645bedb83dc5c1b4cc [ROCm/rocm_smi_lib commit: 8c444164103bec701ff24c231eddc0eb36fdbef6] --- projects/rocm-smi-lib/src/rocm_smi_main.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/projects/rocm-smi-lib/src/rocm_smi_main.cc b/projects/rocm-smi-lib/src/rocm_smi_main.cc index 03c8b61375..7d6edea648 100755 --- a/projects/rocm-smi-lib/src/rocm_smi_main.cc +++ b/projects/rocm-smi-lib/src/rocm_smi_main.cc @@ -713,6 +713,8 @@ static bool isAMDGPU(std::string dev_path) { uint32_t RocmSMI::DiscoverAmdgpuDevices(void) { std::string err_msg; uint32_t count = 0; + int32_t cardId = 0; + int32_t max_cardId = -1; std::ostringstream ss; // If this gets called more than once, clear previous findings. @@ -736,6 +738,9 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) { if ((strcmp(dentry->d_name, ".") == 0) || (strcmp(dentry->d_name, "..") == 0)) continue; + sscanf(&dentry->d_name[strlen(kDeviceNamePrefix)], "%d", &cardId); + if (cardId > max_cardId) + max_cardId = cardId; count++; } dentry = readdir(drm_dir); @@ -818,7 +823,7 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) { uint32_t cardAdded = 0; // Discover all root cards & gpu partitions associated with each - for (uint32_t cardId = 0; cardId < count; cardId++) { + for (uint32_t cardId = 0; cardId <= max_cardId; cardId++) { std::string path = kPathDRMRoot; path += "/card"; path += std::to_string(cardId);