From 437cb07db65cd36bf51522602ca2ad6eaa53c7b8 Mon Sep 17 00:00:00 2001 From: "Bill(Shuzhou) Liu" Date: Wed, 8 May 2024 14:00:15 -0500 Subject: [PATCH] Discover the amdgpu when card numbers are not consecutive. When discovering amdgpu devices, if the assigned card numbers are not consecutive, not all GPUs can be discovered. The code is changed to discover the GPUs based on the maximum card number. Change-Id: Icf4c1df4a1651093b5de3cd7a25a9bd69a299075 --- rocm_smi/src/rocm_smi_main.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/rocm_smi/src/rocm_smi_main.cc b/rocm_smi/src/rocm_smi_main.cc index c078712e53..3b27d6aee9 100755 --- a/rocm_smi/src/rocm_smi_main.cc +++ b/rocm_smi/src/rocm_smi_main.cc @@ -707,6 +707,8 @@ static bool isAMDGPU(std::string dev_path) { uint32_t RocmSMI::DiscoverAmdgpuDevices(void) { std::string err_msg; uint32_t count = 0; + int32_t cardId = 0; + int32_t max_cardId = -1; std::ostringstream ss; // If this gets called more than once, clear previous findings. @@ -730,6 +732,9 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) { if ((strcmp(dentry->d_name, ".") == 0) || (strcmp(dentry->d_name, "..") == 0)) continue; + sscanf(&dentry->d_name[strlen(kDeviceNamePrefix)], "%d", &cardId); + if (cardId > max_cardId) + max_cardId = cardId; count++; } dentry = readdir(drm_dir); @@ -782,7 +787,7 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) { uint32_t cardAdded = 0; // Discover all root cards & gpu partitions associated with each - for (uint32_t cardId = 0; cardId < count; cardId++) { + for (uint32_t cardId = 0; cardId <= max_cardId; cardId++) { std::string path = kPathDRMRoot; path += "/card"; path += std::to_string(cardId);