From 96a28009fca650fba4999c54913eee7124ed79c4 Mon Sep 17 00:00:00 2001 From: "Pryor, Adam" Date: Tue, 19 Aug 2025 18:53:16 -0500 Subject: [PATCH] [SWDEV-544620] Add kfd fallback for GPU Processes (#631) Signed-off-by: adapryor [ROCm/amdsmi commit: b62900c3726a18f7174a68563697c1f7504a7dda] --- projects/amdsmi/rocm_smi/src/rocm_smi_kfd.cc | 32 ++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/projects/amdsmi/rocm_smi/src/rocm_smi_kfd.cc b/projects/amdsmi/rocm_smi/src/rocm_smi_kfd.cc index 8077c1e7ab..8db11b973d 100644 --- a/projects/amdsmi/rocm_smi/src/rocm_smi_kfd.cc +++ b/projects/amdsmi/rocm_smi/src/rocm_smi_kfd.cc @@ -383,6 +383,38 @@ int GetProcessGPUs(uint32_t pid, std::unordered_set<uint64_t> *gpu_set) { q_dentry = readdir(queues_dir_hd); } + // if no queues were present, fallback to grab KFD GPU IDs from parent dir names + if (gpu_set->empty()) { + + std::string pdir = std::string(kKFDProcPathRoot) + "/" + std::to_string(pid); + auto queues_dir_kfd = opendir(pdir.c_str()); + + if (queues_dir_kfd == nullptr) { + std::string err_str = "Unable to open KFD process directory for process "; + err_str += std::to_string(pid); + perror(err_str.c_str()); + return ESRCH; + } + + struct dirent* e; + + while ((e = readdir(queues_dir_kfd))) { + + // These files encode the KFD GPU ID when process is running + if (!strncmp(e->d_name, "stats_", 6)) { + gpu_set->insert(strtoull(e->d_name + 6, nullptr, 10)); + } else if (!strncmp(e->d_name, "vram_", 5)) { + gpu_set->insert(strtoull(e->d_name + 5, nullptr, 10)); + } else if (!strncmp(e->d_name, "counters_", 9)) { + gpu_set->insert(strtoull(e->d_name + 9, nullptr, 10)); + } else if (!strncmp(e->d_name, "sdma_", 5)) { + gpu_set->insert(strtoull(e->d_name + 5, nullptr, 10)); + } + } + + closedir(queues_dir_kfd); + } + errno = 0; if (closedir(queues_dir_hd)) { return errno;