From e30ebbc7878cfee21c6c1bca3549901d2382e5ff Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Fri, 22 May 2020 22:13:03 -0400 Subject: [PATCH] Add support to retrieve process VRAM usage information. Change-Id: I60843a99207a658022a26aa346b79f91863833cf --- include/rocm_smi/rocm_smi.h | 1 + include/rocm_smi/rocm_smi_kfd.h | 3 ++- src/rocm_smi.cc | 12 +++++++++++- src/rocm_smi_kfd.cc | 26 +++++++++++++++++++++++++- 4 files changed, 39 insertions(+), 3 deletions(-) diff --git a/include/rocm_smi/rocm_smi.h b/include/rocm_smi/rocm_smi.h index af53bb7e0c..803417576b 100755 --- a/include/rocm_smi/rocm_smi.h +++ b/include/rocm_smi/rocm_smi.h @@ -712,6 +712,7 @@ typedef struct { typedef struct { uint32_t process_id; //!< Process ID uint32_t pasid; //!< PASID + uint64_t vram_usage; //!< VRAM usage } rsmi_process_info_t; diff --git a/include/rocm_smi/rocm_smi_kfd.h b/include/rocm_smi/rocm_smi_kfd.h index cff8def68e..d355938ec8 100755 --- a/include/rocm_smi/rocm_smi_kfd.h +++ b/include/rocm_smi/rocm_smi_kfd.h @@ -98,7 +98,8 @@ int GetProcessInfo(rsmi_process_info_t *procs, uint32_t num_allocated, uint32_t *num_procs_found); int -GetProcessInfoForPID(uint32_t pid, rsmi_process_info_t *proc); +GetProcessInfoForPID(uint32_t pid, rsmi_process_info_t *proc, + std::unordered_set<uint64_t> *gpu_set); int GetProcessGPUs(uint32_t pid, std::unordered_set<uint64_t> *gpu_count); diff --git a/src/rocm_smi.cc b/src/rocm_smi.cc index c2585ced1e..565e5a7bb6 100755 --- a/src/rocm_smi.cc +++ b/src/rocm_smi.cc @@ -2924,7 +2924,17 @@ rsmi_compute_process_info_by_pid_get(uint32_t pid, return RSMI_STATUS_INVALID_ARGS; } - int err = amd::smi::GetProcessInfoForPID(pid, proc); + std::unordered_set<uint64_t> gpu_set; + amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance(); + auto it = smi.kfd_node_map().begin(); + + while (it != smi.kfd_node_map().end()) { + uint64_t gpu_id = it->first; + gpu_set.insert(gpu_id); + it++; + } + + int err = amd::smi::GetProcessInfoForPID(pid, proc, &gpu_set); if (err) { return
errno_to_rsmi_status(err); diff --git a/src/rocm_smi_kfd.cc b/src/rocm_smi_kfd.cc index bbaa2c43fb..b3c563a83b 100755 --- a/src/rocm_smi_kfd.cc +++ b/src/rocm_smi_kfd.cc @@ -384,10 +384,13 @@ int GetProcessGPUs(uint32_t pid, std::unordered_set<uint64_t> *gpu_set) { return 0; } -int GetProcessInfoForPID(uint32_t pid, rsmi_process_info_t *proc) { +int GetProcessInfoForPID(uint32_t pid, rsmi_process_info_t *proc, + std::unordered_set<uint64_t> *gpu_set) { assert(proc != nullptr); + assert(gpu_set != nullptr); int err; std::string tmp; + std::unordered_set<uint64_t>::iterator itr; std::string proc_str_path = kKFDProcPathRoot; proc_str_path += "/"; @@ -413,6 +416,27 @@ int GetProcessInfoForPID(uint32_t pid, rsmi_process_info_t *proc) { } proc->pasid = std::stoi(tmp); + proc->vram_usage = 0; + + for (itr = gpu_set->begin(); itr != gpu_set->end(); itr++) { + uint64_t gpu_id = (*itr); + + std::string vram_str_path = proc_str_path; + vram_str_path += "/vram_"; + vram_str_path += std::to_string(gpu_id); + + err = ReadSysfsStr(vram_str_path, &tmp); + if (err) { + return err; + } + + if (!is_number(tmp)) { + return EINVAL; + } + + proc->vram_usage += std::stoull(tmp); /* stoi throws past INT_MAX */ + } + return 0; }