diff --git a/docs/reference/amdsmi-py-api.md b/docs/reference/amdsmi-py-api.md
index fff571ace7..16cf3f78cc 100644
--- a/docs/reference/amdsmi-py-api.md
+++ b/docs/reference/amdsmi-py-api.md
@@ -1107,9 +1107,9 @@ Field | Description
---|---
`name` | Name of process. If user does not have permission this will be "N/A"
`pid` | Process ID
-`mem` | Process memory usage
+`mem` | Process memory usage in Bytes
`engine_usage` |
| Subfield | Description |
| `gfx` | GFX engine usage in ns |
| `enc` | Encode engine usage in ns |
-`memory_usage` | | Subfield | Description |
| `gtt_mem` | GTT memory usage |
| `cpu_mem` | CPU memory usage |
| `vram_mem` | VRAM memory usage |
+`memory_usage` | | Subfield | Description |
| `gtt_mem` | GTT memory usage in Bytes |
| `cpu_mem` | CPU memory usage in Bytes |
| `vram_mem` | VRAM memory usage in Bytes |
`cu_occupancy` | Number of Compute Units utilized
Exceptions that can be thrown by `amdsmi_get_gpu_process_list` function:
diff --git a/include/amd_smi/amdsmi.h b/include/amd_smi/amdsmi.h
index d5cf983b93..e8353c4ba3 100644
--- a/include/amd_smi/amdsmi.h
+++ b/include/amd_smi/amdsmi.h
@@ -1080,18 +1080,18 @@ typedef uint32_t amdsmi_process_handle_t;
typedef struct {
char name[AMDSMI_MAX_STRING_LENGTH];
amdsmi_process_handle_t pid;
- uint64_t mem; //!< In bytes
+ uint64_t mem; //!< In Bytes
struct engine_usage_ {
uint64_t gfx; //!< In nano-secs
uint64_t enc; //!< In nano-secs
uint32_t reserved[12];
} engine_usage; //!< time the process spends using these engines in ns
struct memory_usage_ {
- uint64_t gtt_mem; //!< In MB
- uint64_t cpu_mem; //!< In MB
- uint64_t vram_mem; //!< In MB
+ uint64_t gtt_mem; //!< In Bytes
+ uint64_t cpu_mem; //!< In Bytes
+ uint64_t vram_mem; //!< In Bytes
uint32_t reserved[10];
- } memory_usage; //!< in bytes
+ } memory_usage; //!< In Bytes
char container_name[AMDSMI_MAX_STRING_LENGTH];
uint32_t cu_occupancy; //!< Num CUs utilized
uint32_t reserved[11];
diff --git a/include/amd_smi/impl/amd_smi_gpu_device.h b/include/amd_smi/impl/amd_smi_gpu_device.h
index 1bef7182b1..b42a9b468a 100644
--- a/include/amd_smi/impl/amd_smi_gpu_device.h
+++ b/include/amd_smi/impl/amd_smi_gpu_device.h
@@ -62,6 +62,7 @@ class AMDSmiGPUDevice: public AMDSmiProcessor {
uint32_t get_gpu_fd() const;
uint32_t get_card_id(); // -e feature + we can get card_id for our internal functions
uint32_t get_drm_render_minor(); // -e feature + we can get card_id for our internal functions
+ uint64_t get_kfd_gpu_id(); // Used to decode vram usage for KFD processes
std::string& get_gpu_path();
amdsmi_bdf_t get_bdf();
bool check_if_drm_is_supported() { return drm_.check_if_drm_is_supported(); }
@@ -84,6 +85,7 @@ class AMDSmiGPUDevice: public AMDSmiProcessor {
AMDSmiDrm& drm_;
uint32_t card_index_;
uint32_t drm_render_minor_;
+ uint64_t kfd_gpu_id_; // Used to decode vram usage for KFD processes
GPUComputeProcessList_t compute_process_list_;
int32_t get_compute_process_list_impl(GPUComputeProcessList_t& compute_process_list,
ComputeProcessListType_t list_type);
diff --git a/py-interface/amdsmi_wrapper.py b/py-interface/amdsmi_wrapper.py
index 565688cfd6..f8ff10291a 100644
--- a/py-interface/amdsmi_wrapper.py
+++ b/py-interface/amdsmi_wrapper.py
@@ -1290,7 +1290,7 @@ struct_amdsmi_proc_info_t._fields_ = [
('memory_usage', struct_memory_usage_),
('container_name', ctypes.c_char * 256),
('cu_occupancy', ctypes.c_uint32),
- ('PADDING_1', ctypes.c_ubyte * 4),
+ ('reserved', ctypes.c_uint32 * 11),
]
amdsmi_proc_info_t = struct_amdsmi_proc_info_t
@@ -3163,19 +3163,18 @@ __all__ = \
'AMDSMI_VOLT_LAST', 'AMDSMI_VOLT_LOWEST', 'AMDSMI_VOLT_MAX',
'AMDSMI_VOLT_MAX_CRIT', 'AMDSMI_VOLT_MIN', 'AMDSMI_VOLT_MIN_CRIT',
'AMDSMI_VOLT_TYPE_FIRST', 'AMDSMI_VOLT_TYPE_INVALID',
- 'AMDSMI_VOLT_TYPE_LAST', 'AMDSMI_VOLT_TYPE_VDDGFX',
- 'AMDSMI_VOLT_TYPE_VDDBOARD',
- 'AMDSMI_VRAM_TYPE_DDR2', 'AMDSMI_VRAM_TYPE_DDR3',
- 'AMDSMI_VRAM_TYPE_DDR4', 'AMDSMI_VRAM_TYPE_GDDR1',
- 'AMDSMI_VRAM_TYPE_GDDR2', 'AMDSMI_VRAM_TYPE_GDDR3',
- 'AMDSMI_VRAM_TYPE_GDDR4', 'AMDSMI_VRAM_TYPE_GDDR5',
- 'AMDSMI_VRAM_TYPE_GDDR6', 'AMDSMI_VRAM_TYPE_GDDR7',
- 'AMDSMI_VRAM_TYPE_HBM', 'AMDSMI_VRAM_TYPE_HBM2',
- 'AMDSMI_VRAM_TYPE_HBM2E', 'AMDSMI_VRAM_TYPE_HBM3',
- 'AMDSMI_VRAM_TYPE_UNKNOWN', 'AMDSMI_VRAM_TYPE__MAX',
- 'AMDSMI_XGMI_LINK_DISABLE', 'AMDSMI_XGMI_LINK_DOWN',
- 'AMDSMI_XGMI_LINK_UP', 'AMDSMI_XGMI_STATUS_ERROR',
- 'AMDSMI_XGMI_STATUS_MULTIPLE_ERRORS',
+ 'AMDSMI_VOLT_TYPE_LAST', 'AMDSMI_VOLT_TYPE_VDDBOARD',
+ 'AMDSMI_VOLT_TYPE_VDDGFX', 'AMDSMI_VRAM_TYPE_DDR2',
+ 'AMDSMI_VRAM_TYPE_DDR3', 'AMDSMI_VRAM_TYPE_DDR4',
+ 'AMDSMI_VRAM_TYPE_GDDR1', 'AMDSMI_VRAM_TYPE_GDDR2',
+ 'AMDSMI_VRAM_TYPE_GDDR3', 'AMDSMI_VRAM_TYPE_GDDR4',
+ 'AMDSMI_VRAM_TYPE_GDDR5', 'AMDSMI_VRAM_TYPE_GDDR6',
+ 'AMDSMI_VRAM_TYPE_GDDR7', 'AMDSMI_VRAM_TYPE_HBM',
+ 'AMDSMI_VRAM_TYPE_HBM2', 'AMDSMI_VRAM_TYPE_HBM2E',
+ 'AMDSMI_VRAM_TYPE_HBM3', 'AMDSMI_VRAM_TYPE_UNKNOWN',
+ 'AMDSMI_VRAM_TYPE__MAX', 'AMDSMI_XGMI_LINK_DISABLE',
+ 'AMDSMI_XGMI_LINK_DOWN', 'AMDSMI_XGMI_LINK_UP',
+ 'AMDSMI_XGMI_STATUS_ERROR', 'AMDSMI_XGMI_STATUS_MULTIPLE_ERRORS',
'AMDSMI_XGMI_STATUS_NO_ERRORS', 'CLK_LIMIT_MAX', 'CLK_LIMIT_MIN',
'RD_BW0', 'WR_BW0', 'amd_metrics_table_header_t',
'amdsmi_accelerator_partition_profile_config_t',
diff --git a/src/amd_smi/amd_smi_gpu_device.cc b/src/amd_smi/amd_smi_gpu_device.cc
index 602041674d..a20ae44f2b 100644
--- a/src/amd_smi/amd_smi_gpu_device.cc
+++ b/src/amd_smi/amd_smi_gpu_device.cc
@@ -100,6 +100,35 @@ uint32_t AMDSmiGPUDevice::get_drm_render_minor() {
return this->drm_render_minor_;
}
+// Looks up this device's KFD gpu_id through rsmi_dev_device_identifiers_get(), stores it in
+// kfd_gpu_id_ and returns it. If the lookup fails, the uint64_t maximum is stored and returned
+// instead. The status and every identifier field are written to the debug log on each call.
+uint64_t AMDSmiGPUDevice::get_kfd_gpu_id() {
+    std::ostringstream ss;
+    uint32_t gpu_index = this->get_gpu_id();
+    rsmi_device_identifiers_t identifiers = rsmi_device_identifiers_t{};
+    rsmi_status_t ret = rsmi_dev_device_identifiers_get(gpu_index, &identifiers);
+    if (ret != rsmi_status_t::RSMI_STATUS_SUCCESS) {
+        this->kfd_gpu_id_ = std::numeric_limits<uint64_t>::max();
+    } else {
+        this->kfd_gpu_id_ = identifiers.kfd_gpu_id;
+    }
+
+    ss << __PRETTY_FUNCTION__
+       << " | rsmi_dev_device_identifiers_get status: " << getRSMIStatusString(ret, false) << "\n"
+       << " | gpu_id_: " << gpu_id_ << "\n"
+       << " | identifiers.card_index: " << identifiers.card_index << "\n"
+       << " | identifiers.drm_render_minor: " << identifiers.drm_render_minor << "\n"
+       << " | identifiers.bdfid: " << std::hex << "0x" << identifiers.bdfid << "\n"
+       << " | identifiers.kfd_gpu_id: " << std::dec << identifiers.kfd_gpu_id << "\n"
+       << " | identifiers.partition_id: " << identifiers.partition_id << "\n"
+       << " | identifiers.smi_device_id: " << identifiers.smi_device_id << "\n"
+       << " | returning kfd_gpu_id_: "
+       << this->kfd_gpu_id_ << "\n";
+    LOG_DEBUG(ss);
+    return this->kfd_gpu_id_;
+}
+
uint32_t AMDSmiGPUDevice::get_gpu_fd() const {
return fd_;
}
@@ -159,7 +188,6 @@ pthread_mutex_t* AMDSmiGPUDevice::get_mutex() {
return amd::smi::GetMutex(gpu_id_);
}
-
int32_t AMDSmiGPUDevice::get_compute_process_list_impl(GPUComputeProcessList_t& compute_process_list,
ComputeProcessListType_t list_type)
{
@@ -231,6 +259,34 @@ int32_t AMDSmiGPUDevice::get_compute_process_list_impl(GPUComputeProcessList_t&
// Copy the cu occupancy from rsmi_process_info_t to amdsmi_proc_info_t
asmi_proc_info.cu_occupancy = rsmi_proc_info.cu_occupancy;
+    // Per-process VRAM for KFD processes: the first line of
+    // /sys/class/kfd/kfd/proc/<pid>/vram_<kfd_gpu_id> is parsed and used as a byte count.
+    uint64_t kfd_gpu_id = get_kfd_gpu_id();
+    std::string kfd_path = "/sys/class/kfd/kfd/proc/" +
+                           std::to_string(rsmi_proc_info.process_id) +
+                           "/vram_" + std::to_string(kfd_gpu_id);
+
+    // Opening the stream is itself the existence/permission check; a separate access() probe
+    // would only race with it. The stream is closed by its destructor at end of scope.
+    std::ifstream kfd_file(kfd_path);
+    std::string line;
+    if (!kfd_file.is_open()) {
+        std::ostringstream ss;
+        ss << __PRETTY_FUNCTION__ << " | Failed to open KFD file: " << kfd_path;
+        LOG_DEBUG(ss);
+    } else if (std::getline(kfd_file, line)) {
+        try {
+            uint64_t vram_bytes = std::stoull(line);
+            asmi_proc_info.mem = vram_bytes;
+            asmi_proc_info.memory_usage.vram_mem = vram_bytes;
+        } catch (const std::exception& e) {
+            // Content was not a number: keep whatever values were filled in earlier
+            std::ostringstream ss;
+            ss << __PRETTY_FUNCTION__ << " | Failed to parse VRAM value from KFD: " << e.what();
+            LOG_DEBUG(ss);
+        }
+    }
+
return status_code;
};
diff --git a/src/amd_smi/fdinfo.cc b/src/amd_smi/fdinfo.cc
index 13454a3b29..1dd43a9382 100644
--- a/src/amd_smi/fdinfo.cc
+++ b/src/amd_smi/fdinfo.cc
@@ -166,26 +166,27 @@ amdsmi_status_t gpuvsmi_get_pid_info(const amdsmi_bdf_t &bdf, long int pid,
if (it == pasids.end()) pasids.push_back(pasid);
} else if (line.find("drm-memory-gtt:") != std::string::npos) {
unsigned long mem;
if (sscanf(line.c_str(), "drm-memory-gtt: %lu", &mem) != 1) continue;
info.mem += mem * 1000;
info.memory_usage.gtt_mem += mem * 1000;
} else if (line.find("drm-memory-cpu:") != std::string::npos) {
unsigned long mem;
if (sscanf(line.c_str(), "drm-memory-cpu: %lu", &mem) != 1) continue;
info.mem += mem * 1000;
info.memory_usage.cpu_mem += mem * 1000;
} else if (line.find("drm-memory-vram:") != std::string::npos) {
unsigned long mem;
- if (sscanf(line.c_str(), "drm-memory-vram: %lu", &mem) != 1) continue;\
+ if (sscanf(line.c_str(), "drm-memory-vram: %lu", &mem) != 1) continue;
info.mem += mem * 1000;
info.memory_usage.vram_mem += mem * 1000;
} else if (line.find("drm-engine-gfx") != std::string::npos) {
uint64_t engine_gfx;
- if (sscanf(line.c_str(), "drm-engine-gfx: %lu", &engine_gfx) != 1) continue;
+ // SCNu64 is the scanf conversion for uint64_t; a 32-bit conversion would fill only half of it
+ if (sscanf(line.c_str(), "drm-engine-gfx: %" SCNu64, &engine_gfx) != 1) continue;
info.engine_usage.gfx = engine_gfx;
} else if (line.find("drm-engine-enc") != std::string::npos) {
uint64_t engine_enc;
- if (sscanf(line.c_str(), "drm-engine-enc: %lu", &engine_enc) != 1) continue;
+ if (sscanf(line.c_str(), "drm-engine-enc: %" SCNu64, &engine_enc) != 1) continue;
info.engine_usage.enc = engine_enc;
}
}