diff --git a/projects/amdsmi/CHANGELOG.md b/projects/amdsmi/CHANGELOG.md
index cbe903cdf5..9cf0efeacc 100644
--- a/projects/amdsmi/CHANGELOG.md
+++ b/projects/amdsmi/CHANGELOG.md
@@ -12,6 +12,11 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr
### Changed
+- **Added Compute Unit Occupancy information per process**
+ Measuring compute unit occupancy is currently the best way to determine GFX usage on a per-process basis.
+ - Added `CU_OCCUPANCY` to `amd-smi process` output.
+ - Added `CU%` to `amd-smi monitor -q`
+
- **Expanded Violation Status tracking for GPU metrics 1.8.**
- The driver will no longer be supporting existing single-value GFX Clk Below Host Limit fields (`acc_gfx_clk_below_host_limit`, `per_gfx_clk_below_host_limit`, `active_gfx_clk_below_host_limit`), they are now changed in favor of new per-XCP/XCC arrays.
- Added new fields to `amdsmi_violation_status_t` and related interfaces for enhanced violation breakdown:
@@ -54,11 +59,8 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr
### Resolved issues
-- N/A
-
-### Upcoming changes
-
-- N/A
+- **Corrected VRAM memory calculation in `amdsmi_get_gpu_process_list`.**
+ - Previously, the VRAM usage reported by `amdsmi_get_gpu_process_list` was inaccurate because fdinfo values were converted to bytes using a factor of 1024 (KiB) instead of 1000 (KB).
### Known issues
diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py
index ffc91c34e1..a330e2b3d8 100644
--- a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py
+++ b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py
@@ -3300,8 +3300,21 @@ class AMDSMICommands():
filtered_process_values = []
for process_info in process_list:
- process_info['mem_usage'] = process_info.pop('mem')
- process_info['usage'] = process_info.pop('engine_usage')
+ process_info = {
+ "name": process_info["name"],
+ "pid": process_info["pid"],
+ "memory_usage": {
+ "gtt_mem": process_info["memory_usage"]["gtt_mem"],
+ "cpu_mem": process_info["memory_usage"]["cpu_mem"],
+ "vram_mem": process_info["memory_usage"]["vram_mem"],
+ },
+ "mem_usage": process_info["mem"],
+ "usage": {
+ "gfx": process_info["engine_usage"]["gfx"],
+ "enc": process_info["engine_usage"]["enc"],
+ },
+ "cu_occupancy": process_info["cu_occupancy"]
+ }
engine_usage_unit = "ns"
memory_usage_unit = "B"
@@ -5714,35 +5727,43 @@ class AMDSMICommands():
logging.debug("Failed to get process list for gpu %s | %s", gpu_id, e.get_error_info())
raise e
+ try:
+ num_compute_units = amdsmi_interface.amdsmi_get_gpu_asic_info(args.gpu)['num_compute_units']
+ except (KeyError, amdsmi_exception.AmdSmiLibraryException) as e:
+ num_compute_units = "N/A"
+ logging.debug("Failed to get num compute units for gpu %s | %s", gpu_id, e.get_error_info())
+
# Clean processes dictionary
filtered_process_values = []
for process_info in process_list:
- process_info['mem_usage'] = process_info.pop('mem')
- process_info['usage'] = process_info.pop('engine_usage')
+ process_info.pop('mem') # Remove 'mem' value
+ process_info.pop('engine_usage') # Remove 'engine_usage' value
- engine_usage_unit = "ns"
memory_usage_unit = "B"
-
if self.logger.is_human_readable_format():
- process_info['mem_usage'] = self.helpers.convert_bytes_to_readable(process_info['mem_usage'])
for usage_metric in process_info['memory_usage']:
process_info["memory_usage"][usage_metric] = self.helpers.convert_bytes_to_readable(process_info["memory_usage"][usage_metric])
memory_usage_unit = ""
- process_info['mem_usage'] = self.helpers.unit_format(self.logger,
- process_info['mem_usage'],
- memory_usage_unit)
-
- for usage_metric in process_info['usage']:
- process_info['usage'][usage_metric] = self.helpers.unit_format(self.logger,
- process_info['usage'][usage_metric],
- engine_usage_unit)
-
for usage_metric in process_info['memory_usage']:
process_info['memory_usage'][usage_metric] = self.helpers.unit_format(self.logger,
process_info['memory_usage'][usage_metric],
memory_usage_unit)
+ if 'cu_occupancy' in process_info:
+ try:
+ cu_occupancy = process_info['cu_occupancy']
+ if num_compute_units != "N/A" and num_compute_units > 0:
+ cu_percentage = round((cu_occupancy / num_compute_units) * 100, 1)
+ process_info['cu_occupancy'] = self.helpers.unit_format(self.logger,
+ cu_percentage,
+ '%')
+ else:
+ process_info['cu_occupancy'] = "N/A"
+ except Exception as e:
+ process_info['cu_occupancy'] = "N/A"
+ logging.debug("Failed to calculate cu_occupancy percentage for GPU %s | %s", gpu_id, str(e))
+
filtered_process_values.append({'process_info': process_info})
# If no processes are populated then we populate an N/A placeholder
@@ -5757,8 +5778,7 @@ class AMDSMICommands():
# Build the process table's title and header
self.logger.secondary_table_title = "PROCESS INFO"
self.logger.secondary_table_header = 'GPU'.rjust(3) + "NAME".rjust(22) + "PID".rjust(9) + "GTT_MEM".rjust(10) + \
- "CPU_MEM".rjust(10) + "VRAM_MEM".rjust(10) + "MEM_USAGE".rjust(11) + \
- "GFX".rjust(8) + "ENC".rjust(8)
+ "CPU_MEM".rjust(10) + "VRAM_MEM".rjust(10) + "CU%".rjust(9)
if watching_output:
self.logger.secondary_table_header = 'TIMESTAMP'.rjust(10) + ' ' + self.logger.secondary_table_header
diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_logger.py b/projects/amdsmi/amdsmi_cli/amdsmi_logger.py
index 571e4a2a9e..06f8c930ee 100644
--- a/projects/amdsmi/amdsmi_cli/amdsmi_logger.py
+++ b/projects/amdsmi/amdsmi_cli/amdsmi_logger.py
@@ -214,9 +214,9 @@ class AMDSMILogger():
if process_dict['process_info'] == "No running processes detected":
# Add N/A for empty process_info
table_values += "N/A".rjust(20) + "N/A".rjust(9) + "N/A".rjust(10) + \
- "N/A".rjust(10) + "N/A".rjust(10) + "N/A".rjust(11) + \
- "N/A".rjust(8) + "N/A".rjust(8) + '\n'
+ "N/A".rjust(10) + "N/A".rjust(10) + "N/A".rjust(9) + '\n'
else:
+ # TODO: fix this here
for process_key, process_value in process_dict['process_info'].items():
string_process_value = str(process_value)
if process_key == "name":
@@ -230,11 +230,8 @@ class AMDSMILogger():
elif process_key == "memory_usage":
for memory_key, memory_value in process_value.items():
table_values += str(memory_value).rjust(10)
- elif process_key == "mem_usage":
- table_values += string_process_value.rjust(11)
- elif process_key == "usage":
- for usage_key, usage_value in process_value.items():
- table_values += str(usage_value).rjust(8)
+ elif process_key == "cu_occupancy":
+ table_values += string_process_value.rjust(9)
# Add the stored gpu and stored timestamp to the next line
table_values += '\n'
if stored_timestamp:
@@ -486,20 +483,6 @@ class AMDSMILogger():
raise amdsmi_cli_exceptions(self, "Invalid output format given, only json, csv, and human_readable supported")
- def _store_output_rocmsmi(self, gpu_id, argument, data):
- if self.is_json_format():
- # put output into self.json_output
- pass
- elif self.is_csv_format():
- # put output into self.csv_output
- pass
- elif self.is_human_readable_format():
- # put output into self.human_readable_output
- pass
- else:
- raise amdsmi_cli_exceptions(self, "Invalid output format given, only json, csv, and human_readable supported")
-
-
def store_multiple_device_output(self):
""" Store the current output into the multiple_device_output
then clear the current output
diff --git a/projects/amdsmi/docs/reference/amdsmi-py-api.md b/projects/amdsmi/docs/reference/amdsmi-py-api.md
index a30f2a1f11..fff571ace7 100644
--- a/projects/amdsmi/docs/reference/amdsmi-py-api.md
+++ b/projects/amdsmi/docs/reference/amdsmi-py-api.md
@@ -1093,7 +1093,6 @@ except AmdSmiException as e:
print(e)
```
-
### amdsmi_get_gpu_process_list
Description: Returns the list of processes running on the target GPU; Requires root level access to display root process names; otherwise will return "N/A"
@@ -1111,6 +1110,7 @@ Field | Description
`mem` | Process memory usage
`engine_usage` |
| Subfield | Description |
| `gfx` | GFX engine usage in ns |
| `enc` | Encode engine usage in ns |
`memory_usage` | | Subfield | Description |
| `gtt_mem` | GTT memory usage |
| `cpu_mem` | CPU memory usage |
| `vram_mem` | VRAM memory usage |
+`cu_occupancy` | Number of Compute Units utilized
Exceptions that can be thrown by `amdsmi_get_gpu_process_list` function:
diff --git a/projects/amdsmi/example/amd_smi_drm_example.cc b/projects/amdsmi/example/amd_smi_drm_example.cc
index 6698e6c799..ded6dd9113 100644
--- a/projects/amdsmi/example/amd_smi_drm_example.cc
+++ b/projects/amdsmi/example/amd_smi_drm_example.cc
@@ -817,6 +817,7 @@ int main() {
amdsmi_proc_info_t process = {};
uint64_t mem = 0, gtt_mem = 0, cpu_mem = 0, vram_mem = 0;
uint64_t gfx = 0, enc = 0;
+ uint32_t cu_occupancy = 0;
char bdf_str[20];
sprintf(bdf_str, "%04" PRIx64 ":%02" PRIx32 ":%02" PRIx32 ".%" PRIu32,
static_cast(bdf.domain_number),
@@ -837,7 +838,7 @@ int main() {
printf(
"| pid | name | user | gpu bdf | "
"fb usage | gtt memory | cpu memory | vram memory | "
- "engine usage (ns) |\n");
+ "engine usage (ns) | cu occupancy |\n");
printf("| | | | "
"| | | | "
" | gfx enc |\n");
@@ -855,30 +856,34 @@ int main() {
pwd = getpwuid(st.st_uid);
if (!pwd)
printf("| %5d | %16s | %10d | %s | %7ld KiB | %7ld KiB "
- "| %7ld KiB | %7ld KiB | %lu %lu |\n",
+ "| %7ld KiB | %7ld KiB | %lu %lu | %u |\n",
process_info_list[it].pid, process_info_list[it].name, st.st_uid,
bdf_str, process_info_list[it].mem / 1024,
process_info_list[it].memory_usage.gtt_mem / 1024,
process_info_list[it].memory_usage.cpu_mem / 1024,
process_info_list[it].memory_usage.vram_mem / 1024,
process_info_list[it].engine_usage.gfx,
- process_info_list[it].engine_usage.enc);
+ process_info_list[it].engine_usage.enc,
+ process_info_list[it].cu_occupancy);
else
printf("| %5d | %16s | %10s | %s | %7ld KiB | %7ld KiB "
- "| %7ld KiB | %7ld KiB | %lu %lu |\n",
+ "| %7ld KiB | %7ld KiB | %lu %lu | %u |\n",
process_info_list[it].pid, process_info_list[it].name,
pwd->pw_name, bdf_str, process_info_list[it].mem / 1024,
process_info_list[it].memory_usage.gtt_mem / 1024,
process_info_list[it].memory_usage.cpu_mem / 1024,
process_info_list[it].memory_usage.vram_mem / 1024,
process_info_list[it].engine_usage.gfx,
- process_info_list[it].engine_usage.enc);
+ process_info_list[it].engine_usage.enc,
+ process_info_list[it].cu_occupancy);
+
mem += process_info_list[it].mem / 1024;
gtt_mem += process_info_list[it].memory_usage.gtt_mem / 1024;
cpu_mem += process_info_list[it].memory_usage.cpu_mem / 1024;
vram_mem += process_info_list[it].memory_usage.vram_mem / 1024;
gfx = process_info_list[it].engine_usage.gfx;
enc = process_info_list[it].engine_usage.enc;
+ cu_occupancy = process_info_list[it].cu_occupancy;
printf(
"+-------+------------------+------------+-------------"
"-+-------------+-------------+-------------+----------"
@@ -887,10 +892,9 @@ int main() {
// TODO: To remove compiler warning, the last 3 values in this printf were
// set to 0L. Need to find out what these values need to be.
printf("| TOTAL:| %s | %7ld "
- "KiB | %7ld KiB | %7ld KiB | %7ld KiB | %lu %lu "
- "%lu %lu %lu |\n",
+ "KiB | %7ld KiB | %7ld KiB | %7ld KiB | %lu %lu | %u |\n",
bdf_str, mem, gtt_mem, cpu_mem, vram_mem, gfx,
- enc, 0L, 0L, 0L);
+ enc, cu_occupancy, 0L);
printf("+=======+==================+============+=============="
"+=============+=============+=============+============"
"=+==========================================+\n");
diff --git a/projects/amdsmi/include/amd_smi/amdsmi.h b/projects/amdsmi/include/amd_smi/amdsmi.h
index 02a0117772..d5cf983b93 100644
--- a/projects/amdsmi/include/amd_smi/amdsmi.h
+++ b/projects/amdsmi/include/amd_smi/amdsmi.h
@@ -1093,7 +1093,8 @@ typedef struct {
uint32_t reserved[10];
} memory_usage; //!< in bytes
char container_name[AMDSMI_MAX_STRING_LENGTH];
- uint32_t reserved[12];
+ uint32_t cu_occupancy; //!< Num CUs utilized
+ uint32_t reserved[11];
} amdsmi_proc_info_t;
/**
diff --git a/projects/amdsmi/py-interface/amdsmi_interface.py b/projects/amdsmi/py-interface/amdsmi_interface.py
index fc942257b0..6e7f2a3b01 100644
--- a/projects/amdsmi/py-interface/amdsmi_interface.py
+++ b/projects/amdsmi/py-interface/amdsmi_interface.py
@@ -2691,6 +2691,7 @@ def amdsmi_get_gpu_process_list(
"cpu_mem": process_list[index].memory_usage.cpu_mem,
"vram_mem": process_list[index].memory_usage.vram_mem,
},
+ "cu_occupancy": process_list[index].cu_occupancy
})
return result
diff --git a/projects/amdsmi/py-interface/amdsmi_wrapper.py b/projects/amdsmi/py-interface/amdsmi_wrapper.py
index c915ba8467..565688cfd6 100644
--- a/projects/amdsmi/py-interface/amdsmi_wrapper.py
+++ b/projects/amdsmi/py-interface/amdsmi_wrapper.py
@@ -1289,7 +1289,8 @@ struct_amdsmi_proc_info_t._fields_ = [
('engine_usage', struct_engine_usage_),
('memory_usage', struct_memory_usage_),
('container_name', ctypes.c_char * 256),
- ('reserved', ctypes.c_uint32 * 12),
+ ('cu_occupancy', ctypes.c_uint32),
+ ('PADDING_1', ctypes.c_ubyte * 4),
]
amdsmi_proc_info_t = struct_amdsmi_proc_info_t
diff --git a/projects/amdsmi/rocm_smi/src/rocm_smi_kfd.cc b/projects/amdsmi/rocm_smi/src/rocm_smi_kfd.cc
index 070e7a6f7b..cbdf93f539 100644
--- a/projects/amdsmi/rocm_smi/src/rocm_smi_kfd.cc
+++ b/projects/amdsmi/rocm_smi/src/rocm_smi_kfd.cc
@@ -456,7 +456,6 @@ int GetProcessInfoForPID(uint32_t pid, rsmi_process_info_t *proc,
proc->sdma_usage = 0;
proc->cu_occupancy = 0;
- uint32_t cu_count = 0;
static amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
static std::map>& kfd_node_map =
smi.kfd_node_map();
@@ -510,23 +509,15 @@ int GetProcessInfoForPID(uint32_t pid, rsmi_process_info_t *proc,
}
else if(sysfs_data_errcode==0){
// Update CU usage by the process
- proc->cu_occupancy += std::stoi(tmp);
- // Collect count of compute units
- cu_count += kfd_node_map[gpu_id]->cu_count();
+ proc->cu_occupancy = std::stoi(tmp);
}
else {
// Some GFX revisions do not provide cu_occupancy debugfs method
// which may cause ENOENT
proc->cu_occupancy = CU_OCCUPANCY_INVALID;
- cu_count = 0;
}
}
- // Adjust CU occupancy to percent.
- if (cu_count > 0) {
- proc->cu_occupancy = ((proc->cu_occupancy * 100) / cu_count);
- }
-
return 0;
}
diff --git a/projects/amdsmi/src/amd_smi/amd_smi_gpu_device.cc b/projects/amdsmi/src/amd_smi/amd_smi_gpu_device.cc
index acaa94e627..602041674d 100644
--- a/projects/amdsmi/src/amd_smi/amd_smi_gpu_device.cc
+++ b/projects/amdsmi/src/amd_smi/amd_smi_gpu_device.cc
@@ -216,6 +216,9 @@ int32_t AMDSmiGPUDevice::get_compute_process_list_impl(GPUComputeProcessList_t&
/**
* Complete the process information
+ * gpuvsmi_get_pid_info() populates the larger amdsmi_proc_info_t first;
+ * the remaining fields (e.g. cu_occupancy) are then copied over from rsmi_process_info_t.
+ * TODO: fix here to grab the KFD VRAM value if /proc is inconsistent
*/
auto get_process_info = [&](const rsmi_process_info_t& rsmi_proc_info, amdsmi_proc_info_t& asmi_proc_info) {
auto status_code = gpuvsmi_get_pid_info(get_bdf(), rsmi_proc_info.process_id, asmi_proc_info);
@@ -225,6 +228,9 @@ int32_t AMDSmiGPUDevice::get_compute_process_list_impl(GPUComputeProcessList_t&
asmi_proc_info.memory_usage.vram_mem = rsmi_proc_info.vram_usage;
}
+ // Copy the cu occupancy from rsmi_process_info_t to amdsmi_proc_info_t
+ asmi_proc_info.cu_occupancy = rsmi_proc_info.cu_occupancy;
+
return status_code;
};
diff --git a/projects/amdsmi/src/amd_smi/fdinfo.cc b/projects/amdsmi/src/amd_smi/fdinfo.cc
index 30f1be27a9..13454a3b29 100644
--- a/projects/amdsmi/src/amd_smi/fdinfo.cc
+++ b/projects/amdsmi/src/amd_smi/fdinfo.cc
@@ -20,16 +20,17 @@
* THE SOFTWARE.
*/
-#include
#include
#include
+#include
+#include
#include
-#include
-#include
+
+#include
#include
#include
-#include
-#include
+#include
+#include
#include "amd_smi/amdsmi.h"
#include "amd_smi/impl/amd_smi_utils.h"
@@ -37,230 +38,196 @@
extern "C" {
static const char *container_type_name[AMDSMI_MAX_CONTAINER_TYPE] = {
- [AMDSMI_CONTAINER_LXC] = "lxc",
- [AMDSMI_CONTAINER_DOCKER] = "docker",
+ [AMDSMI_CONTAINER_LXC] = "lxc",
+ [AMDSMI_CONTAINER_DOCKER] = "docker",
};
-amdsmi_status_t gpuvsmi_pid_is_gpu(const std::string &path, const char *bdf)
-{
- DIR *d;
- struct dirent *dir;
+amdsmi_status_t gpuvsmi_pid_is_gpu(const std::string &path, const char *bdf) {
+ DIR *d;
+ struct dirent *dir;
- d = opendir(path.c_str());
- if (!d)
- return AMDSMI_STATUS_NO_PERM;
+ d = opendir(path.c_str());
+ if (!d) return AMDSMI_STATUS_NO_PERM;
- /* iterate through all the fds, try to find
- * a match for the GPU bdf
- */
- while ((dir = readdir(d)) != NULL) {
- std::string file = path + dir->d_name;
- std::ifstream fdinfo(file.c_str());
- for (std::string line; std::getline(fdinfo, line);) {
- if (line.find(bdf) != std::string::npos) {
- closedir(d);
- return AMDSMI_STATUS_SUCCESS;
- }
- }
- }
+ /* iterate through all the fds, try to find
+ * a match for the GPU bdf
+ */
+ while ((dir = readdir(d)) != NULL) {
+ std::string file = path + dir->d_name;
+ std::ifstream fdinfo(file.c_str());
+ for (std::string line; std::getline(fdinfo, line);) {
+ if (line.find(bdf) != std::string::npos) {
+ closedir(d);
+ return AMDSMI_STATUS_SUCCESS;
+ }
+ }
+ }
- closedir(d);
+ closedir(d);
- return AMDSMI_STATUS_NOT_FOUND;
+ return AMDSMI_STATUS_NOT_FOUND;
}
-amdsmi_status_t gpuvsmi_get_pids(const amdsmi_bdf_t &bdf, std::vector &pids, uint64_t *size)
-{
- char bdf_str[13];
- DIR *d;
- struct dirent *dir;
+amdsmi_status_t gpuvsmi_get_pids(const amdsmi_bdf_t &bdf, std::vector &pids,
+ uint64_t *size) {
+ char bdf_str[13];
+ DIR *d;
+ struct dirent *dir;
- /* 0000:00:00.0 */
- snprintf(bdf_str, 13, "%04" PRIx32 ":%02" PRIx32 ":%02" PRIx32 ".%" PRIu32,
- static_cast(bdf.domain_number & 0xffff),
- static_cast(bdf.bus_number & 0xff),
- static_cast(bdf.device_number & 0x1f),
- static_cast(bdf.function_number & 0x7));
+ /* 0000:00:00.0 */
+ snprintf(bdf_str, 13, "%04" PRIx32 ":%02" PRIx32 ":%02" PRIx32 ".%" PRIu32,
+ static_cast(bdf.domain_number & 0xffff),
+ static_cast(bdf.bus_number & 0xff),
+ static_cast(bdf.device_number & 0x1f),
+ static_cast(bdf.function_number & 0x7));
- d = opendir("/proc");
- if (!d)
- return AMDSMI_STATUS_NO_PERM;
+ d = opendir("/proc");
+ if (!d) return AMDSMI_STATUS_NO_PERM;
- pids.clear();
- /* Find the pid folders in /proc/ that we have access to */
- while ((dir = readdir(d)) != NULL) {
- if (dir->d_type == DT_DIR) {
- /* Try to cast the name of the folder to a
- * number, if it fails, it is not */
- char *p;
- long int pid;
+ pids.clear();
+ /* Find the pid folders in /proc/ that we have access to */
+ while ((dir = readdir(d)) != NULL) {
+ if (dir->d_type == DT_DIR) {
+ /* Try to cast the name of the folder to a
+ * number; if it fails, it is not a pid folder */
+ char *p;
+ long int pid;
- pid = strtol(dir->d_name, &p, 10);
- if (*p != 0)
- continue;
+ pid = strtol(dir->d_name, &p, 10);
+ if (*p != 0) continue;
- /* Check if fdinfo is accesible */
- std::string path = "/proc/" + std::string(dir->d_name) + "/fdinfo/";
+ /* Check if fdinfo is accessible */
+ std::string path = "/proc/" + std::string(dir->d_name) + "/fdinfo/";
- if (access(path.c_str(), R_OK))
- continue;
+ if (access(path.c_str(), R_OK)) continue;
- /* check if GPU is present */
- if (gpuvsmi_pid_is_gpu(path, bdf_str))
- continue;
- pids.push_back(pid);
- }
- }
- closedir(d);
+ /* check if GPU is present */
+ if (gpuvsmi_pid_is_gpu(path, bdf_str)) continue;
+ pids.push_back(pid);
+ }
+ }
+ closedir(d);
- *size = pids.size();
- return AMDSMI_STATUS_SUCCESS;
+ *size = pids.size();
+ return AMDSMI_STATUS_SUCCESS;
}
amdsmi_status_t gpuvsmi_get_pid_info(const amdsmi_bdf_t &bdf, long int pid,
- amdsmi_proc_info_t &info)
-{
- char bdf_str[13];
- DIR *d;
- struct dirent *dir;
+ amdsmi_proc_info_t &info) {
+ char bdf_str[13];
+ DIR *d;
+ struct dirent *dir;
- /* 0000:00:00.0 */
- snprintf(bdf_str, 13, "%04" PRIx32 ":%02" PRIx32 ":%02" PRIx32 ".%" PRIu32,
- static_cast(bdf.domain_number & 0xffff),
- static_cast(bdf.bus_number & 0xff),
- static_cast(bdf.device_number & 0x1f),
- static_cast(bdf.function_number & 0x7));
+ /* 0000:00:00.0 */
+ snprintf(bdf_str, 13, "%04" PRIx32 ":%02" PRIx32 ":%02" PRIx32 ".%" PRIu32,
+ static_cast(bdf.domain_number & 0xffff),
+ static_cast(bdf.bus_number & 0xff),
+ static_cast(bdf.device_number & 0x1f),
+ static_cast(bdf.function_number & 0x7));
- std::string path = "/proc/" + std::to_string(pid) + "/fdinfo/";
- std::string name_path = "/proc/" + std::to_string(pid) + "/comm";
- std::string cgroup_path = "/proc/" + std::to_string(pid) + "/cgroup";
+ std::string path = "/proc/" + std::to_string(pid) + "/fdinfo/";
+ std::string name_path = "/proc/" + std::to_string(pid) + "/comm";
+ std::string cgroup_path = "/proc/" + std::to_string(pid) + "/cgroup";
- if (gpuvsmi_pid_is_gpu(path.c_str(), bdf_str)) {
- return AMDSMI_STATUS_INVAL;
- }
+ if (gpuvsmi_pid_is_gpu(path.c_str(), bdf_str)) {
+ return AMDSMI_STATUS_INVAL;
+ }
- d = opendir(path.c_str());
- if (!d)
- return AMDSMI_STATUS_NO_PERM;
+ d = opendir(path.c_str());
+ if (!d) return AMDSMI_STATUS_NO_PERM;
- /* Vectors to check if repated fd pasid */
- // TODO remove pasid Not working in ROCm 6.4+, deprecating in 7.0
- std::vector pasids;
+ /* Vector used to check for repeated fd pasids */
+ // TODO remove pasid Not working in ROCm 6.4+, deprecating in 7.0
+ std::vector pasids;
- memset(&info, 0, sizeof(info));
- /* Iterate through all fdinfos */
- while ((dir = readdir(d)) != NULL) {
+ memset(&info, 0, sizeof(info));
+ /* Iterate through all fdinfos */
+ while ((dir = readdir(d)) != NULL) {
+ std::string file = path + dir->d_name;
+ std::ifstream fdinfo(file.c_str());
- std::string file = path + dir->d_name;
- std::ifstream fdinfo(file.c_str());
+ for (std::string bdfline; getline(fdinfo, bdfline);) {
+ if (bdfline.find("drm-pdev:") != std::string::npos) {
+ char fd_bdf_str[13];
- for (std::string bdfline; getline(fdinfo, bdfline);) {
- if (bdfline.find("drm-pdev:") != std::string::npos) {
- char fd_bdf_str[13];
+ /* Only check against fdinfo files that contain a bdf */
+ if (sscanf(bdfline.c_str(), "drm-pdev: %s", &fd_bdf_str[0]) != 1) continue;
- /* Only check against fdinfo files that contain a bdf */
- if (sscanf(bdfline.c_str(), "drm-pdev: %s", &fd_bdf_str[0]) != 1)
- continue;
+ /* Populate amdsmi_proc_info_t struct only if the bdf in
+ * the fdinfo file matches the passed bdf */
+ if (strncmp(bdf_str, fd_bdf_str, 13) == 0) {
+ std::ifstream fdinfo(file.c_str());
- /* Populate amdsmi_proc_info_t struct only if the bdf in
- * the fdinfo file matches the passed bdf */
- if (strncmp(bdf_str, fd_bdf_str, 13) == 0){
- std::ifstream fdinfo(file.c_str());
+ for (std::string line; getline(fdinfo, line);) {
+ if (line.find("pasid:") != std::string::npos) {
+ int pasid;
+ if (sscanf(line.c_str(), "pasid: %d", &pasid) != 1) continue;
+ auto it = std::find(pasids.begin(), pasids.end(), pasid);
+ if (it == pasids.end()) pasids.push_back(pasid);
+ } else if (line.find("drm-memory-gtt:") != std::string::npos) {
+ unsigned long mem;
+ if (sscanf(line.c_str(), "drm-memory-gtt: %lu", &mem) != 1) continue;
+ info.mem += mem * 1000;
+ info.memory_usage.gtt_mem += mem * 1000;
+ } else if (line.find("drm-memory-cpu:") != std::string::npos) {
+ unsigned long mem;
+ if (sscanf(line.c_str(), "drm-memory-cpu: %lu", &mem) != 1) continue;
+ info.mem += mem * 1000;
+ info.memory_usage.cpu_mem += mem * 1000;
+ } else if (line.find("drm-memory-vram:") != std::string::npos) {
+ unsigned long mem;
+ if (sscanf(line.c_str(), "drm-memory-vram: %lu", &mem) != 1) continue;\
+ info.mem += mem * 1000;
+ info.memory_usage.vram_mem += mem * 1000;
+ } else if (line.find("drm-engine-gfx") != std::string::npos) {
+ uint64_t engine_gfx;
+ if (sscanf(line.c_str(), "drm-engine-gfx: %lu", &engine_gfx) != 1) continue;
+ info.engine_usage.gfx = engine_gfx;
+ } else if (line.find("drm-engine-enc") != std::string::npos) {
+ uint64_t engine_enc;
+ if (sscanf(line.c_str(), "drm-engine-enc: %lu", &engine_enc) != 1) continue;
+ info.engine_usage.enc = engine_enc;
+ }
+ }
+ }
+ }
+ }
+ }
- for (std::string line; getline(fdinfo, line);) {
- if (line.find("pasid:") != std::string::npos) {
- int pasid;
-
- if (sscanf(line.c_str(), "pasid: %d", &pasid) != 1)
- continue;
-
- auto it = std::find(pasids.begin(), pasids.end(), pasid);
-
- if (it == pasids.end())
- pasids.push_back(pasid);
- } else if (line.find("drm-memory-gtt:") != std::string::npos) {
- unsigned long mem;
-
- if (sscanf(line.c_str(), "drm-memory-gtt: %lu", &mem) != 1)
- continue;
-
- info.mem += mem * 1024;
- info.memory_usage.gtt_mem += mem * 1024;
- } else if (line.find("drm-memory-cpu:") != std::string::npos) {
- unsigned long mem;
-
- if (sscanf(line.c_str(), "drm-memory-cpu: %lu", &mem) != 1)
- continue;
-
- info.mem += mem * 1024;
- info.memory_usage.cpu_mem += mem * 1024;
- } else if (line.find("drm-memory-vram:") != std::string::npos) {
- unsigned long mem;
-
- if (sscanf(line.c_str(), "drm-memory-vram: %lu", &mem) != 1)
- continue;
-
- info.mem += mem * 1024;
- info.memory_usage.vram_mem += mem * 1024;
- } else if (line.find("drm-engine-gfx") != std::string::npos) {
- uint64_t engine_gfx;
-
- if (sscanf(line.c_str(), "drm-engine-gfx: %lu", &engine_gfx) != 1)
- continue;
-
- info.engine_usage.gfx = engine_gfx;
- } else if (line.find("drm-engine-enc") != std::string::npos) {
- uint64_t engine_enc;
-
- if (sscanf(line.c_str(), "drm-engine-enc: %lu", &engine_enc) != 1)
- continue;
-
- info.engine_usage.enc = engine_enc;
- }
- }
- }
- }
- }
- }
-
- closedir(d);
+ closedir(d);
// Note: If possible at all, try to get the name of the process/container.
// In case the other info fail, get at least something.
- std::ifstream filename(name_path.c_str());
- std::string name;
+ std::ifstream filename(name_path.c_str());
+ std::string name;
- getline(filename, name);
+ getline(filename, name);
- if (name.empty())
- return AMDSMI_STATUS_API_FAILED;
+ if (name.empty()) return AMDSMI_STATUS_API_FAILED;
- strncpy(info.name, name.c_str(), std::min(
- (unsigned long) AMDSMI_MAX_STRING_LENGTH,
- name.length()));
+ strncpy(info.name, name.c_str(),
+ std::min((unsigned long)AMDSMI_MAX_STRING_LENGTH, name.length()));
- for (int i = 0; i < AMDSMI_MAX_CONTAINER_TYPE; i++) {
- std::ifstream cgroup_info(cgroup_path.c_str());
- std::string container_id;
- for (std::string line; getline(cgroup_info, line);) {
- if (line.find(container_type_name[i]) != std::string::npos) {
- container_id = line.substr(line.find(container_type_name[i]) +
- strlen(container_type_name[i]) + 1, 16);
- strcpy(info.container_name, container_id.c_str());
- break;
- }
- }
- if (strlen(info.container_name) > 0)
- break;
- }
- info.pid = (uint32_t)pid;
+ for (int i = 0; i < AMDSMI_MAX_CONTAINER_TYPE; i++) {
+ std::ifstream cgroup_info(cgroup_path.c_str());
+ std::string container_id;
+ for (std::string line; getline(cgroup_info, line);) {
+ if (line.find(container_type_name[i]) != std::string::npos) {
+ container_id = line.substr(line.find(container_type_name[i]) +
+ strlen(container_type_name[i]) + 1, 16);
+ strcpy(info.container_name, container_id.c_str());
+ break;
+ }
+ }
+ if (strlen(info.container_name) > 0) break;
+ }
+ info.pid = (uint32_t)pid;
- if (!pasids.size()) {
- return AMDSMI_STATUS_NOT_FOUND;
+ if (!pasids.size()) {
+ return AMDSMI_STATUS_NOT_FOUND;
}
- return AMDSMI_STATUS_SUCCESS;
+ return AMDSMI_STATUS_SUCCESS;
}
-
-} // extern "C"
+} // extern "C"
diff --git a/projects/amdsmi/tools/amdsmi_quick_start.py b/projects/amdsmi/tools/amdsmi_quick_start.py
index 4c0c35a4b6..2440fbeff0 100644
--- a/projects/amdsmi/tools/amdsmi_quick_start.py
+++ b/projects/amdsmi/tools/amdsmi_quick_start.py
@@ -44,15 +44,12 @@ try:
from amdsmi_logger import AMDSMILogger
from amdsmi_parser import AMDSMIParser
import amdsmi_cli_exceptions
+ helpers = AMDSMIHelpers()
except ImportError as e:
print(f"Failed to import amdsmi cli libs: {e}")
print("Ensure that you have installed amdsmi's package.")
-helpers = AMDSMIHelpers()
-
-
-
# Make exit & quit work without parens because it's annoying
type(exit).__repr__ = sys.exit