From 8326c33d33d4ddd6583b7a61185bc908e57d0164 Mon Sep 17 00:00:00 2001 From: Bindhiya Kanangot Balakrishnan Date: Thu, 8 Jan 2026 10:19:45 -0600 Subject: [PATCH] [SWDEV-573540] Add DRM-based wake for suspended AMD GPUs (#2510) Implements automatic device wake using the getDRMDeviceId() DRM call when GPUs are detected in a low-power state. This ensures rocm-smi can access device information on suspended GPUs. Signed-off-by: Bindhiya Kanangot Balakrishnan --- projects/rocm-smi-lib/python_smi_tools/rocm_smi.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py b/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py index 408a69ae31..f3a53b61e1 100755 --- a/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py +++ b/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py @@ -4518,7 +4518,19 @@ if __name__ == '__main__': if not checkAmdGpus(deviceList): logging.warning('No AMD GPUs specified') if not check_runtime_status(): - logging.warning('AMD GPU device(s) is/are in a low-power state. Check power control/runtime_status\n') + wake_device_failed = False + logging.debug('Using DRM device ID call to wake suspended devices') + for device in deviceList: + try: + device_id = getDRMDeviceId(device, silent=True) + if device_id == 'N/A': + wake_device_failed = True + logging.debug(f'Failed to wake device {device} via DRM call') + except Exception as e: + wake_device_failed = True + logging.debug(f'Exception waking device {device}: {str(e)}') + if wake_device_failed: + logging.warning('AMD GPU device(s) is/are in a low-power state. Check power control/runtime_status\n') if isConciseInfoRequested(args): showAllConcise(deviceList) if args.showhw: