From 3659db6f21d196ebf865497648e9f25fa4a52270 Mon Sep 17 00:00:00 2001 From: "Ramalingam, Muthusamy" Date: Tue, 18 Nov 2025 01:15:43 +0530 Subject: [PATCH] [SWDEV-560044]: [AMDSMI][CPU] Update AMDSMI as per latest ESMI Driver (#763) [AMDSMI][CPU] Update AMDSMI as per latest ESMI Driver, 1) hsmp_acpi 2) amd_hsmp 3) hsmp_common Signed-off-by: Muthusamy Ramalingam Signed-off-by: Arif, Maisam Co-authored-by: ssaka_amdeng [ROCm/amdsmi commit: b4b3539631460b986dddc86a2303cef11cd38816] --- projects/amdsmi/CHANGELOG.md | 33 +++++++++++++------ projects/amdsmi/amdsmi_cli/amdsmi_commands.py | 10 +++--- projects/amdsmi/amdsmi_cli/amdsmi_helpers.py | 8 ++--- projects/amdsmi/amdsmi_cli/amdsmi_init.py | 19 +++++------ projects/amdsmi/amdsmi_cli/amdsmi_parser.py | 2 +- projects/amdsmi/docs/install/install.md | 3 +- .../goamdsmi_shim/smiwrapper/amdsmi_go_shim.c | 2 +- .../amdsmi/tests/amd_smi_test/test_base.cc | 7 ++-- 8 files changed, 48 insertions(+), 36 deletions(-) diff --git a/projects/amdsmi/CHANGELOG.md b/projects/amdsmi/CHANGELOG.md index 2c3eda88a9..a085d079aa 100644 --- a/projects/amdsmi/CHANGELOG.md +++ b/projects/amdsmi/CHANGELOG.md @@ -67,16 +67,29 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr ### Changed -- **`amd-smi set --power-cap` now requires specification of the power cap type**. - - Command now takes the form: `amd-smi set --power-cap `. Acceptable power cap types are "ppt0" and "ppt1". - Ex. +- **The `amd-smi` command now shows hsmp rather than amd_hsmp**. + - The hsmp driver version can be shown without the amdgpu version using `amd-smi version -c` + + ```console + $ amd-smi version + AMDSMI Tool: 24.7.1+b446d6c-dirty | AMDSMI Library version: 24.7.2.0 | ROCm version: N/A | amdgpu version: 6.10.10 | hsmp version: 2.2 + + $ amd-smi version -c + AMDSMI Tool: 24.7.1+b446d6c-dirty | AMDSMI Library version: 24.7.2.0 | ROCm version: N/A | hsmp version: 2.2 + ... 
+ ``` + +- **`amd-smi set --power-cap` now requires specification of the power cap type**. + - Command now takes the form: `amd-smi set --power-cap ` + - Acceptable power cap types are "ppt0" and "ppt1" + + ```console + $ sudo amd-smi set --power-cap ppt1 1150 + GPU: 0 + POWERCAP: Successfully set ppt1 power cap to 1150W + ... + ``` - ```console - $ sudo amd-smi set --power-cap ppt1 1150 - GPU: 0 - POWERCAP: Successfully set PPT1 power cap to 1150W - ... - ``` - **`amd-smi reset --power-cap` will attempt to reset both power caps**. - When using the reset command, both PPT0 and PPT1 power caps will be reset to their default values. If a device only has PPT0, then only PPT0 will be reset. Ex. @@ -1402,7 +1415,7 @@ Functions affected by struct change are: - **Corrected CLI CPU argument name**. - `--cpu-pwr-svi-telemtry-rails` to `--cpu-pwr-svi-telemetry-rails` -- **Added amdgpu driver version and amd_hsmp driver version to `amd-smi version` command**. +- **Added amdgpu driver version and amd_hsmp driver version to `amd-smi version` command**. - The `amd-smi version` command can now also display the amdgpu driver version using the `-g` flag. - The amd_hsmp driver version can also be displayed using the `-c` flag. - The new default for the `version` command is to display all the version information, including both amdgpu and amd_hsmp driver versions. 
diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py index 94b9265633..9669683891 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py @@ -96,7 +96,7 @@ class AMDSMICommands(): except amdsmi_exception.AmdSmiLibraryException as e: if e.err_code in (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT, amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_DRV): - logging.info('Unable to detect any CPU devices, check amd_hsmp version and module status (sudo modprobe amd_hsmp)') + logging.info('Unable to detect any CPU devices, check amd_hsmp (or) hsmp_acpi version and module status (sudo modprobe amd_hsmp (or) sudo modprobe hsmp_acpi)') else: raise e @@ -112,7 +112,7 @@ class AMDSMICommands(): if len(self.cpu_handles) == 0 and len(self.core_handles) == 0: # No CPU's found post amd_hsmp driver initialization - logging.error('Unable to detect any CPU devices, check amd_hsmp version and module status (sudo modprobe amd_hsmp)') + logging.error('Unable to detect any CPU devices, check amd_hsmp (or) hsmp_acpi version and module status (sudo modprobe amd_hsmp (or) sudo modprobe hsmp_acpi)') exit_flag = True self.convert_clock_type = { @@ -200,7 +200,7 @@ class AMDSMICommands(): if args.gpu_version: human_readable_output = human_readable_output + f" | amdgpu version: {gpu_version_str}" if args.cpu_version: - human_readable_output = human_readable_output + f" | amd_hsmp version: {cpu_version_str}" + human_readable_output = human_readable_output + f" | hsmp version: {cpu_version_str}" # Custom human readable handling for version if self.logger.destination == 'stdout': print(human_readable_output) @@ -2988,7 +2988,7 @@ class AMDSMICommands(): try: bandwidth = amdsmi_interface.amdsmi_get_cpu_current_io_bandwidth(args.cpu, int(args.cpu_io_bandwidth[0][0]), - args.cpu_io_bandwidth[0][1]) + args.cpu_io_bandwidth[0][1].upper()) static_dict["io_bandwidth"]["band_width"] = 
bandwidth except amdsmi_exception.AmdSmiLibraryException as e: static_dict["io_bandwidth"]["band_width"] = "N/A" @@ -2998,7 +2998,7 @@ class AMDSMICommands(): try: bandwidth = amdsmi_interface.amdsmi_get_cpu_current_xgmi_bw(args.cpu, int(args.cpu_xgmi_bandwidth[0][0]), - args.cpu_xgmi_bandwidth[0][1]) + args.cpu_xgmi_bandwidth[0][1].upper()) static_dict["xgmi_bandwidth"]["band_width"] = bandwidth except amdsmi_exception.AmdSmiLibraryException as e: static_dict["xgmi_bandwidth"]["band_width"] = "N/A" diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py b/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py index b166a75f6c..e30e433a57 100755 --- a/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py @@ -239,11 +239,11 @@ class AMDSMIHelpers(): except amdsmi_interface.AmdSmiLibraryException as e: if e.err_code in (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT, amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED): - logging.info('Unable to get device choices, driver not initialized (amd_hsmp not found in modules)') + logging.info('Unable to get device choices, driver not initialized (amd_hsmp or hsmp_acpi not found in modules)') else: raise e if len(cpu_handles) == 0: - logging.info('Unable to find any devices, check if driver is initialized (amd_hsmp not found in modules)') + logging.info('Unable to find any devices, check if driver is initialized (amd_hsmp or hsmp_acpi not found in modules)') else: # Handle spacing for the gpu_choices_str max_padding = int(math.log10(len(cpu_handles))) + 1 @@ -285,11 +285,11 @@ class AMDSMIHelpers(): except amdsmi_interface.AmdSmiLibraryException as e: if e.err_code in (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT, amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED): - logging.info('Unable to get device choices, driver not initialized (amd_hsmp not found in modules)') + logging.info('Unable to get device choices, driver not initialized (amd_hsmp or 
hsmp_acpi not found in modules)') else: raise e if len(core_handles) == 0: - logging.info('Unable to find any devices, check if driver is initialized (amd_hsmp not found in modules)') + logging.info('Unable to find any devices, check if driver is initialized (amd_hsmp or hsmp_acpi not found in modules)') else: # Handle spacing for the gpu_choices_str max_padding = int(math.log10(len(core_handles))) + 1 diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_init.py b/projects/amdsmi/amdsmi_cli/amdsmi_init.py index c8c84c12f1..7271dae941 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_init.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_init.py @@ -62,10 +62,9 @@ def check_amdgpu_driver(): def check_amd_hsmp_driver(): - """ Returns true if amd_hsmp is found in the list of initialized modules """ - amd_cpu_status_file = Path("/sys/module/amd_hsmp/initstate") + """ Returns true if amd_hsmp or hsmp_acpi is found in the list of initialized modules """ + amd_cpu_status_file = Path("/dev/hsmp") if amd_cpu_status_file.exists(): - if amd_cpu_status_file.read_text(encoding="ascii").strip() == "live": return True return False @@ -73,7 +72,7 @@ def check_amd_hsmp_driver(): def amdsmi_cli_init(): """ Initializes AMDSMI Library for the CLI - Checks for the presence of the amdgpu and amd_hsmp drivers and initializes the + Checks for the presence of the amdgpu, amd_hsmp or hsmp_acpi drivers and initializes the AMD SMI library based on the live drivers found. 
Return: @@ -85,13 +84,13 @@ def amdsmi_cli_init(): init_flag = amdsmi_interface.AmdSmiInitFlags.INIT_ALL_PROCESSORS if check_amdgpu_driver() and check_amd_hsmp_driver(): init_flag = amdsmi_interface.AmdSmiInitFlags.INIT_AMD_APUS - logging.debug("Both amdgpu and amd_hsmp driver's initstate is live") + logging.debug("Both amdgpu , amd_hsmp or hsmp_acpi driver's initstate is live") try: amdsmi_interface.amdsmi_init(init_flag) except (amdsmi_interface.AmdSmiLibraryException, amdsmi_interface.AmdSmiParameterException) as e: if e.err_code in (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT, amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED): - logging.error("Drivers not loaded (amdgpu and amd_hsmp drivers not found in modules)") + logging.error("Drivers not loaded (amdgpu, amd_hsmp or hsmp_acpi drivers not found in modules)") sys.exit(-1) else: raise e @@ -107,20 +106,20 @@ def amdsmi_cli_init(): sys.exit(-1) else: raise e - logging.debug("amdgpu driver initialized successfully, but amd_hsmp initstate was not live") + logging.debug("amdgpu driver initialized successfully, but amd_hsmp or hsmp_acpi initstate was not live") elif check_amd_hsmp_driver(): init_flag = amdsmi_interface.AmdSmiInitFlags.INIT_AMD_CPUS - logging.debug("amd_hsmp driver initstate is live") + logging.debug("amd_hsmp or hsmp_acpi driver initstate is live") try: amdsmi_interface.amdsmi_init(init_flag) except (amdsmi_interface.AmdSmiLibraryException, amdsmi_interface.AmdSmiParameterException) as e: if e.err_code in (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT, amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED): - logging.error("Driver not loaded (amd_hsmp not found in modules)") + logging.error("Driver not loaded (amd_hsmp or hsmp_acpi not found in modules)") sys.exit(-1) else: raise e - logging.debug("amd_hsmp driver initialized successfully, but amdgpu initstate was not live") + logging.debug("amd_hsmp or hsmp_acpi driver initialized successfully, but 
amdgpu initstate was not live") logging.debug(f"AMDSMI initialized with atleast one driver successfully | init flag: {init_flag}") diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py index 85dc02abdf..98370bbd7d 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py @@ -754,7 +754,7 @@ class AMDSMIParser(argparse.ArgumentParser): # help info gpu_version_help = "Display the current amdgpu driver version" - cpu_version_help = "Display the current amd_hsmp driver version" + cpu_version_help = "Display the current amd_hsmp or hsmp_acpi driver version" # Add GPU and CPU version Arguments version_parser.add_argument('-g', '--gpu_version', action='store_true', required=False, help=gpu_version_help, default=None) diff --git a/projects/amdsmi/docs/install/install.md b/projects/amdsmi/docs/install/install.md index 2c4e0ea8be..cc8a3d0632 100644 --- a/projects/amdsmi/docs/install/install.md +++ b/projects/amdsmi/docs/install/install.md @@ -37,8 +37,7 @@ AMD SMI library can run on AMD ROCm supported platforms. Refer to for more information. -To run the AMD SMI library, the `amdgpu` driver and the `amd_hsmp` driver need -to be installed. Optionally, `libdrm` can be installed to query firmware +To run the AMD SMI library, the `amdgpu` driver and the `amd_hsmp` or `hsmp_acpi` driver need to be installed. Optionally, `libdrm` can be installed to query firmware information and hardware IPs. 
### Python interface and CLI tool prerequisites diff --git a/projects/amdsmi/goamdsmi_shim/smiwrapper/amdsmi_go_shim.c b/projects/amdsmi/goamdsmi_shim/smiwrapper/amdsmi_go_shim.c index faddd048b6..1cd5174ca5 100644 --- a/projects/amdsmi/goamdsmi_shim/smiwrapper/amdsmi_go_shim.c +++ b/projects/amdsmi/goamdsmi_shim/smiwrapper/amdsmi_go_shim.c @@ -45,7 +45,7 @@ #define AMDGPU_INITSTATE_FILE "/sys/module/amdgpu/initstate" #define AMDHSMP_DRIVER_NAME "AMDHSMPDriver" -#define AMDHSMP_INITSTATE_FILE "/sys/module/amd_hsmp/initstate" +#define AMDHSMP_INITSTATE_FILE "/dev/hsmp" static uint32_t num_apuSockets = GOAMDSMI_VALUE_0; static uint32_t num_cpuSockets = GOAMDSMI_VALUE_0; diff --git a/projects/amdsmi/tests/amd_smi_test/test_base.cc b/projects/amdsmi/tests/amd_smi_test/test_base.cc index c010ab0e6e..98a36f4623 100644 --- a/projects/amdsmi/tests/amd_smi_test/test_base.cc +++ b/projects/amdsmi/tests/amd_smi_test/test_base.cc @@ -98,12 +98,13 @@ void TestBase::SetUp(uint64_t init_flags) { } } - // Returns true if amd_hsmp is found in the list of initialized modules - bool found_amd_hsmp = CheckModule("/sys/module/amd_hsmp/initstate", "live"); + // Returns true if amd_hsmp or hsmp_acpi is found in the list of initialized modules + struct stat buffer; + bool found_amd_hsmp = (stat("/dev/hsmp", &buffer)==0); if (!found_amd_hsmp) { IF_VERB(STANDARD) { std::cerr << "ERROR: Unable to get devices, driver not initialized (amd_hsmp not found in modules)" << std::endl; - std::cerr << "ERROR: Unable to detect any CPU devices, check amd_hsmp version and module status (sudo modprobe amd_hsmp)" << std::endl; + std::cerr << "ERROR: Unable to detect any CPU devices, check amd_hsmp (or) hsmp_acpi version and module status (sudo modprobe amd_hsmp (or) sudo modprobe hsmp_acpi)" << std::endl; } }