From e3c63628e570457c074f39affad89bc809e84eee Mon Sep 17 00:00:00 2001 From: "Bill(Shuzhou) Liu" Date: Thu, 13 Jun 2024 14:13:46 -0500 Subject: [PATCH] Change the clean shader API to clean local data To align with the unified API. Change-Id: I2819339fba6f528204cebd3e9605109e82cbc5b4 --- CHANGELOG.md | 2 +- amdsmi_cli/amdsmi_commands.py | 17 ++++++++--------- amdsmi_cli/amdsmi_parser.py | 4 ++-- docs/how-to/using-amdsmi-for-python.md | 7 +++---- include/amd_smi/amdsmi.h | 11 ++++------- py-interface/README.md | 7 +++---- py-interface/__init__.py | 2 +- py-interface/amdsmi_interface.py | 7 +++---- py-interface/amdsmi_wrapper.py | 8 ++++---- src/amd_smi/amd_smi.cc | 5 ++--- 10 files changed, 31 insertions(+), 39 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 226ff81be2..7decf29215 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,7 +27,7 @@ Added `AMDSMI_EVT_NOTIF_RING_HANG` to the possible events in the `amdsmi_evt_not ### Optimizations -- **Updated naming for `amdsmi_set_gpu_clear_sram_data()` to `amdsmi_set_gpu_run_cleaner_shader()`**. +- **Updated naming for `amdsmi_set_gpu_clear_sram_data()` to `amdsmi_clean_gpu_local_data()`**. Changed the naming to be more accurate to what the function was doing. This change also extends to the CLI where we changed the `clear-sram-data` command to `run-shader` that accepts a shader name to run. - **Updated `amdsmi_clk_info_t` struct in amdsmi.h and amdsmi_interface.py to align with host/guest**. 
diff --git a/amdsmi_cli/amdsmi_commands.py b/amdsmi_cli/amdsmi_commands.py index d063698921..e68bc5dddb 100644 --- a/amdsmi_cli/amdsmi_commands.py +++ b/amdsmi_cli/amdsmi_commands.py @@ -3730,7 +3730,7 @@ class AMDSMICommands(): def reset(self, args, multiple_devices=False, gpu=None, gpureset=None, clocks=None, fans=None, profile=None, xgmierr=None, perf_determinism=None, - compute_partition=None, memory_partition=None, power_cap=None, run_shader=None): + compute_partition=None, memory_partition=None, power_cap=None, clean_local_data=None): """Issue reset commands to target gpu(s) Args: @@ -3746,7 +3746,7 @@ class AMDSMICommands(): compute_partition (bool, optional): Value override for args.compute_partition. Defaults to None. memory_partition (bool, optional): Value override for args.memory_partition. Defaults to None. power_cap (bool, optional): Value override for args.power_cap. Defaults to None. - run_shader (bool, optional): Value override for args.run_cleaner_shader. Defaults to None. + clean_local_data (bool, optional): Value override for args.clean_local_data. Defaults to None. 
Raises: ValueError: Value error if no gpu value is provided @@ -3776,8 +3776,8 @@ class AMDSMICommands(): args.memory_partition = memory_partition if power_cap: args.power_cap = power_cap - if run_shader: - args.run_shader = run_shader + if clean_local_data: + args.clean_local_data = clean_local_data # Handle No GPU passed if args.gpu == None: @@ -3796,7 +3796,7 @@ class AMDSMICommands(): # Error if no subcommand args are passed if not any([args.gpureset, args.clocks, args.fans, args.profile, args.xgmierr, \ args.perf_determinism, args.compute_partition, args.memory_partition, \ - args.power_cap, args.run_shader]): + args.power_cap, args.clean_local_data]): command = " ".join(sys.argv[1:]) raise AmdSmiRequiredCommandException(command, self.logger.format) @@ -3947,16 +3947,15 @@ class AMDSMICommands(): raise PermissionError('Command requires elevation') from e raise ValueError(f"Unable to reset power cap to {default_power_cap_in_w} on GPU {gpu_id}") from e self.logger.store_output(args.gpu, 'powercap', f"Successfully set power cap to {default_power_cap_in_w}") - if args.run_shader: + if args.clean_local_data: try: - # Only 1 can be used for now. 
- amdsmi_interface.amdsmi_set_gpu_run_cleaner_shader(args.gpu, 1) + amdsmi_interface.amdsmi_clean_gpu_local_data(args.gpu) result = 'Successfully clean GPU local data' except amdsmi_exception.AmdSmiLibraryException as e: if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM: raise PermissionError('Command requires elevation') from e raise ValueError(f"Unable to clean local data on GPU {gpu_id}") from e - self.logger.store_output(args.gpu, 'run_shader', result) + self.logger.store_output(args.gpu, 'clean_local_data', result) if multiple_devices: self.logger.store_multiple_device_output() diff --git a/amdsmi_cli/amdsmi_parser.py b/amdsmi_cli/amdsmi_parser.py index 278f594df8..60fd32bd64 100644 --- a/amdsmi_cli/amdsmi_parser.py +++ b/amdsmi_cli/amdsmi_parser.py @@ -1071,7 +1071,7 @@ class AMDSMIParser(argparse.ArgumentParser): reset_compute_help = "Reset compute partitions on the specified GPU" reset_memory_help = "Reset memory partitions on the specified GPU" reset_power_cap_help = "Reset power capacity limit to max capable" - reset_gpu_run_cleaner_shader_help = "Run the shader on processor. 
Only CLEANER shader can be used to clean up data in LDS/GPRs" + reset_gpu_clean_local_data_help = "Clean up local data in LDS/GPRs" # Create reset subparser reset_parser = subparsers.add_parser('reset', help=reset_help, description=reset_subcommand_help) @@ -1097,7 +1097,7 @@ class AMDSMIParser(argparse.ArgumentParser): reset_parser.add_argument('-o', '--power-cap', action='store_true', required=False, help=reset_power_cap_help) # Add Baremetal and Virtual OS reset arguments - reset_parser.add_argument('-l', '--run-shader', action='store', choices=["CLEANER"], type=str.upper, required=False, help=reset_gpu_run_cleaner_shader_help, metavar='SHADER_NAME') + reset_parser.add_argument('-l', '--clean-local-data', action='store_true', required=False, help=reset_gpu_clean_local_data_help) def _add_monitor_parser(self, subparsers, func): diff --git a/docs/how-to/using-amdsmi-for-python.md b/docs/how-to/using-amdsmi-for-python.md index edfe7172d4..18d4246fc0 100644 --- a/docs/how-to/using-amdsmi-for-python.md +++ b/docs/how-to/using-amdsmi-for-python.md @@ -2035,17 +2035,16 @@ except AmdSmiException as e: print(e) ``` -### amdsmi_set_gpu_run_cleaner_shader +### amdsmi_clean_gpu_local_data Description: Clear the local data of the given device. This can be called between user logins to prevent information leak. Input parameters: * `processor_handle` handle for the given device -* `sclean` the clean flag. Only 1 will take effect and other number are reserved for future usage. 
Output: None -Exceptions that can be thrown by `amdsmi_set_gpu_run_cleaner_shader` function: +Exceptions that can be thrown by `amdsmi_clean_gpu_local_data` function: * `AmdSmiLibraryException` * `AmdSmiRetryException` @@ -2060,7 +2059,7 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_set_gpu_run_cleaner_shader(device, 1) + amdsmi_clean_gpu_local_data(device) except AmdSmiException as e: print(e) ``` diff --git a/include/amd_smi/amdsmi.h b/include/amd_smi/amdsmi.h index 50ec669255..0a98345e55 100644 --- a/include/amd_smi/amdsmi.h +++ b/include/amd_smi/amdsmi.h @@ -3509,25 +3509,22 @@ amdsmi_status_t amdsmi_set_gpu_process_isolation(amdsmi_processor_handle process uint32_t pisolate); /** - * @brief Run the cleaner shader to clean up data in LDS/GPRs + * @brief Clean up local data in LDS/GPRs * * @platform{gpu_bm_linux} @platform{guest_1vf} * - * @details Given a processor handle @p processor_handle, and a sclean flag @p sclean, - * this function will clear the local data of this processor. This can be called between + * @details Given a processor handle @p processor_handle, + * this function will clean the local data of this processor. This can be called between * user logins to prevent information leak. * * @note This function requires root access * * @param[in] processor_handle a processor handle * - * @param[in] sclean the clean flag. Only 1 will take effect and other number - * are reserved for future usage. 
* * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail */ -amdsmi_status_t amdsmi_set_gpu_run_cleaner_shader(amdsmi_processor_handle processor_handle, - uint32_t sclean); +amdsmi_status_t amdsmi_clean_gpu_local_data(amdsmi_processor_handle processor_handle); /** @} End PerfCont */ diff --git a/py-interface/README.md b/py-interface/README.md index 21fb77d338..82a25939d0 100644 --- a/py-interface/README.md +++ b/py-interface/README.md @@ -2084,17 +2084,16 @@ except AmdSmiException as e: print(e) ``` -### amdsmi_set_gpu_run_cleaner_shader +### amdsmi_clean_gpu_local_data Description: Clear the SRAM data of the given device. This can be called between user logins to prevent information leak. Input parameters: * `processor_handle` handle for the given device -* `sclean` the clean flag. Only 1 will take effect and other number are reserved for future usage. Output: None -Exceptions that can be thrown by `amdsmi_set_gpu_run_cleaner_shader` function: +Exceptions that can be thrown by `amdsmi_clean_gpu_local_data` function: * `AmdSmiLibraryException` * `AmdSmiRetryException` @@ -2109,7 +2108,7 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_set_gpu_run_cleaner_shader(device, 1) + amdsmi_clean_gpu_local_data(device) except AmdSmiException as e: print(e) ``` diff --git a/py-interface/__init__.py b/py-interface/__init__.py index 928b8b53e5..e52de77bb6 100644 --- a/py-interface/__init__.py +++ b/py-interface/__init__.py @@ -136,7 +136,7 @@ from .amdsmi_interface import amdsmi_set_clk_freq from .amdsmi_interface import amdsmi_set_gpu_overdrive_level from .amdsmi_interface import amdsmi_set_soc_pstate from .amdsmi_interface import amdsmi_set_xgmi_plpd -from .amdsmi_interface import amdsmi_set_gpu_run_cleaner_shader +from .amdsmi_interface import amdsmi_clean_gpu_local_data from .amdsmi_interface import amdsmi_set_gpu_process_isolation # # Physical State Queries diff --git a/py-interface/amdsmi_interface.py 
b/py-interface/amdsmi_interface.py index 61012776dd..928fc1b478 100644 --- a/py-interface/amdsmi_interface.py +++ b/py-interface/amdsmi_interface.py @@ -2831,17 +2831,16 @@ def amdsmi_set_gpu_process_isolation( ) -def amdsmi_set_gpu_run_cleaner_shader( +def amdsmi_clean_gpu_local_data( processor_handle: amdsmi_wrapper.amdsmi_processor_handle, - sclean: int, ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle ) _check_res( - amdsmi_wrapper.amdsmi_set_gpu_run_cleaner_shader( - processor_handle, sclean + amdsmi_wrapper.amdsmi_clean_gpu_local_data( + processor_handle ) ) diff --git a/py-interface/amdsmi_wrapper.py b/py-interface/amdsmi_wrapper.py index 61b005e3f9..f6b925aae1 100644 --- a/py-interface/amdsmi_wrapper.py +++ b/py-interface/amdsmi_wrapper.py @@ -2100,9 +2100,9 @@ amdsmi_get_gpu_process_isolation.argtypes = [amdsmi_processor_handle, ctypes.POI amdsmi_set_gpu_process_isolation = _libraries['libamd_smi.so'].amdsmi_set_gpu_process_isolation amdsmi_set_gpu_process_isolation.restype = amdsmi_status_t amdsmi_set_gpu_process_isolation.argtypes = [amdsmi_processor_handle, uint32_t] -amdsmi_set_gpu_run_cleaner_shader = _libraries['libamd_smi.so'].amdsmi_set_gpu_run_cleaner_shader -amdsmi_set_gpu_run_cleaner_shader.restype = amdsmi_status_t -amdsmi_set_gpu_run_cleaner_shader.argtypes = [amdsmi_processor_handle, uint32_t] +amdsmi_clean_gpu_local_data = _libraries['libamd_smi.so'].amdsmi_clean_gpu_local_data +amdsmi_clean_gpu_local_data.restype = amdsmi_status_t +amdsmi_clean_gpu_local_data.argtypes = [amdsmi_processor_handle] amdsmi_get_lib_version = _libraries['libamd_smi.so'].amdsmi_get_lib_version amdsmi_get_lib_version.restype = amdsmi_status_t amdsmi_get_lib_version.argtypes = [ctypes.POINTER(struct_amdsmi_version_t)] @@ -2711,7 +2711,7 @@ __all__ = \ 'amdsmi_set_gpu_perf_determinism_mode', 'amdsmi_set_gpu_perf_level', 
'amdsmi_set_gpu_power_profile', 'amdsmi_set_gpu_process_isolation', - 'amdsmi_set_gpu_run_cleaner_shader', 'amdsmi_set_power_cap', + 'amdsmi_clean_gpu_local_data', 'amdsmi_set_power_cap', 'amdsmi_set_soc_pstate', 'amdsmi_set_xgmi_plpd', 'amdsmi_shut_down', 'amdsmi_smu_fw_version_t', 'amdsmi_socket_handle', 'amdsmi_status_code_to_string', diff --git a/src/amd_smi/amd_smi.cc b/src/amd_smi/amd_smi.cc index 66cfe15dbd..79c82d52bc 100644 --- a/src/amd_smi/amd_smi.cc +++ b/src/amd_smi/amd_smi.cc @@ -1459,12 +1459,11 @@ amdsmi_status_t amdsmi_set_gpu_process_isolation(amdsmi_processor_handle process pisolate); } -amdsmi_status_t amdsmi_set_gpu_run_cleaner_shader(amdsmi_processor_handle processor_handle, - uint32_t sclean) { +amdsmi_status_t amdsmi_clean_gpu_local_data(amdsmi_processor_handle processor_handle) { AMDSMI_CHECK_INIT(); return rsmi_wrapper(rsmi_dev_gpu_run_cleaner_shader, processor_handle, - sclean); + 1); } amdsmi_status_t