diff --git a/projects/amdsmi/CHANGELOG.md b/projects/amdsmi/CHANGELOG.md index b65081d1a3..c065447491 100644 --- a/projects/amdsmi/CHANGELOG.md +++ b/projects/amdsmi/CHANGELOG.md @@ -106,7 +106,7 @@ The parsing of `pp_od_clk_voltage` was not dynamic enough to work with the dropp Added APIs CLI and APIs to address LeftoverLocals security issues. Allowing clearing the sram data and setting process isolation on a per GPU basis. New APIs: - `amdsmi_get_gpu_process_isolation()` - `amdsmi_set_gpu_process_isolation()` - - `amdsmi_set_gpu_clear_sram_data()` + - `amdsmi_set_gpu_run_cleaner_shader()` - **Added `MIN_POWER` to output of `amd-smi static --limit`**. This change helps users identify the range to which they can change the power cap of the GPU. The change is added to simplify why a device supports (or does not support) power capping (also known as overdrive). See `amd-smi set -g all --power-cap ` or `amd-smi reset -g all --power-cap`. diff --git a/projects/amdsmi/amdsmi_cli/README.md b/projects/amdsmi/amdsmi_cli/README.md index fd53c70716..c4fc361ec0 100644 --- a/projects/amdsmi/amdsmi_cli/README.md +++ b/projects/amdsmi/amdsmi_cli/README.md @@ -579,7 +579,7 @@ Reset Arguments: -C, --compute-partition Reset compute partitions on the specified GPU -M, --memory-partition Reset memory partitions on the specified GPU -o, --power-cap Reset power capacity limit to max capable - -l, --clear-sram-data Clear the GPU SRAM data + -l, --run-shader SHADER_NAME Run the shader on processor. Only CLEANER shader can be used to clean up data in LDS/GPRs Command Modifiers: --json Displays output in JSON format (human readable by default). 
diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py index 3aac8f4f6e..109d7daae4 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py @@ -3730,7 +3730,7 @@ class AMDSMICommands(): def reset(self, args, multiple_devices=False, gpu=None, gpureset=None, clocks=None, fans=None, profile=None, xgmierr=None, perf_determinism=None, - compute_partition=None, memory_partition=None, power_cap=None, clear_sram_data=None): + compute_partition=None, memory_partition=None, power_cap=None, run_shader=None): """Issue reset commands to target gpu(s) Args: @@ -3746,7 +3746,7 @@ class AMDSMICommands(): compute_partition (bool, optional): Value override for args.compute_partition. Defaults to None. memory_partition (bool, optional): Value override for args.memory_partition. Defaults to None. power_cap (bool, optional): Value override for args.power_cap. Defaults to None. - clear_sram_data (bool, optional): Value override for args.clear_sram_data. Defaults to None. + run_shader (str, optional): Value override for args.run_shader. Defaults to None. 
Raises: ValueError: Value error if no gpu value is provided @@ -3776,8 +3776,8 @@ class AMDSMICommands(): args.memory_partition = memory_partition if power_cap: args.power_cap = power_cap - if clear_sram_data: - args.clear_sram_data = clear_sram_data + if run_shader: + args.run_shader = run_shader # Handle No GPU passed if args.gpu == None: @@ -3796,7 +3796,7 @@ class AMDSMICommands(): # Error if no subcommand args are passed if not any([args.gpureset, args.clocks, args.fans, args.profile, args.xgmierr, \ args.perf_determinism, args.compute_partition, args.memory_partition, \ - args.power_cap, args.clear_sram_data]): + args.power_cap, args.run_shader]): command = " ".join(sys.argv[1:]) raise AmdSmiRequiredCommandException(command, self.logger.format) @@ -3947,16 +3947,16 @@ class AMDSMICommands(): raise PermissionError('Command requires elevation') from e raise ValueError(f"Unable to reset power cap to {default_power_cap_in_w} on GPU {gpu_id}") from e self.logger.store_output(args.gpu, 'powercap', f"Successfully set power cap to {default_power_cap_in_w}") - if args.clear_sram_data: + if args.run_shader: try: # Only 1 can be used for now. 
- amdsmi_interface.amdsmi_set_gpu_clear_sram_data(args.gpu, 1) - result = 'Successfully clear GPU SRAM data' + amdsmi_interface.amdsmi_set_gpu_run_cleaner_shader(args.gpu, 1) + result = 'Successfully clean GPU local data' except amdsmi_exception.AmdSmiLibraryException as e: if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM: raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to clear SRAM data on GPU {gpu_id}") from e - self.logger.store_output(args.gpu, 'clear_sram_data', result) + raise ValueError(f"Unable to clean local data on GPU {gpu_id}") from e + self.logger.store_output(args.gpu, 'run_shader', result) if multiple_devices: self.logger.store_multiple_device_output() diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py index b29a5bbb15..edc188b737 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py @@ -1061,7 +1061,7 @@ class AMDSMIParser(argparse.ArgumentParser): reset_compute_help = "Reset compute partitions on the specified GPU" reset_memory_help = "Reset memory partitions on the specified GPU" reset_power_cap_help = "Reset power capacity limit to max capable" - reset_gpu_clear_sram_data_help = "Clear the GPU SRAM data\n" + reset_gpu_run_cleaner_shader_help = "Run the shader on processor. 
Only CLEANER shader can be used to clean up data in LDS/GPRs" # Create reset subparser reset_parser = subparsers.add_parser('reset', help=reset_help, description=reset_subcommand_help) @@ -1087,7 +1087,7 @@ class AMDSMIParser(argparse.ArgumentParser): reset_parser.add_argument('-o', '--power-cap', action='store_true', required=False, help=reset_power_cap_help) # Add Baremetal and Virtual OS reset arguments - reset_parser.add_argument('-l', '--clear-sram-data', action='store_true', required=False, help=reset_gpu_clear_sram_data_help) + reset_parser.add_argument('-l', '--run-shader', action='store', choices=["CLEANER"], type=str.upper, required=False, help=reset_gpu_run_cleaner_shader_help, metavar='SHADER_NAME') def _add_monitor_parser(self, subparsers, func): diff --git a/projects/amdsmi/docs/how-to/using-amdsmi-for-python.md b/projects/amdsmi/docs/how-to/using-amdsmi-for-python.md index d6c0172595..edfe7172d4 100644 --- a/projects/amdsmi/docs/how-to/using-amdsmi-for-python.md +++ b/projects/amdsmi/docs/how-to/using-amdsmi-for-python.md @@ -2035,8 +2035,8 @@ except AmdSmiException as e: print(e) ``` -### amdsmi_set_gpu_clear_sram_data -Description: Clear the SRAM data of the given device. This can be called between user logins to prevent information leak. +### amdsmi_set_gpu_run_cleaner_shader +Description: Clear the local data of the given device. This can be called between user logins to prevent information leak. 
Input parameters: @@ -2045,7 +2045,7 @@ Input parameters: Output: None -Exceptions that can be thrown by `amdsmi_set_gpu_clear_sram_data` function: +Exceptions that can be thrown by `amdsmi_set_gpu_run_cleaner_shader` function: * `AmdSmiLibraryException` * `AmdSmiRetryException` @@ -2060,7 +2060,7 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_set_gpu_clear_sram_data(device, 1) + amdsmi_set_gpu_run_cleaner_shader(device, 1) except AmdSmiException as e: print(e) ``` diff --git a/projects/amdsmi/include/amd_smi/amdsmi.h b/projects/amdsmi/include/amd_smi/amdsmi.h index 7a3842d423..01139461a7 100644 --- a/projects/amdsmi/include/amd_smi/amdsmi.h +++ b/projects/amdsmi/include/amd_smi/amdsmi.h @@ -3509,12 +3509,12 @@ amdsmi_status_t amdsmi_set_gpu_process_isolation(amdsmi_processor_handle process uint32_t pisolate); /** - * @brief Clear the GPU SRAM data + * @brief Run the cleaner shader to clean up data in LDS/GPRs * * @platform{gpu_bm_linux} @platform{guest_1vf} * * @details Given a processor handle @p processor_handle, and a sclean flag @p sclean, - * this function will clear the SRAM data of this processor. This can be called between + * this function will clear the local data of this processor. This can be called between * user logins to prevent information leak. 
* * @note This function requires root access @@ -3526,7 +3526,7 @@ amdsmi_status_t amdsmi_set_gpu_process_isolation(amdsmi_processor_handle process * * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail */ -amdsmi_status_t amdsmi_set_gpu_clear_sram_data(amdsmi_processor_handle processor_handle, +amdsmi_status_t amdsmi_set_gpu_run_cleaner_shader(amdsmi_processor_handle processor_handle, uint32_t sclean); /** @} End PerfCont */ diff --git a/projects/amdsmi/py-interface/README.md b/projects/amdsmi/py-interface/README.md index 286840db59..1863a4abff 100644 --- a/projects/amdsmi/py-interface/README.md +++ b/projects/amdsmi/py-interface/README.md @@ -2076,7 +2076,7 @@ except AmdSmiException as e: print(e) ``` -### amdsmi_set_gpu_clear_sram_data +### amdsmi_set_gpu_run_cleaner_shader Description: Clear the SRAM data of the given device. This can be called between user logins to prevent information leak. Input parameters: @@ -2086,7 +2086,7 @@ Input parameters: Output: None -Exceptions that can be thrown by `amdsmi_set_gpu_clear_sram_data` function: +Exceptions that can be thrown by `amdsmi_set_gpu_run_cleaner_shader` function: * `AmdSmiLibraryException` * `AmdSmiRetryException` @@ -2101,7 +2101,7 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_set_gpu_clear_sram_data(device, 1) + amdsmi_set_gpu_run_cleaner_shader(device, 1) except AmdSmiException as e: print(e) ``` diff --git a/projects/amdsmi/py-interface/__init__.py b/projects/amdsmi/py-interface/__init__.py index 0d642993c1..928b8b53e5 100644 --- a/projects/amdsmi/py-interface/__init__.py +++ b/projects/amdsmi/py-interface/__init__.py @@ -136,7 +136,7 @@ from .amdsmi_interface import amdsmi_set_clk_freq from .amdsmi_interface import amdsmi_set_gpu_overdrive_level from .amdsmi_interface import amdsmi_set_soc_pstate from .amdsmi_interface import amdsmi_set_xgmi_plpd -from .amdsmi_interface import amdsmi_set_gpu_clear_sram_data +from .amdsmi_interface import 
amdsmi_set_gpu_run_cleaner_shader from .amdsmi_interface import amdsmi_set_gpu_process_isolation # # Physical State Queries diff --git a/projects/amdsmi/py-interface/amdsmi_interface.py b/projects/amdsmi/py-interface/amdsmi_interface.py index f025956313..61012776dd 100644 --- a/projects/amdsmi/py-interface/amdsmi_interface.py +++ b/projects/amdsmi/py-interface/amdsmi_interface.py @@ -2831,7 +2831,7 @@ def amdsmi_set_gpu_process_isolation( ) -def amdsmi_set_gpu_clear_sram_data( +def amdsmi_set_gpu_run_cleaner_shader( processor_handle: amdsmi_wrapper.amdsmi_processor_handle, sclean: int, ): @@ -2840,7 +2840,7 @@ def amdsmi_set_gpu_clear_sram_data( processor_handle, amdsmi_wrapper.amdsmi_processor_handle ) _check_res( - amdsmi_wrapper.amdsmi_set_gpu_clear_sram_data( + amdsmi_wrapper.amdsmi_set_gpu_run_cleaner_shader( processor_handle, sclean ) ) diff --git a/projects/amdsmi/py-interface/amdsmi_wrapper.py b/projects/amdsmi/py-interface/amdsmi_wrapper.py index a9255ee8b0..df85038505 100644 --- a/projects/amdsmi/py-interface/amdsmi_wrapper.py +++ b/projects/amdsmi/py-interface/amdsmi_wrapper.py @@ -2100,9 +2100,9 @@ amdsmi_get_gpu_process_isolation.argtypes = [amdsmi_processor_handle, ctypes.POI amdsmi_set_gpu_process_isolation = _libraries['libamd_smi.so'].amdsmi_set_gpu_process_isolation amdsmi_set_gpu_process_isolation.restype = amdsmi_status_t amdsmi_set_gpu_process_isolation.argtypes = [amdsmi_processor_handle, uint32_t] -amdsmi_set_gpu_clear_sram_data = _libraries['libamd_smi.so'].amdsmi_set_gpu_clear_sram_data -amdsmi_set_gpu_clear_sram_data.restype = amdsmi_status_t -amdsmi_set_gpu_clear_sram_data.argtypes = [amdsmi_processor_handle, uint32_t] +amdsmi_set_gpu_run_cleaner_shader = _libraries['libamd_smi.so'].amdsmi_set_gpu_run_cleaner_shader +amdsmi_set_gpu_run_cleaner_shader.restype = amdsmi_status_t +amdsmi_set_gpu_run_cleaner_shader.argtypes = [amdsmi_processor_handle, uint32_t] amdsmi_get_lib_version = _libraries['libamd_smi.so'].amdsmi_get_lib_version 
amdsmi_get_lib_version.restype = amdsmi_status_t amdsmi_get_lib_version.argtypes = [ctypes.POINTER(struct_amdsmi_version_t)] @@ -2703,7 +2703,7 @@ __all__ = \ 'amdsmi_set_cpu_socket_boostlimit', 'amdsmi_set_cpu_socket_lclk_dpm_level', 'amdsmi_set_cpu_socket_power_cap', 'amdsmi_set_cpu_xgmi_width', - 'amdsmi_set_soc_pstate', 'amdsmi_set_gpu_clear_sram_data', + 'amdsmi_set_soc_pstate', 'amdsmi_set_gpu_run_cleaner_shader', 'amdsmi_set_gpu_clk_range', 'amdsmi_set_gpu_compute_partition', 'amdsmi_set_gpu_event_notification_mask', 'amdsmi_set_gpu_fan_speed', 'amdsmi_set_gpu_memory_partition', diff --git a/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi.h b/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi.h index 8b3ec699a3..de0b65a806 100755 --- a/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi.h +++ b/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi.h @@ -3451,10 +3451,10 @@ rsmi_status_t rsmi_dev_process_isolation_set(uint32_t dv_ind, uint32_t pisolate); /** - * @brief Clear the GPU SRAM data + * @brief Run the cleaner shader to clean up data in LDS/GPRs * * @details Given a device index @p dv_ind, this function will clear the - * GPU SRAM data of this device. This can be called between user logins to prevent information leak. + * GPU local data of this device. This can be called between user logins to prevent information leak. 
* * @note This function requires root access * @@ -3465,7 +3465,7 @@ rsmi_status_t rsmi_dev_process_isolation_set(uint32_t dv_ind, * * @return ::RSMI_STATUS_SUCCESS is returned upon successful call, non-zero on fail */ -rsmi_status_t rsmi_dev_gpu_clear_sram_data(uint32_t dv_ind, uint32_t sclean); +rsmi_status_t rsmi_dev_gpu_run_cleaner_shader(uint32_t dv_ind, uint32_t sclean); /** @} */ // end of PerfCont diff --git a/projects/amdsmi/rocm_smi/src/rocm_smi.cc b/projects/amdsmi/rocm_smi/src/rocm_smi.cc index 03dc5abe58..790314f89f 100755 --- a/projects/amdsmi/rocm_smi/src/rocm_smi.cc +++ b/projects/amdsmi/rocm_smi/src/rocm_smi.cc @@ -2123,7 +2123,7 @@ rsmi_status_t rsmi_dev_process_isolation_set(uint32_t dv_ind, CATCH } -rsmi_status_t rsmi_dev_gpu_clear_sram_data(uint32_t dv_ind, +rsmi_status_t rsmi_dev_gpu_run_cleaner_shader(uint32_t dv_ind, uint32_t sclean) { rsmi_status_t ret; diff --git a/projects/amdsmi/src/amd_smi/amd_smi.cc b/projects/amdsmi/src/amd_smi/amd_smi.cc index 1b317ee385..39c4e7972b 100644 --- a/projects/amdsmi/src/amd_smi/amd_smi.cc +++ b/projects/amdsmi/src/amd_smi/amd_smi.cc @@ -1457,11 +1457,11 @@ amdsmi_status_t amdsmi_set_gpu_process_isolation(amdsmi_processor_handle process pisolate); } -amdsmi_status_t amdsmi_set_gpu_clear_sram_data(amdsmi_processor_handle processor_handle, +amdsmi_status_t amdsmi_set_gpu_run_cleaner_shader(amdsmi_processor_handle processor_handle, uint32_t sclean) { AMDSMI_CHECK_INIT(); - return rsmi_wrapper(rsmi_dev_gpu_clear_sram_data, processor_handle, + return rsmi_wrapper(rsmi_dev_gpu_run_cleaner_shader, processor_handle, sclean); }