Change the clean shader API to clean local data

To align with the unified API.

Change-Id: I2819339fba6f528204cebd3e9605109e82cbc5b4
This commit is contained in:
Bill(Shuzhou) Liu
2024-06-13 14:13:46 -05:00
parent 94cab382bf
commit e3c63628e5
10 changed files with 31 additions and 39 deletions
+1 -1
View File
@@ -27,7 +27,7 @@ Added `AMDSMI_EVT_NOTIF_RING_HANG` to the possible events in the `amdsmi_evt_not
### Optimizations
- **Updated naming for `amdsmi_set_gpu_clear_sram_data()` to `amdsmi_set_gpu_run_cleaner_shader()`**.
- **Updated naming for `amdsmi_set_gpu_clear_sram_data()` to `amdsmi_clean_gpu_local_data()`**.
Changed the naming to be more accurate to what the function was doing. This change also extends to the CLI where we changed the `clear-sram-data` command to `clean-local-data`.
- **Updated `amdsmi_clk_info_t` struct in amdsmi.h and amdsmi_interface.py to align with host/guest**.
+8 -9
View File
@@ -3730,7 +3730,7 @@ class AMDSMICommands():
def reset(self, args, multiple_devices=False, gpu=None, gpureset=None,
clocks=None, fans=None, profile=None, xgmierr=None, perf_determinism=None,
compute_partition=None, memory_partition=None, power_cap=None, run_shader=None):
compute_partition=None, memory_partition=None, power_cap=None, clean_local_data=None):
"""Issue reset commands to target gpu(s)
Args:
@@ -3746,7 +3746,7 @@ class AMDSMICommands():
compute_partition (bool, optional): Value override for args.compute_partition. Defaults to None.
memory_partition (bool, optional): Value override for args.memory_partition. Defaults to None.
power_cap (bool, optional): Value override for args.power_cap. Defaults to None.
run_shader (bool, optional): Value override for args.run_cleaner_shader. Defaults to None.
clean_local_data (bool, optional): Value override for args.clean_local_data. Defaults to None.
Raises:
ValueError: Value error if no gpu value is provided
@@ -3776,8 +3776,8 @@ class AMDSMICommands():
args.memory_partition = memory_partition
if power_cap:
args.power_cap = power_cap
if run_shader:
args.run_shader = run_shader
if clean_local_data:
args.clean_local_data = clean_local_data
# Handle No GPU passed
if args.gpu == None:
@@ -3796,7 +3796,7 @@ class AMDSMICommands():
# Error if no subcommand args are passed
if not any([args.gpureset, args.clocks, args.fans, args.profile, args.xgmierr, \
args.perf_determinism, args.compute_partition, args.memory_partition, \
args.power_cap, args.run_shader]):
args.power_cap, args.clean_local_data]):
command = " ".join(sys.argv[1:])
raise AmdSmiRequiredCommandException(command, self.logger.format)
@@ -3947,16 +3947,15 @@ class AMDSMICommands():
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to reset power cap to {default_power_cap_in_w} on GPU {gpu_id}") from e
self.logger.store_output(args.gpu, 'powercap', f"Successfully set power cap to {default_power_cap_in_w}")
if args.run_shader:
if args.clean_local_data:
try:
# Only 1 can be used for now.
amdsmi_interface.amdsmi_set_gpu_run_cleaner_shader(args.gpu, 1)
amdsmi_interface.amdsmi_clean_gpu_local_data(args.gpu)
result = 'Successfully clean GPU local data'
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to clean local data on GPU {gpu_id}") from e
self.logger.store_output(args.gpu, 'run_shader', result)
self.logger.store_output(args.gpu, 'clean_local_data', result)
if multiple_devices:
self.logger.store_multiple_device_output()
+2 -2
View File
@@ -1071,7 +1071,7 @@ class AMDSMIParser(argparse.ArgumentParser):
reset_compute_help = "Reset compute partitions on the specified GPU"
reset_memory_help = "Reset memory partitions on the specified GPU"
reset_power_cap_help = "Reset power capacity limit to max capable"
reset_gpu_run_cleaner_shader_help = "Run the shader on processor. Only CLEANER shader can be used to clean up data in LDS/GPRs"
reset_gpu_clean_local_data_help = "Clean up local data in LDS/GPRs"
# Create reset subparser
reset_parser = subparsers.add_parser('reset', help=reset_help, description=reset_subcommand_help)
@@ -1097,7 +1097,7 @@ class AMDSMIParser(argparse.ArgumentParser):
reset_parser.add_argument('-o', '--power-cap', action='store_true', required=False, help=reset_power_cap_help)
# Add Baremetal and Virtual OS reset arguments
reset_parser.add_argument('-l', '--run-shader', action='store', choices=["CLEANER"], type=str.upper, required=False, help=reset_gpu_run_cleaner_shader_help, metavar='SHADER_NAME')
reset_parser.add_argument('-l', '--clean-local-data', action='store_true', required=False, help=reset_gpu_clean_local_data_help)
def _add_monitor_parser(self, subparsers, func):
+3 -4
View File
@@ -2035,17 +2035,16 @@ except AmdSmiException as e:
print(e)
```
### amdsmi_set_gpu_run_cleaner_shader
### amdsmi_clean_gpu_local_data
Description: Clear the local data of the given device. This can be called between user logins to prevent information leak.
Input parameters:
* `processor_handle` handle for the given device
* `sclean` the clean flag. Only 1 will take effect and other number are reserved for future usage.
Output: None
Exceptions that can be thrown by `amdsmi_set_gpu_run_cleaner_shader` function:
Exceptions that can be thrown by `amdsmi_clean_gpu_local_data` function:
* `AmdSmiLibraryException`
* `AmdSmiRetryException`
@@ -2060,7 +2059,7 @@ try:
print("No GPUs on machine")
else:
for device in devices:
amdsmi_set_gpu_run_cleaner_shader(device, 1)
amdsmi_clean_gpu_local_data(device)
except AmdSmiException as e:
print(e)
```
+4 -7
View File
@@ -3509,25 +3509,22 @@ amdsmi_status_t amdsmi_set_gpu_process_isolation(amdsmi_processor_handle process
uint32_t pisolate);
/**
* @brief Run the cleaner shader to clean up data in LDS/GPRs
* @brief Clean up local data in LDS/GPRs
*
* @platform{gpu_bm_linux} @platform{guest_1vf}
*
* @details Given a processor handle @p processor_handle, and a sclean flag @p sclean,
* this function will clear the local data of this processor. This can be called between
* @details Given a processor handle @p processor_handle,
* this function will clean the local data of this processor. This can be called between
* user logins to prevent information leak.
*
* @note This function requires root access
*
* @param[in] processor_handle a processor handle
*
* @param[in] sclean the clean flag. Only 1 will take effect and other number
* are reserved for future usage.
*
* @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
*/
amdsmi_status_t amdsmi_set_gpu_run_cleaner_shader(amdsmi_processor_handle processor_handle,
uint32_t sclean);
amdsmi_status_t amdsmi_clean_gpu_local_data(amdsmi_processor_handle processor_handle);
/** @} End PerfCont */
+3 -4
View File
@@ -2084,17 +2084,16 @@ except AmdSmiException as e:
print(e)
```
### amdsmi_set_gpu_run_cleaner_shader
### amdsmi_clean_gpu_local_data
Description: Clear the local data of the given device. This can be called between user logins to prevent information leak.
Input parameters:
* `processor_handle` handle for the given device
* `sclean` the clean flag. Only 1 will take effect and other number are reserved for future usage.
Output: None
Exceptions that can be thrown by `amdsmi_set_gpu_run_cleaner_shader` function:
Exceptions that can be thrown by `amdsmi_clean_gpu_local_data` function:
* `AmdSmiLibraryException`
* `AmdSmiRetryException`
@@ -2109,7 +2108,7 @@ try:
print("No GPUs on machine")
else:
for device in devices:
amdsmi_set_gpu_run_cleaner_shader(device, 1)
amdsmi_clean_gpu_local_data(device)
except AmdSmiException as e:
print(e)
```
+1 -1
View File
@@ -136,7 +136,7 @@ from .amdsmi_interface import amdsmi_set_clk_freq
from .amdsmi_interface import amdsmi_set_gpu_overdrive_level
from .amdsmi_interface import amdsmi_set_soc_pstate
from .amdsmi_interface import amdsmi_set_xgmi_plpd
from .amdsmi_interface import amdsmi_set_gpu_run_cleaner_shader
from .amdsmi_interface import amdsmi_clean_gpu_local_data
from .amdsmi_interface import amdsmi_set_gpu_process_isolation
# # Physical State Queries
+3 -4
View File
@@ -2831,17 +2831,16 @@ def amdsmi_set_gpu_process_isolation(
)
def amdsmi_set_gpu_run_cleaner_shader(
def amdsmi_clean_gpu_local_data(
processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
sclean: int,
):
if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
raise AmdSmiParameterException(
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
)
_check_res(
amdsmi_wrapper.amdsmi_set_gpu_run_cleaner_shader(
processor_handle, sclean
amdsmi_wrapper.amdsmi_clean_gpu_local_data(
processor_handle
)
)
+4 -4
View File
@@ -2100,9 +2100,9 @@ amdsmi_get_gpu_process_isolation.argtypes = [amdsmi_processor_handle, ctypes.POI
amdsmi_set_gpu_process_isolation = _libraries['libamd_smi.so'].amdsmi_set_gpu_process_isolation
amdsmi_set_gpu_process_isolation.restype = amdsmi_status_t
amdsmi_set_gpu_process_isolation.argtypes = [amdsmi_processor_handle, uint32_t]
amdsmi_set_gpu_run_cleaner_shader = _libraries['libamd_smi.so'].amdsmi_set_gpu_run_cleaner_shader
amdsmi_set_gpu_run_cleaner_shader.restype = amdsmi_status_t
amdsmi_set_gpu_run_cleaner_shader.argtypes = [amdsmi_processor_handle, uint32_t]
amdsmi_clean_gpu_local_data = _libraries['libamd_smi.so'].amdsmi_clean_gpu_local_data
amdsmi_clean_gpu_local_data.restype = amdsmi_status_t
amdsmi_clean_gpu_local_data.argtypes = [amdsmi_processor_handle]
amdsmi_get_lib_version = _libraries['libamd_smi.so'].amdsmi_get_lib_version
amdsmi_get_lib_version.restype = amdsmi_status_t
amdsmi_get_lib_version.argtypes = [ctypes.POINTER(struct_amdsmi_version_t)]
@@ -2711,7 +2711,7 @@ __all__ = \
'amdsmi_set_gpu_perf_determinism_mode',
'amdsmi_set_gpu_perf_level', 'amdsmi_set_gpu_power_profile',
'amdsmi_set_gpu_process_isolation',
'amdsmi_set_gpu_run_cleaner_shader', 'amdsmi_set_power_cap',
'amdsmi_clean_gpu_local_data', 'amdsmi_set_power_cap',
'amdsmi_set_soc_pstate', 'amdsmi_set_xgmi_plpd',
'amdsmi_shut_down', 'amdsmi_smu_fw_version_t',
'amdsmi_socket_handle', 'amdsmi_status_code_to_string',
+2 -3
View File
@@ -1459,12 +1459,11 @@ amdsmi_status_t amdsmi_set_gpu_process_isolation(amdsmi_processor_handle process
pisolate);
}
amdsmi_status_t amdsmi_set_gpu_run_cleaner_shader(amdsmi_processor_handle processor_handle,
uint32_t sclean) {
amdsmi_status_t amdsmi_clean_gpu_local_data(amdsmi_processor_handle processor_handle) {
AMDSMI_CHECK_INIT();
return rsmi_wrapper(rsmi_dev_gpu_run_cleaner_shader, processor_handle,
sclean);
1);
}
amdsmi_status_t