diff --git a/amdsmi_cli/amdsmi_parser.py b/amdsmi_cli/amdsmi_parser.py index 3f4059a86f..2ef524fc31 100644 --- a/amdsmi_cli/amdsmi_parser.py +++ b/amdsmi_cli/amdsmi_parser.py @@ -1039,8 +1039,8 @@ class AMDSMIParser(argparse.ArgumentParser): set_power_cap_help = "Set power capacity limit" set_soc_pstate_help = "Set the GPU soc pstate policy using policy id\n" set_xgmi_plpd_help = "Set the GPU XGMI per-link power down policy using policy id\n" - set_process_isolation_help = "Enable or disable the GPU process isolation: 0 for disable and 1 for enable.\n" set_clk_limit_help = "Sets the sclk (aka gfxclk) or mclk minimum and maximum frequencies. \nOf form: amd-smi set -L (sclk | mclk) (min | max) value" + set_process_isolation_help = "Enable or disable the GPU process isolation on a per partition basis: 0 for disable and 1 for enable.\n" # Help text for CPU set options set_cpu_pwr_limit_help = "Set power limit for the given socket. Input parameter is power limit value." @@ -1131,7 +1131,7 @@ class AMDSMIParser(argparse.ArgumentParser): reset_compute_help = "Reset compute partitions on the specified GPU" reset_memory_help = "Reset memory partitions on the specified GPU" reset_power_cap_help = "Reset power capacity limit to max capable" - reset_gpu_clean_local_data_help = "Clean up local data in LDS/GPRs" + reset_gpu_clean_local_data_help = "Clean up local data in LDS/GPRs on a per partition basis" # Create reset subparser reset_parser = subparsers.add_parser('reset', help=reset_help, description=reset_subcommand_help) diff --git a/rocm_smi/include/rocm_smi/rocm_smi.h b/rocm_smi/include/rocm_smi/rocm_smi.h index 2fefdd0ed8..baa6988816 100644 --- a/rocm_smi/include/rocm_smi/rocm_smi.h +++ b/rocm_smi/include/rocm_smi/rocm_smi.h @@ -3647,12 +3647,9 @@ rsmi_status_t rsmi_dev_process_isolation_set(uint32_t dv_ind, * * @param[in] dv_ind a device index * - * @param[in] sclean the clean flag. Only 1 will take effect and other number - * are reserved for future usage. 
- * * @return ::RSMI_STATUS_SUCCESS is returned upon successful call, non-zero on fail */ -rsmi_status_t rsmi_dev_gpu_run_cleaner_shader(uint32_t dv_ind, uint32_t sclean); +rsmi_status_t rsmi_dev_gpu_run_cleaner_shader(uint32_t dv_ind); /** @} */ // end of PerfCont diff --git a/rocm_smi/src/rocm_smi.cc b/rocm_smi/src/rocm_smi.cc index baeb6a85c9..9a03cb8917 100644 --- a/rocm_smi/src/rocm_smi.cc +++ b/rocm_smi/src/rocm_smi.cc @@ -2192,8 +2192,7 @@ rsmi_status_t rsmi_dev_process_isolation_set(uint32_t dv_ind, CATCH } -rsmi_status_t rsmi_dev_gpu_run_cleaner_shader(uint32_t dv_ind, - uint32_t sclean) { +rsmi_status_t rsmi_dev_gpu_run_cleaner_shader(uint32_t dv_ind) { rsmi_status_t ret; TRY @@ -2204,7 +2203,11 @@ rsmi_status_t rsmi_dev_gpu_run_cleaner_shader(uint32_t dv_ind, DEVICE_MUTEX GET_DEV_FROM_INDX - std::string value = std::to_string(sclean); + // The cleaner shader is triggered by writing the partition id to sysfs, e.g.: + // echo "0" | sudo tee /sys/class/drm/cardX/device/run_cleaner_shader + uint32_t partition_id = 0; + rsmi_dev_partition_id_get(dv_ind, &partition_id); + std::string value = std::to_string(partition_id); int ret = dev->writeDevInfo(amd::smi::kDevShaderClean , value); return amd::smi::ErrnoToRsmiStatus(ret); diff --git a/src/amd_smi/amd_smi.cc b/src/amd_smi/amd_smi.cc index cb478122e5..0fdd032ddb 100644 --- a/src/amd_smi/amd_smi.cc +++ b/src/amd_smi/amd_smi.cc @@ -1858,8 +1858,7 @@ amdsmi_status_t amdsmi_set_gpu_process_isolation(amdsmi_processor_handle process amdsmi_status_t amdsmi_clean_gpu_local_data(amdsmi_processor_handle processor_handle) { AMDSMI_CHECK_INIT(); - return rsmi_wrapper(rsmi_dev_gpu_run_cleaner_shader, processor_handle, - 1); + return rsmi_wrapper(rsmi_dev_gpu_run_cleaner_shader, processor_handle); } amdsmi_status_t