diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py
index 302c4a20fc..522f08f780 100644
--- a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py
+++ b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py
@@ -912,7 +912,18 @@ class AMDSMICommands():
                         policy_info = "N/A"
                         logging.debug("Failed to get soc pstate policy info for gpu %s | %s", gpu_id, e.get_error_info())
 
-                    static_dict['soc_pstate'] = policy_info
+                    # CSV output: flatten into soc_pstate_* columns so they cannot collide with the xgmi_plpd ones
+                    if self.logger.is_csv_format() and isinstance(policy_info, dict):
+                        policies_str = ', '.join(
+                            f"{p['policy_id']}:{p['policy_description']}"
+                            for p in policy_info.get('policies', [])
+                        ) or 'N/A'
+
+                        static_dict['soc_pstate_num_supported'] = policy_info.get('num_supported', 'N/A')
+                        static_dict['soc_pstate_current_id'] = policy_info.get('current_id', 'N/A')
+                        static_dict['soc_pstate_policies'] = policies_str
+                    else:
+                        static_dict['soc_pstate'] = policy_info
             if 'xgmi_plpd' in current_platform_args:
                 if args.xgmi_plpd:
                     try:
@@ -921,7 +932,18 @@ class AMDSMICommands():
                         policy_info = "N/A"
                         logging.debug("Failed to get xgmi_plpd info for gpu %s | %s", gpu_id, e.get_error_info())
 
-                    static_dict['xgmi_plpd'] = policy_info
+                    # CSV output: flatten into xgmi_plpd_* columns so they cannot collide with the soc_pstate ones
+                    if self.logger.is_csv_format() and isinstance(policy_info, dict):
+                        policies_str = ', '.join(
+                            f"{p['policy_id']}:{p['policy_description']}"
+                            for p in policy_info.get('policies', [])
+                        ) or 'N/A'
+
+                        static_dict['xgmi_plpd_num_supported'] = policy_info.get('num_supported', 'N/A')
+                        static_dict['xgmi_plpd_current_id'] = policy_info.get('current_id', 'N/A')
+                        static_dict['xgmi_plpd_policies'] = policies_str
+                    else:
+                        static_dict['xgmi_plpd'] = policy_info
             if 'process_isolation' in current_platform_args:
                 if args.process_isolation:
                     try:
@@ -4791,6 +4813,17 @@ class AMDSMICommands():
                 except amdsmi_exception.AmdSmiLibraryException as e:
                     if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
                         raise PermissionError('Command requires elevation') from e
+                    if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_INVAL:
+                        # Best-effort hint only: a failed lookup must not mask the original set error
+                        try:
+                            soc_pstate_info = amdsmi_interface.amdsmi_get_soc_pstate(args.gpu)
+                        except amdsmi_exception.AmdSmiLibraryException:
+                            soc_pstate_info = {}
+                        policy_string = ", ".join(
+                            f"{policy['policy_id']}: {policy['policy_description']}"
+                            for policy in (soc_pstate_info.get('policies') or [])
+                        ) or "N/A"
+                        print(f"Valid SOC P-State Policies: [{policy_string}]\n")
                     self.logger.store_output(args.gpu, 'socpstate', f"[{e.get_error_info(detailed=False)}] Unable to set soc pstate dpm policy to {args.soc_pstate}")
                     self.logger.print_output()
                     self.logger.clear_multiple_devices_output()
@@ -4805,6 +4838,17 @@ class AMDSMICommands():
                 except amdsmi_exception.AmdSmiLibraryException as e:
                     if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
                         raise PermissionError('Command requires elevation') from e
+                    if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_INVAL:
+                        # Best-effort hint only: a failed lookup must not mask the original set error
+                        try:
+                            xgmi_plpd_info = amdsmi_interface.amdsmi_get_xgmi_plpd(args.gpu)
+                        except amdsmi_exception.AmdSmiLibraryException:
+                            xgmi_plpd_info = {}
+                        policy_string = ", ".join(
+                            f"{policy['policy_id']}: {policy['policy_description']}"
+                            for policy in (xgmi_plpd_info.get('policies') or [])
+                        ) or "N/A"
+                        print(f"Valid XGMI PLPD Policies: [{policy_string}]\n")
                     self.logger.store_output(args.gpu, 'xgmiplpd', f"[{e.get_error_info(detailed=False)}] Unable to set XGMI per-link power down policy to {args.xgmi_plpd}")
                     self.logger.print_output()
                     self.logger.clear_multiple_devices_output()
diff --git a/projects/amdsmi/rocm_smi/src/rocm_smi_device.cc b/projects/amdsmi/rocm_smi/src/rocm_smi_device.cc
index 90edb85e70..ab3fb8ec8c 100755
--- a/projects/amdsmi/rocm_smi/src/rocm_smi_device.cc
+++ b/projects/amdsmi/rocm_smi/src/rocm_smi_device.cc
@@ -1067,8 +1067,6 @@ int Device::writeDevInfo(DevInfoTypes type, std::string val) {
   sysfs_path += kDevAttribNameMap.at(type);
   switch (type) {
     case kDevGPUMClk:
-    case kDevSocPstate:
-    case kDevXgmiPlpd:
     case kDevProcessIsolation:
     case kDevShaderClean:
     case kDevDCEFClk:
@@ -1082,6 +1080,8 @@ int Device::writeDevInfo(DevInfoTypes type, std::string val) {
     case kDevComputePartition:
     case kDevMemoryPartition:
     case kDevXcpConfig:
+    case kDevSocPstate:
+    case kDevXgmiPlpd:
       return writeDevInfoStr(type, val, true);
 
     default:
diff --git a/projects/amdsmi/src/amd_smi/amd_smi.cc b/projects/amdsmi/src/amd_smi/amd_smi.cc
index 0bab87e9a9..d7bc5f5ed5 100644
--- a/projects/amdsmi/src/amd_smi/amd_smi.cc
+++ b/projects/amdsmi/src/amd_smi/amd_smi.cc
@@ -3768,8 +3768,39 @@ amdsmi_status_t amdsmi_get_soc_pstate(amdsmi_processor_handle processor_handle,
                                       amdsmi_dpm_policy_t* policy) {
     AMDSMI_CHECK_INIT();
 
-    return rsmi_wrapper(rsmi_dev_soc_pstate_get, processor_handle, 0,
-                    reinterpret_cast<rsmi_dpm_policy_t*>(policy));
+    if (policy == nullptr) {
+        return AMDSMI_STATUS_INVAL;
+    }
+
+    // Initialize output structure to zero
+    memset(policy, 0, sizeof(*policy));
+
+    // Use rsmi structure with correct size (32-byte description fields)
+    rsmi_dpm_policy_t rsmi_policy = {};
+    amdsmi_status_t ret = rsmi_wrapper(rsmi_dev_soc_pstate_get, processor_handle, 0,
+                                       &rsmi_policy);
+
+    if (ret != AMDSMI_STATUS_SUCCESS) {
+        return ret;
+    }
+
+    // Copy data from rsmi structure to amdsmi structure field-by-field
+    // to handle the different structure sizes properly
+    // Clamp the reported count to what is actually copied so callers never index past policies[]
+    const uint32_t num_policies = rsmi_policy.num_supported < AMDSMI_MAX_NUM_PM_POLICIES
+                                      ? rsmi_policy.num_supported : AMDSMI_MAX_NUM_PM_POLICIES;
+    policy->num_supported = num_policies;
+    policy->current = rsmi_policy.current;
+
+    for (uint32_t i = 0; i < num_policies; i++) {
+        policy->policies[i].policy_id = rsmi_policy.policies[i].policy_id;
+        strncpy(policy->policies[i].policy_description,
+                rsmi_policy.policies[i].policy_description,
+                AMDSMI_MAX_STRING_LENGTH - 1);
+        policy->policies[i].policy_description[AMDSMI_MAX_STRING_LENGTH - 1] = '\0';
+    }
+
+    return AMDSMI_STATUS_SUCCESS;
 }
 
 amdsmi_status_t amdsmi_set_xgmi_plpd(amdsmi_processor_handle processor_handle,
@@ -3784,8 +3815,39 @@ amdsmi_status_t amdsmi_get_xgmi_plpd(amdsmi_processor_handle processor_handle,
                                      amdsmi_dpm_policy_t* policy) {
     AMDSMI_CHECK_INIT();
 
-    return rsmi_wrapper(rsmi_dev_xgmi_plpd_get, processor_handle, 0,
-                    reinterpret_cast<rsmi_dpm_policy_t*>(policy));
+    if (policy == nullptr) {
+        return AMDSMI_STATUS_INVAL;
+    }
+
+    // Initialize output structure to zero
+    memset(policy, 0, sizeof(*policy));
+
+    // Use rsmi structure with correct size (32-byte description fields)
+    rsmi_dpm_policy_t rsmi_policy = {};
+    amdsmi_status_t ret = rsmi_wrapper(rsmi_dev_xgmi_plpd_get, processor_handle, 0,
+                                       &rsmi_policy);
+
+    if (ret != AMDSMI_STATUS_SUCCESS) {
+        return ret;
+    }
+
+    // Copy data from rsmi structure to amdsmi structure field-by-field
+    // to handle the different structure sizes properly
+    // Clamp the reported count to what is actually copied so callers never index past policies[]
+    const uint32_t num_policies = rsmi_policy.num_supported < AMDSMI_MAX_NUM_PM_POLICIES
+                                      ? rsmi_policy.num_supported : AMDSMI_MAX_NUM_PM_POLICIES;
+    policy->num_supported = num_policies;
+    policy->current = rsmi_policy.current;
+
+    for (uint32_t i = 0; i < num_policies; i++) {
+        policy->policies[i].policy_id = rsmi_policy.policies[i].policy_id;
+        strncpy(policy->policies[i].policy_description,
+                rsmi_policy.policies[i].policy_description,
+                AMDSMI_MAX_STRING_LENGTH - 1);
+        policy->policies[i].policy_description[AMDSMI_MAX_STRING_LENGTH - 1] = '\0';
+    }
+
+    return AMDSMI_STATUS_SUCCESS;
 }
 
 amdsmi_status_t amdsmi_get_gpu_process_isolation(amdsmi_processor_handle processor_handle,