[SWDEV-564696] Structure size mismatch in SOC pstate/XGMI PLPD (#2207)

* Address PR feedback: consolidate switch cases, move CSV formatting, use direct API calls for error messages
* CSV output flattening changes

---------

Signed-off-by: Billakanti, Koushik <Koushik.Billakanti@amd.com>
这个提交包含在:
koushikbillakanti-amd
2025-12-10 23:37:36 -06:00
提交者 GitHub
父节点 caecbb4d01
当前提交 9e06ea8f79
修改 3 个文件,包含 106 行新增和 8 行删除
+44 -2
查看文件
@@ -912,7 +912,18 @@ class AMDSMICommands():
policy_info = "N/A"
logging.debug("Failed to get soc pstate policy info for gpu %s | %s", gpu_id, e.get_error_info())
static_dict['soc_pstate'] = policy_info
# Format for CSV output - flatten completely to avoid extra columns
if self.logger.is_csv_format() and isinstance(policy_info, dict):
policies_str = ', '.join(
f"{p['policy_id']}:{p['policy_description']}"
for p in policy_info.get('policies', [])
) or 'N/A'
static_dict['num_supported'] = policy_info.get('num_supported', 'N/A')
static_dict['current_id'] = policy_info.get('current_id', 'N/A')
static_dict['policies'] = policies_str
else:
static_dict['soc_pstate'] = policy_info
if 'xgmi_plpd' in current_platform_args:
if args.xgmi_plpd:
try:
@@ -921,7 +932,18 @@ class AMDSMICommands():
policy_info = "N/A"
logging.debug("Failed to get xgmi_plpd info for gpu %s | %s", gpu_id, e.get_error_info())
static_dict['xgmi_plpd'] = policy_info
# Format for CSV output - flatten completely to avoid extra columns
if self.logger.is_csv_format() and isinstance(policy_info, dict):
policies_str = ', '.join(
f"{p['policy_id']}:{p['policy_description']}"
for p in policy_info.get('policies', [])
) or 'N/A'
static_dict['num_supported'] = policy_info.get('num_supported', 'N/A')
static_dict['current_id'] = policy_info.get('current_id', 'N/A')
static_dict['policies'] = policies_str
else:
static_dict['xgmi_plpd'] = policy_info
if 'process_isolation' in current_platform_args:
if args.process_isolation:
try:
@@ -4791,6 +4813,16 @@ class AMDSMICommands():
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
raise PermissionError('Command requires elevation') from e
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_INVAL:
soc_pstate_info = amdsmi_interface.amdsmi_get_soc_pstate(args.gpu)
policy_string = "N/A"
# Check if 'policies' key exists before accessing it
if 'policies' in soc_pstate_info and soc_pstate_info['policies']:
policy_string = ""
for policy in soc_pstate_info['policies']:
policy_string += f"{policy['policy_id']}: {policy['policy_description']}, "
policy_string = policy_string.rstrip(", ") # Remove trailing comma and space
print(f"Valid SOC P-State Policies: [{policy_string}]\n")
self.logger.store_output(args.gpu, 'socpstate', f"[{e.get_error_info(detailed=False)}] Unable to set soc pstate dpm policy to {args.soc_pstate}")
self.logger.print_output()
self.logger.clear_multiple_devices_output()
@@ -4805,6 +4837,16 @@ class AMDSMICommands():
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
raise PermissionError('Command requires elevation') from e
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_INVAL:
xgmi_plpd_info = amdsmi_interface.amdsmi_get_xgmi_plpd(args.gpu)
policy_string = "N/A"
# Check if 'policies' key exists before accessing it
if 'policies' in xgmi_plpd_info and xgmi_plpd_info['policies']:
policy_string = ""
for policy in xgmi_plpd_info['policies']:
policy_string += f"{policy['policy_id']}: {policy['policy_description']}, "
policy_string = policy_string.rstrip(", ") # Remove trailing comma and space
print(f"Valid XGMI PLPD Policies: [{policy_string}]\n")
self.logger.store_output(args.gpu, 'xgmiplpd', f"[{e.get_error_info(detailed=False)}] Unable to set XGMI per-link power down policy to {args.xgmi_plpd}")
self.logger.print_output()
self.logger.clear_multiple_devices_output()
@@ -1067,8 +1067,6 @@ int Device::writeDevInfo(DevInfoTypes type, std::string val) {
sysfs_path += kDevAttribNameMap.at(type);
switch (type) {
case kDevGPUMClk:
case kDevSocPstate:
case kDevXgmiPlpd:
case kDevProcessIsolation:
case kDevShaderClean:
case kDevDCEFClk:
@@ -1082,6 +1080,8 @@ int Device::writeDevInfo(DevInfoTypes type, std::string val) {
case kDevComputePartition:
case kDevMemoryPartition:
case kDevXcpConfig:
case kDevSocPstate:
case kDevXgmiPlpd:
return writeDevInfoStr(type, val, true);
default:
+60 -4
查看文件
@@ -3768,8 +3768,36 @@ amdsmi_status_t amdsmi_get_soc_pstate(amdsmi_processor_handle processor_handle,
amdsmi_dpm_policy_t* policy) {
AMDSMI_CHECK_INIT();
return rsmi_wrapper(rsmi_dev_soc_pstate_get, processor_handle, 0,
reinterpret_cast<rsmi_dpm_policy_t*>(policy));
if (policy == nullptr) {
return AMDSMI_STATUS_INVAL;
}
// Initialize output structure to zero
memset(policy, 0, sizeof(*policy));
// Use rsmi structure with correct size (32-byte description fields)
rsmi_dpm_policy_t rsmi_policy = {};
amdsmi_status_t ret = rsmi_wrapper(rsmi_dev_soc_pstate_get, processor_handle, 0,
&rsmi_policy);
if (ret != AMDSMI_STATUS_SUCCESS) {
return ret;
}
// Copy data from rsmi structure to amdsmi structure field-by-field
// to handle the different structure sizes properly
policy->num_supported = rsmi_policy.num_supported;
policy->current = rsmi_policy.current;
for (uint32_t i = 0; i < rsmi_policy.num_supported && i < AMDSMI_MAX_NUM_PM_POLICIES; i++) {
policy->policies[i].policy_id = rsmi_policy.policies[i].policy_id;
strncpy(policy->policies[i].policy_description,
rsmi_policy.policies[i].policy_description,
AMDSMI_MAX_STRING_LENGTH - 1);
policy->policies[i].policy_description[AMDSMI_MAX_STRING_LENGTH - 1] = '\0';
}
return AMDSMI_STATUS_SUCCESS;
}
amdsmi_status_t amdsmi_set_xgmi_plpd(amdsmi_processor_handle processor_handle,
@@ -3784,8 +3812,36 @@ amdsmi_status_t amdsmi_get_xgmi_plpd(amdsmi_processor_handle processor_handle,
amdsmi_dpm_policy_t* policy) {
AMDSMI_CHECK_INIT();
return rsmi_wrapper(rsmi_dev_xgmi_plpd_get, processor_handle, 0,
reinterpret_cast<rsmi_dpm_policy_t*>(policy));
if (policy == nullptr) {
return AMDSMI_STATUS_INVAL;
}
// Initialize output structure to zero
memset(policy, 0, sizeof(*policy));
// Use rsmi structure with correct size (32-byte description fields)
rsmi_dpm_policy_t rsmi_policy = {};
amdsmi_status_t ret = rsmi_wrapper(rsmi_dev_xgmi_plpd_get, processor_handle, 0,
&rsmi_policy);
if (ret != AMDSMI_STATUS_SUCCESS) {
return ret;
}
// Copy data from rsmi structure to amdsmi structure field-by-field
// to handle the different structure sizes properly
policy->num_supported = rsmi_policy.num_supported;
policy->current = rsmi_policy.current;
for (uint32_t i = 0; i < rsmi_policy.num_supported && i < AMDSMI_MAX_NUM_PM_POLICIES; i++) {
policy->policies[i].policy_id = rsmi_policy.policies[i].policy_id;
strncpy(policy->policies[i].policy_description,
rsmi_policy.policies[i].policy_description,
AMDSMI_MAX_STRING_LENGTH - 1);
policy->policies[i].policy_description[AMDSMI_MAX_STRING_LENGTH - 1] = '\0';
}
return AMDSMI_STATUS_SUCCESS;
}
amdsmi_status_t amdsmi_get_gpu_process_isolation(amdsmi_processor_handle processor_handle,