[SWDEV-564696] Structure size mismatch in SOC pstate/XGMI PLPD (#2207)
* Address PR feedback: consolidate switch cases, move CSV formatting, use direct API calls for error messages
* CSV output flattening changes

---------

Signed-off-by: Billakanti, Koushik <Koushik.Billakanti@amd.com>
这个提交包含在:
@@ -912,7 +912,18 @@ class AMDSMICommands():
|
||||
policy_info = "N/A"
|
||||
logging.debug("Failed to get soc pstate policy info for gpu %s | %s", gpu_id, e.get_error_info())
|
||||
|
||||
static_dict['soc_pstate'] = policy_info
|
||||
# Format for CSV output - flatten completely to avoid extra columns
|
||||
if self.logger.is_csv_format() and isinstance(policy_info, dict):
|
||||
policies_str = ', '.join(
|
||||
f"{p['policy_id']}:{p['policy_description']}"
|
||||
for p in policy_info.get('policies', [])
|
||||
) or 'N/A'
|
||||
|
||||
static_dict['num_supported'] = policy_info.get('num_supported', 'N/A')
|
||||
static_dict['current_id'] = policy_info.get('current_id', 'N/A')
|
||||
static_dict['policies'] = policies_str
|
||||
else:
|
||||
static_dict['soc_pstate'] = policy_info
|
||||
if 'xgmi_plpd' in current_platform_args:
|
||||
if args.xgmi_plpd:
|
||||
try:
|
||||
@@ -921,7 +932,18 @@ class AMDSMICommands():
|
||||
policy_info = "N/A"
|
||||
logging.debug("Failed to get xgmi_plpd info for gpu %s | %s", gpu_id, e.get_error_info())
|
||||
|
||||
static_dict['xgmi_plpd'] = policy_info
|
||||
# Format for CSV output - flatten completely to avoid extra columns
|
||||
if self.logger.is_csv_format() and isinstance(policy_info, dict):
|
||||
policies_str = ', '.join(
|
||||
f"{p['policy_id']}:{p['policy_description']}"
|
||||
for p in policy_info.get('policies', [])
|
||||
) or 'N/A'
|
||||
|
||||
static_dict['num_supported'] = policy_info.get('num_supported', 'N/A')
|
||||
static_dict['current_id'] = policy_info.get('current_id', 'N/A')
|
||||
static_dict['policies'] = policies_str
|
||||
else:
|
||||
static_dict['xgmi_plpd'] = policy_info
|
||||
if 'process_isolation' in current_platform_args:
|
||||
if args.process_isolation:
|
||||
try:
|
||||
@@ -4791,6 +4813,16 @@ class AMDSMICommands():
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_INVAL:
|
||||
soc_pstate_info = amdsmi_interface.amdsmi_get_soc_pstate(args.gpu)
|
||||
policy_string = "N/A"
|
||||
# Check if 'policies' key exists before accessing it
|
||||
if 'policies' in soc_pstate_info and soc_pstate_info['policies']:
|
||||
policy_string = ""
|
||||
for policy in soc_pstate_info['policies']:
|
||||
policy_string += f"{policy['policy_id']}: {policy['policy_description']}, "
|
||||
policy_string = policy_string.rstrip(", ") # Remove trailing comma and space
|
||||
print(f"Valid SOC P-State Policies: [{policy_string}]\n")
|
||||
self.logger.store_output(args.gpu, 'socpstate', f"[{e.get_error_info(detailed=False)}] Unable to set soc pstate dpm policy to {args.soc_pstate}")
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_output()
|
||||
@@ -4805,6 +4837,16 @@ class AMDSMICommands():
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_INVAL:
|
||||
xgmi_plpd_info = amdsmi_interface.amdsmi_get_xgmi_plpd(args.gpu)
|
||||
policy_string = "N/A"
|
||||
# Check if 'policies' key exists before accessing it
|
||||
if 'policies' in xgmi_plpd_info and xgmi_plpd_info['policies']:
|
||||
policy_string = ""
|
||||
for policy in xgmi_plpd_info['policies']:
|
||||
policy_string += f"{policy['policy_id']}: {policy['policy_description']}, "
|
||||
policy_string = policy_string.rstrip(", ") # Remove trailing comma and space
|
||||
print(f"Valid XGMI PLPD Policies: [{policy_string}]\n")
|
||||
self.logger.store_output(args.gpu, 'xgmiplpd', f"[{e.get_error_info(detailed=False)}] Unable to set XGMI per-link power down policy to {args.xgmi_plpd}")
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_output()
|
||||
|
||||
@@ -1067,8 +1067,6 @@ int Device::writeDevInfo(DevInfoTypes type, std::string val) {
|
||||
sysfs_path += kDevAttribNameMap.at(type);
|
||||
switch (type) {
|
||||
case kDevGPUMClk:
|
||||
case kDevSocPstate:
|
||||
case kDevXgmiPlpd:
|
||||
case kDevProcessIsolation:
|
||||
case kDevShaderClean:
|
||||
case kDevDCEFClk:
|
||||
@@ -1082,6 +1080,8 @@ int Device::writeDevInfo(DevInfoTypes type, std::string val) {
|
||||
case kDevComputePartition:
|
||||
case kDevMemoryPartition:
|
||||
case kDevXcpConfig:
|
||||
case kDevSocPstate:
|
||||
case kDevXgmiPlpd:
|
||||
return writeDevInfoStr(type, val, true);
|
||||
|
||||
default:
|
||||
|
||||
@@ -3768,8 +3768,36 @@ amdsmi_status_t amdsmi_get_soc_pstate(amdsmi_processor_handle processor_handle,
|
||||
amdsmi_dpm_policy_t* policy) {
|
||||
AMDSMI_CHECK_INIT();
|
||||
|
||||
return rsmi_wrapper(rsmi_dev_soc_pstate_get, processor_handle, 0,
|
||||
reinterpret_cast<rsmi_dpm_policy_t*>(policy));
|
||||
if (policy == nullptr) {
|
||||
return AMDSMI_STATUS_INVAL;
|
||||
}
|
||||
|
||||
// Initialize output structure to zero
|
||||
memset(policy, 0, sizeof(*policy));
|
||||
|
||||
// Use rsmi structure with correct size (32-byte description fields)
|
||||
rsmi_dpm_policy_t rsmi_policy = {};
|
||||
amdsmi_status_t ret = rsmi_wrapper(rsmi_dev_soc_pstate_get, processor_handle, 0,
|
||||
&rsmi_policy);
|
||||
|
||||
if (ret != AMDSMI_STATUS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Copy data from rsmi structure to amdsmi structure field-by-field
|
||||
// to handle the different structure sizes properly
|
||||
policy->num_supported = rsmi_policy.num_supported;
|
||||
policy->current = rsmi_policy.current;
|
||||
|
||||
for (uint32_t i = 0; i < rsmi_policy.num_supported && i < AMDSMI_MAX_NUM_PM_POLICIES; i++) {
|
||||
policy->policies[i].policy_id = rsmi_policy.policies[i].policy_id;
|
||||
strncpy(policy->policies[i].policy_description,
|
||||
rsmi_policy.policies[i].policy_description,
|
||||
AMDSMI_MAX_STRING_LENGTH - 1);
|
||||
policy->policies[i].policy_description[AMDSMI_MAX_STRING_LENGTH - 1] = '\0';
|
||||
}
|
||||
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
amdsmi_status_t amdsmi_set_xgmi_plpd(amdsmi_processor_handle processor_handle,
|
||||
@@ -3784,8 +3812,36 @@ amdsmi_status_t amdsmi_get_xgmi_plpd(amdsmi_processor_handle processor_handle,
|
||||
amdsmi_dpm_policy_t* policy) {
|
||||
AMDSMI_CHECK_INIT();
|
||||
|
||||
return rsmi_wrapper(rsmi_dev_xgmi_plpd_get, processor_handle, 0,
|
||||
reinterpret_cast<rsmi_dpm_policy_t*>(policy));
|
||||
if (policy == nullptr) {
|
||||
return AMDSMI_STATUS_INVAL;
|
||||
}
|
||||
|
||||
// Initialize output structure to zero
|
||||
memset(policy, 0, sizeof(*policy));
|
||||
|
||||
// Use rsmi structure with correct size (32-byte description fields)
|
||||
rsmi_dpm_policy_t rsmi_policy = {};
|
||||
amdsmi_status_t ret = rsmi_wrapper(rsmi_dev_xgmi_plpd_get, processor_handle, 0,
|
||||
&rsmi_policy);
|
||||
|
||||
if (ret != AMDSMI_STATUS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Copy data from rsmi structure to amdsmi structure field-by-field
|
||||
// to handle the different structure sizes properly
|
||||
policy->num_supported = rsmi_policy.num_supported;
|
||||
policy->current = rsmi_policy.current;
|
||||
|
||||
for (uint32_t i = 0; i < rsmi_policy.num_supported && i < AMDSMI_MAX_NUM_PM_POLICIES; i++) {
|
||||
policy->policies[i].policy_id = rsmi_policy.policies[i].policy_id;
|
||||
strncpy(policy->policies[i].policy_description,
|
||||
rsmi_policy.policies[i].policy_description,
|
||||
AMDSMI_MAX_STRING_LENGTH - 1);
|
||||
policy->policies[i].policy_description[AMDSMI_MAX_STRING_LENGTH - 1] = '\0';
|
||||
}
|
||||
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
amdsmi_status_t amdsmi_get_gpu_process_isolation(amdsmi_processor_handle processor_handle,
|
||||
|
||||
在新工单中引用
屏蔽一个用户