APU prerequisites (#1946)

* Don't require powercap support

APUs don't necessarily support setting a power cap from sysfs.
Ignore failures caused by the file being missing.

Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>

* Show edge temperature in default output if hotspot is missing

APUs don't have a hotspot temperature, but they do have an edge temperature.
Use that.

Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>

* Format all "power" keys as watts

There will be more power keys when APU support is added, so format
them properly.

Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>

* Don't show power limit in output if it's invalid

APUs can't set a power limit using the power1_cap interface.  The limit
will be 0, and thus the UX looks weird in the default output.
Only add the `/power_limit` if it's valid.

Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>

* Unify sizes of `amdsmi_power_info_t`

Sizes are used inconsistently.  This causes tools to not show
N/A when they should.  Make them unified.

Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>

---------

Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>
This commit is contained in:
Mario Limonciello
2025-12-08 21:36:45 -06:00
zatwierdzone przez GitHub
rodzic 24ff76a880
commit a08170bc75
6 zmienionych plików z 41 dodań i 23 usunięć
@@ -1947,7 +1947,7 @@ class AMDSMICommands():
power_info[key] = self.helpers.unit_format(self.logger,
value,
voltage_unit)
elif key == "socket_power":
elif 'power' in key:
power_info[key] = self.helpers.unit_format(self.logger,
value,
power_unit)
@@ -7492,7 +7492,13 @@ class AMDSMICommands():
current_power = gpu_metrics['current_socket_power']
else:
current_power = gpu_metrics['average_socket_power']
temperature = gpu_metrics['temperature_hotspot']
# If the hotspot temperature is not available use the edge temp (applicable to APUs)
if gpu_metrics['temperature_hotspot'] != "N/A":
temperature = gpu_metrics['temperature_hotspot']
elif gpu_metrics['temperature_edge'] != "N/A":
temperature = gpu_metrics['temperature_edge']
else:
temperature = "N/A"
else:
mem_util = "N/A"
gfx_util = "N/A"
@@ -1080,7 +1080,12 @@ class AMDSMILogger():
power_usage = gpu_info['power_usage']
if power_usage != "N/A":
power_usage = f"{gpu_info['power_usage']['current_power']}/{gpu_info['power_usage']['power_limit']} W"
power_limit = gpu_info['power_usage']['power_limit']
if power_limit != 0:
power_limit = f"/{power_limit}"
else:
power_limit = ""
power_usage = f"{gpu_info['power_usage']['current_power']}{power_limit} W"
power_usage = str(power_usage).rjust(13)
gpu_id = str(gpu_info['gpu_id']).rjust(3)
@@ -1128,7 +1128,7 @@ int main() {
printf(" Output of amdsmi_get_power_info:\n");
if (ret != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_AMDSMI_RET(ret)
printf("\tCurrent GFX Voltage: %ld\n", power_measure.gfx_voltage);
printf("\tCurrent GFX Voltage: %" PRIu64 "\n", power_measure.gfx_voltage);
printf("\tAverage socket power: %d\n", power_measure.average_socket_power);
printf("\tGPU Power limit: %d\n\n", power_measure.power_limit);
} else {
@@ -3067,7 +3067,7 @@ def amdsmi_get_power_info(
}
for key, value in power_info_dict.items():
if value == 0xFFFF:
if value in (MaxUIntegerTypes.UINT8_T, MaxUIntegerTypes.UINT16_T, MaxUIntegerTypes.UINT32_T, MaxUIntegerTypes.UINT64_T):
power_info_dict[key] = "N/A"
return power_info_dict
+24 -17
Wyświetl plik
@@ -4556,28 +4556,33 @@ amdsmi_get_power_info(amdsmi_processor_handle processor_handle, amdsmi_power_inf
if (status != AMDSMI_STATUS_SUCCESS)
return status;
info->socket_power = 0xFFFF;
info->current_socket_power = 0xFFFF;
info->average_socket_power = 0xFFFF;
info->gfx_voltage = 0xFFFF;
info->soc_voltage = 0xFFFF;
info->mem_voltage = 0xFFFF;
info->power_limit = 0xFFFF;
info->socket_power = get_std_num_limit<decltype(info->socket_power)>();
info->current_socket_power = get_std_num_limit<decltype(info->current_socket_power)>();
info->average_socket_power = get_std_num_limit<decltype(info->average_socket_power)>();
info->gfx_voltage = get_std_num_limit<decltype(info->gfx_voltage)>();
info->soc_voltage = get_std_num_limit<decltype(info->soc_voltage)>();
info->mem_voltage = get_std_num_limit<decltype(info->mem_voltage)>();
info->power_limit = get_std_num_limit<decltype(info->power_limit)>();
amdsmi_gpu_metrics_t metrics = {};
status = amdsmi_get_gpu_metrics_info(processor_handle, &metrics);
if (status == AMDSMI_STATUS_SUCCESS) {
info->current_socket_power = metrics.current_socket_power;
info->average_socket_power = metrics.average_socket_power;
info->gfx_voltage = metrics.voltage_gfx;
info->soc_voltage = metrics.voltage_soc;
info->mem_voltage = metrics.voltage_mem;
}
if (metrics.current_socket_power != get_std_num_limit<decltype(metrics.current_socket_power)>())
info->current_socket_power = metrics.current_socket_power;
if (metrics.average_socket_power != get_std_num_limit<decltype(metrics.average_socket_power)>())
info->average_socket_power = metrics.average_socket_power;
if (metrics.voltage_gfx != get_std_num_limit<decltype(metrics.voltage_gfx)>())
info->gfx_voltage = metrics.voltage_gfx;
if (metrics.voltage_soc != get_std_num_limit<decltype(metrics.voltage_soc)>())
info->soc_voltage = metrics.voltage_soc;
if (metrics.voltage_mem != get_std_num_limit<decltype(metrics.voltage_mem)>())
info->mem_voltage = metrics.voltage_mem;
if (metrics.current_socket_power != 0xFFFF) {
info->socket_power = metrics.current_socket_power;
} else if (metrics.average_socket_power != 0xFFFF) {
info->socket_power = metrics.average_socket_power;
/* store something in socket power */
if (info->current_socket_power != get_std_num_limit<decltype(info->current_socket_power)>())
info->socket_power = info->current_socket_power;
else if (info->average_socket_power != get_std_num_limit<decltype(info->average_socket_power)>())
info->socket_power = info->average_socket_power;
}
int power_limit = 0;
@@ -4585,6 +4590,8 @@ amdsmi_get_power_info(amdsmi_processor_handle processor_handle, amdsmi_power_inf
amdsmi_status_t status2 = smi_amdgpu_get_power_cap(gpu_device, 0, &power_limit);
if (status2 == AMDSMI_STATUS_SUCCESS) {
info->power_limit = power_limit;
} else if (status2 == AMDSMI_STATUS_NOT_SUPPORTED) {
status = AMDSMI_STATUS_SUCCESS;
}
// Returning status from amdsmi_get_gpu_metrics_info() which should return SUCCESS
@@ -260,7 +260,7 @@ amdsmi_status_t smi_amdgpu_get_power_cap(amd::smi::AMDSmiGPUDevice* device, uint
fullpath += "/power" + std::to_string(sensor_ind + 1) + "_cap";
std::ifstream file(fullpath.c_str(), std::ifstream::in);
if (!file.is_open()) {
return AMDSMI_STATUS_API_FAILED;
return AMDSMI_STATUS_NOT_SUPPORTED;
}
file.getline(val, DATA_SIZE);