Fall back to junction temperature and socket power

If the card does not have edge temperature, fall back to junction
temperature. If the card only has socket power, then use socket
power instead.

Change-Id: I053a67a89cf3b29a34e82123f522c08d7dd68916
Этот коммит содержится в:
Bill(Shuzhou) Liu
2024-02-05 10:06:31 -06:00
родитель adf0d7094f
Коммит 5cfe2b4169
3 изменённых файлов: 15 добавлений и 3 удалений
+1 -1
Просмотреть файл
@@ -65,7 +65,7 @@ def config_func(config):
if key == 'field_ids':
field_ids = []
for f in node.values:
field_id = rdc.get_field_id_from_name(f)
field_id = rdc.get_field_id_from_name(str.encode(f))
if field_id.value == rdc_field_t.RDC_FI_INVALID:
print("Invalid field '%s' will be ignored." % (f))
else:
+1 -1
Просмотреть файл
@@ -85,7 +85,7 @@ def get_field_ids(args):
if len(field_id_str)> 0 :
for f in field_id_str:
field_id = rdc.get_field_id_from_name(f)
field_id = rdc.get_field_id_from_name(str.encode(f))
if field_id.value == rdc_field_t.RDC_FI_INVALID:
print("Invalid field '%s' will be ignored." % (f))
else:
+13 -1
Просмотреть файл
@@ -364,12 +364,16 @@ rdc_status_t RdcMetricFetcherImpl::fetch_smi_field(uint32_t gpu_index, rdc_field
}
break;
case RDC_FI_POWER_USAGE:
value->status = rsmi_dev_power_ave_get(gpu_index, RSMI_TEMP_CURRENT, &i64);
{
RSMI_POWER_TYPE power_type = RSMI_CURRENT_POWER;
// below call should handle both socket power and regular power
value->status = rsmi_dev_power_get(gpu_index, &i64, &power_type);
value->type = INTEGER;
if (value->status == RSMI_STATUS_SUCCESS) {
value->value.l_int = static_cast<int64_t>(i64);
}
break;
}
case RDC_FI_GPU_CLOCK:
case RDC_FI_MEM_CLOCK:
rsmi_frequencies_t f;
@@ -404,6 +408,14 @@ rdc_status_t RdcMetricFetcherImpl::fetch_smi_field(uint32_t gpu_index, rdc_field
}
value->status = rsmi_dev_temp_metric_get(gpu_index, sensor_type, RSMI_TEMP_CURRENT, &val_i64);
// Fall back to the hotspot (junction) temperature, as some cards may not have an edge temperature.
if (sensor_type == RSMI_TEMP_TYPE_EDGE
&& value->status == RSMI_STATUS_NOT_SUPPORTED) {
sensor_type = RSMI_TEMP_TYPE_JUNCTION;
value->status = rsmi_dev_temp_metric_get(gpu_index, sensor_type,
RSMI_TEMP_CURRENT, &val_i64);
}
value->type = INTEGER;
if (value->status == RSMI_STATUS_SUCCESS) {
value->value.l_int = val_i64;