Fallback to junction temperature and socket power
If the card does not have edge temperature, fall back to junction
temperature. If the card only has socket power, then use socket
power instead.
Change-Id: I053a67a89cf3b29a34e82123f522c08d7dd68916
[ROCm/rdc commit: 5cfe2b4169]
This commit is contained in:
@@ -65,7 +65,7 @@ def config_func(config):
|
||||
if key == 'field_ids':
|
||||
field_ids = []
|
||||
for f in node.values:
|
||||
field_id = rdc.get_field_id_from_name(f)
|
||||
field_id = rdc.get_field_id_from_name(str.encode(f))
|
||||
if field_id.value == rdc_field_t.RDC_FI_INVALID:
|
||||
print("Invalid field '%s' will be ignored." % (f))
|
||||
else:
|
||||
|
||||
@@ -85,7 +85,7 @@ def get_field_ids(args):
|
||||
|
||||
if len(field_id_str)> 0 :
|
||||
for f in field_id_str:
|
||||
field_id = rdc.get_field_id_from_name(f)
|
||||
field_id = rdc.get_field_id_from_name(str.encode(f))
|
||||
if field_id.value == rdc_field_t.RDC_FI_INVALID:
|
||||
print("Invalid field '%s' will be ignored." % (f))
|
||||
else:
|
||||
|
||||
@@ -364,12 +364,16 @@ rdc_status_t RdcMetricFetcherImpl::fetch_smi_field(uint32_t gpu_index, rdc_field
|
||||
}
|
||||
break;
|
||||
case RDC_FI_POWER_USAGE:
|
||||
value->status = rsmi_dev_power_ave_get(gpu_index, RSMI_TEMP_CURRENT, &i64);
|
||||
{
|
||||
RSMI_POWER_TYPE power_type = RSMI_CURRENT_POWER;
|
||||
// below call should handle both socket power and regular power
|
||||
value->status = rsmi_dev_power_get(gpu_index, &i64, &power_type);
|
||||
value->type = INTEGER;
|
||||
if (value->status == RSMI_STATUS_SUCCESS) {
|
||||
value->value.l_int = static_cast<int64_t>(i64);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case RDC_FI_GPU_CLOCK:
|
||||
case RDC_FI_MEM_CLOCK:
|
||||
rsmi_frequencies_t f;
|
||||
@@ -404,6 +408,14 @@ rdc_status_t RdcMetricFetcherImpl::fetch_smi_field(uint32_t gpu_index, rdc_field
|
||||
}
|
||||
value->status = rsmi_dev_temp_metric_get(gpu_index, sensor_type, RSMI_TEMP_CURRENT, &val_i64);
|
||||
|
||||
// fallback to hotspot temperature as some card may not have edge temperature.
|
||||
if (sensor_type == RSMI_TEMP_TYPE_EDGE
|
||||
&& value->status == RSMI_STATUS_NOT_SUPPORTED) {
|
||||
sensor_type = RSMI_TEMP_TYPE_JUNCTION;
|
||||
value->status = rsmi_dev_temp_metric_get(gpu_index, sensor_type,
|
||||
RSMI_TEMP_CURRENT, &val_i64);
|
||||
}
|
||||
|
||||
value->type = INTEGER;
|
||||
if (value->status == RSMI_STATUS_SUCCESS) {
|
||||
value->value.l_int = val_i64;
|
||||
|
||||
Viittaa uudessa ongelmassa
Block a user