Merge 'master' into 'amd-master'
Change-Id: I3b26f926aafd63fa403a46e93d1889f8560fa160
Este commit está contenido en:
Archivo binario no mostrado.
@@ -238,6 +238,21 @@ typedef enum {
|
||||
typedef rsmi_temperature_metric_t rsmi_temperature_metric;
|
||||
/// \endcond
|
||||
|
||||
/**
|
||||
* @brief This enumeration is used to indicate from which part of the device a
|
||||
* temperature reading should be obtained.
|
||||
*/
|
||||
typedef enum {
|
||||
RSMI_TEMP_TYPE_FIRST = 0,
|
||||
|
||||
RSMI_TEMP_TYPE_EDGE = RSMI_TEMP_TYPE_FIRST, //!< Edge GPU temperature
|
||||
RSMI_TEMP_TYPE_JUNCTION, //!< Junction/hotspot
|
||||
//!< temperature
|
||||
RSMI_TEMP_TYPE_MEMORY, //!< VRAM temperature
|
||||
|
||||
RSMI_TEMP_TYPE_LAST = RSMI_TEMP_TYPE_MEMORY
|
||||
} rsmi_temperature_type_t;
|
||||
|
||||
/**
|
||||
* @brief Pre-set Profile Selections. These bitmasks can be AND'd with the
|
||||
* ::rsmi_power_profile_status_t.available_profiles returned from
|
||||
@@ -1096,15 +1111,15 @@ rsmi_status_t rsmi_dev_fan_speed_max_get(uint32_t dv_ind,
|
||||
* @brief Get the temperature metric value for the specified metric, from the
|
||||
* specified temperature sensor on the specified device.
|
||||
*
|
||||
* @details Given a device index @p dv_ind, a 0-based sensor index
|
||||
* @p sensor_ind, a ::rsmi_temperature_metric_t @p metric and a pointer to an
|
||||
* int64_t @p temperature, this function will write the value of the metric
|
||||
* indicated by @p metric to the memory location @p temperature.
|
||||
* @details Given a device index @p dv_ind, a sensor type @p sensor_type, a
|
||||
* ::rsmi_temperature_metric_t @p metric and a pointer to an int64_t @p
|
||||
* temperature, this function will write the value of the metric indicated by
|
||||
* @p metric and @p sensor_type to the memory location @p temperature.
|
||||
*
|
||||
* @param[in] dv_ind a device index
|
||||
*
|
||||
* @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0.
|
||||
* If a device has more than one sensor, it could be greater than 0.
|
||||
* @param[in] sensor_type part of device from which temperature should be
|
||||
* obtained. This should come from the enum ::rsmi_temperature_type_t
|
||||
*
|
||||
* @param[in] metric enum indicating which temperature value should be
|
||||
* retrieved
|
||||
@@ -1115,7 +1130,7 @@ rsmi_status_t rsmi_dev_fan_speed_max_get(uint32_t dv_ind,
|
||||
* @retval ::RSMI_STATUS_SUCCESS is returned upon successful call.
|
||||
*
|
||||
*/
|
||||
rsmi_status_t rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_ind,
|
||||
rsmi_status_t rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_type,
|
||||
rsmi_temperature_metric_t metric, int64_t *temperature);
|
||||
/** @} */ // end of PhysQuer
|
||||
|
||||
|
||||
@@ -47,8 +47,10 @@
|
||||
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
|
||||
#include "rocm_smi/rocm_smi_common.h"
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
|
||||
namespace amd {
|
||||
namespace smi {
|
||||
@@ -77,6 +79,7 @@ enum MonitorTypes {
|
||||
kMonTempOffset,
|
||||
kMonTempLowest,
|
||||
kMonTempHighest,
|
||||
kMonTempLabel,
|
||||
|
||||
kMonInvalid = 0xFFFFFFFF,
|
||||
};
|
||||
@@ -89,10 +92,14 @@ class Monitor {
|
||||
const std::string path(void) const {return path_;}
|
||||
int readMonitor(MonitorTypes type, uint32_t sensor_ind, std::string *val);
|
||||
int writeMonitor(MonitorTypes type, uint32_t sensor_ind, std::string val);
|
||||
uint32_t setSensorLabelMap(void);
|
||||
uint32_t getSensorIndex(rsmi_temperature_type_t type);
|
||||
|
||||
private:
|
||||
std::string MakeMonitorPath(MonitorTypes type, int32_t sensor_id);
|
||||
std::string path_;
|
||||
const RocmSMI_env_vars *env_;
|
||||
std::map<rsmi_temperature_type_t, uint32_t> temp_type_index_map_;
|
||||
};
|
||||
|
||||
} // namespace smi
|
||||
|
||||
+17
-14
@@ -1180,8 +1180,7 @@ get_id_name_str_from_line(uint64_t id, std::string ln,
|
||||
return ret_str;
|
||||
}
|
||||
|
||||
static rsmi_status_t get_backup_name(uint16_t id, char *name,
|
||||
size_t len, eNameStrType typ) {
|
||||
static rsmi_status_t get_backup_name(uint16_t id, char *name, size_t len) {
|
||||
std::string name_str;
|
||||
|
||||
name_str += "0x";
|
||||
@@ -1291,7 +1290,7 @@ static rsmi_status_t get_dev_name_from_id(uint32_t dv_ind, char *name,
|
||||
val_str.clear();
|
||||
|
||||
return get_backup_name(typ == NAME_STR_DEVICE ?
|
||||
device_id : subsys_id, name, len, typ);
|
||||
device_id : subsys_id, name, len);
|
||||
}
|
||||
|
||||
val_str = get_id_name_str_from_line(vendor_id, ln, &ln_str);
|
||||
@@ -1315,7 +1314,7 @@ static rsmi_status_t get_dev_name_from_id(uint32_t dv_ind, char *name,
|
||||
// We should have already returned if we were looking for
|
||||
// device or subdevice
|
||||
assert(typ == NAME_STR_VENDOR);
|
||||
return get_backup_name(vendor_id, name, len, typ);
|
||||
return get_backup_name(vendor_id, name, len);
|
||||
}
|
||||
size_t ct = val_str.copy(name, len);
|
||||
|
||||
@@ -1467,7 +1466,7 @@ rsmi_dev_pci_throughput_get(uint32_t dv_ind, uint64_t *sent,
|
||||
}
|
||||
|
||||
rsmi_status_t
|
||||
rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_ind,
|
||||
rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_type,
|
||||
rsmi_temperature_metric_t metric, int64_t *temperature) {
|
||||
TRY
|
||||
|
||||
@@ -1478,14 +1477,6 @@ rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_ind,
|
||||
rsmi_status_t ret;
|
||||
amd::smi::MonitorTypes mon_type;
|
||||
|
||||
|
||||
// Make any adjustments to sensor_ind here, if index is not a 0 based. For
|
||||
// rocm_smi we are using a 0-based index. However, most of the Linux sysfs
|
||||
// monitor files are 1-based, so we will increment by 1 and make adjustments
|
||||
// for exceptions later.
|
||||
// See https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface
|
||||
++sensor_ind;
|
||||
|
||||
switch (metric) {
|
||||
case RSMI_TEMP_CURRENT:
|
||||
mon_type = amd::smi::kMonTemp;
|
||||
@@ -1535,7 +1526,19 @@ rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_ind,
|
||||
|
||||
DEVICE_MUTEX
|
||||
|
||||
ret = get_dev_mon_value(mon_type, dv_ind, sensor_ind, temperature);
|
||||
GET_DEV_FROM_INDX
|
||||
|
||||
assert(dev->monitor() != nullptr);
|
||||
std::shared_ptr<amd::smi::Monitor> m = dev->monitor();
|
||||
|
||||
uint32_t err = m->setSensorLabelMap();
|
||||
if (err) {
|
||||
return errno_to_rsmi_status(err);
|
||||
}
|
||||
|
||||
uint32_t sensor_index =
|
||||
m->getSensorIndex(static_cast<rsmi_temperature_type_t>(sensor_type));
|
||||
ret = get_dev_mon_value(mon_type, dv_ind, sensor_index, temperature);
|
||||
|
||||
return ret;
|
||||
CATCH
|
||||
|
||||
@@ -86,6 +86,18 @@ static const char *kMonTempCritMinHystName = "temp#_lcrit_hyst";
|
||||
static const char *kMonTempOffsetName = "temp#_offset";
|
||||
static const char *kMonTempLowestName = "temp#_lowest";
|
||||
static const char *kMonTempHighestName = "temp#_highest";
|
||||
static const char *kMonTempLabelName = "temp#_label";
|
||||
|
||||
static const char *kTempSensorTypeMemoryName = "mem";
|
||||
static const char *kTempSensorTypeJunctionName = "junction";
|
||||
static const char *kTempSensorTypeEdgeName = "edge";
|
||||
|
||||
static const std::map<std::string, rsmi_temperature_type_t>
|
||||
kTempSensorNameMap = {
|
||||
{kTempSensorTypeMemoryName, RSMI_TEMP_TYPE_MEMORY},
|
||||
{kTempSensorTypeJunctionName, RSMI_TEMP_TYPE_JUNCTION},
|
||||
{kTempSensorTypeEdgeName, RSMI_TEMP_TYPE_EDGE},
|
||||
};
|
||||
|
||||
static const std::map<MonitorTypes, const char *> kMonitorNameMap = {
|
||||
{kMonName, kMonNameFName},
|
||||
@@ -111,6 +123,7 @@ static const std::map<MonitorTypes, const char *> kMonitorNameMap = {
|
||||
{kMonTempOffset, kMonTempOffsetName},
|
||||
{kMonTempLowest, kMonTempLowestName},
|
||||
{kMonTempHighest, kMonTempHighestName},
|
||||
{kMonTempLabel, kMonTempLabelName},
|
||||
};
|
||||
|
||||
Monitor::Monitor(std::string path, RocmSMI_env_vars const *e) :
|
||||
@@ -152,6 +165,39 @@ int Monitor::readMonitor(MonitorTypes type, uint32_t sensor_id,
|
||||
return ReadSysfsStr(sysfs_path, val);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
Monitor::setSensorLabelMap(void) {
|
||||
std::string type_str;
|
||||
int ret;
|
||||
|
||||
if (temp_type_index_map_.size() > 0) {
|
||||
return 0; // We've already filled in the map
|
||||
}
|
||||
auto add_temp_sensor_entry = [&](uint32_t file_index) {
|
||||
ret = readMonitor(kMonTempLabel, file_index, &type_str);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
rsmi_temperature_type_t t_type = kTempSensorNameMap.at(type_str);
|
||||
temp_type_index_map_.insert({t_type, file_index});
|
||||
return 0;
|
||||
};
|
||||
|
||||
for (uint32_t i = 1; i <= 3; ++i) {
|
||||
ret = add_temp_sensor_entry(i);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
Monitor::getSensorIndex(rsmi_temperature_type_t type) {
|
||||
return temp_type_index_map_.at(type);
|
||||
}
|
||||
|
||||
|
||||
} // namespace smi
|
||||
} // namespace amd
|
||||
|
||||
@@ -48,12 +48,19 @@
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <map>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "rocm_smi_test/functional/temp_read.h"
|
||||
#include "rocm_smi_test/test_common.h"
|
||||
|
||||
|
||||
static const std::map<uint32_t, std::string> kTempSensorNameMap = {
|
||||
{RSMI_TEMP_TYPE_MEMORY, "Memory"},
|
||||
{RSMI_TEMP_TYPE_JUNCTION, "Junction"},
|
||||
{RSMI_TEMP_TYPE_EDGE, "Edge"},
|
||||
};
|
||||
TestTempRead::TestTempRead() : TestBase() {
|
||||
set_title("RSMI Temp Read Test");
|
||||
set_description("The Temperature Read tests verifies that the temperature "
|
||||
@@ -91,12 +98,14 @@ void TestTempRead::Run(void) {
|
||||
|
||||
TestBase::Run();
|
||||
|
||||
uint32_t type;
|
||||
|
||||
for (uint32_t i = 0; i < num_monitor_devs(); ++i) {
|
||||
PrintDeviceHeader(i);
|
||||
|
||||
auto print_temp_metric = [&](rsmi_temperature_metric_t met,
|
||||
std::string label) {
|
||||
err = rsmi_dev_temp_metric_get(i, 0, met, &val_i64);
|
||||
err = rsmi_dev_temp_metric_get(i, type, met, &val_i64);
|
||||
|
||||
if (err != RSMI_STATUS_SUCCESS) {
|
||||
if (err == RSMI_STATUS_NOT_SUPPORTED) {
|
||||
@@ -115,25 +124,31 @@ void TestTempRead::Run(void) {
|
||||
"C" << std::endl;
|
||||
}
|
||||
};
|
||||
print_temp_metric(RSMI_TEMP_CURRENT, "Current Temp.");
|
||||
print_temp_metric(RSMI_TEMP_MAX, "Temperature max value");
|
||||
print_temp_metric(RSMI_TEMP_MIN, "Temperature min value");
|
||||
print_temp_metric(RSMI_TEMP_MAX_HYST,
|
||||
"Temperature hysteresis value for max limit");
|
||||
print_temp_metric(RSMI_TEMP_MIN_HYST,
|
||||
"Temperature hysteresis value for min limit");
|
||||
print_temp_metric(RSMI_TEMP_CRITICAL, "Temperature critical max value");
|
||||
print_temp_metric(RSMI_TEMP_CRITICAL_HYST,
|
||||
"Temperature hysteresis value for critical limit");
|
||||
print_temp_metric(RSMI_TEMP_EMERGENCY,
|
||||
"Temperature emergency max value");
|
||||
print_temp_metric(RSMI_TEMP_EMERGENCY_HYST,
|
||||
"Temperature hysteresis value for emergency limit");
|
||||
print_temp_metric(RSMI_TEMP_CRIT_MIN, "Temperature critical min value");
|
||||
print_temp_metric(RSMI_TEMP_CRIT_MIN_HYST,
|
||||
"Temperature hysteresis value for critical min value");
|
||||
print_temp_metric(RSMI_TEMP_OFFSET, "Temperature offset");
|
||||
print_temp_metric(RSMI_TEMP_LOWEST, "Historical minimum temperature");
|
||||
print_temp_metric(RSMI_TEMP_HIGHEST, "Historical maximum temperature");
|
||||
for (type = RSMI_TEMP_TYPE_FIRST; type <= RSMI_TEMP_TYPE_LAST; ++type) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t** **********" << kTempSensorNameMap.at(type) <<
|
||||
" Temperatures **********" << std::endl;
|
||||
}
|
||||
print_temp_metric(RSMI_TEMP_CURRENT, "Current Temp.");
|
||||
print_temp_metric(RSMI_TEMP_MAX, "Temperature max value");
|
||||
print_temp_metric(RSMI_TEMP_MIN, "Temperature min value");
|
||||
print_temp_metric(RSMI_TEMP_MAX_HYST,
|
||||
"Temperature hysteresis value for max limit");
|
||||
print_temp_metric(RSMI_TEMP_MIN_HYST,
|
||||
"Temperature hysteresis value for min limit");
|
||||
print_temp_metric(RSMI_TEMP_CRITICAL, "Temperature critical max value");
|
||||
print_temp_metric(RSMI_TEMP_CRITICAL_HYST,
|
||||
"Temperature hysteresis value for critical limit");
|
||||
print_temp_metric(RSMI_TEMP_EMERGENCY,
|
||||
"Temperature emergency max value");
|
||||
print_temp_metric(RSMI_TEMP_EMERGENCY_HYST,
|
||||
"Temperature hysteresis value for emergency limit");
|
||||
print_temp_metric(RSMI_TEMP_CRIT_MIN, "Temperature critical min value");
|
||||
print_temp_metric(RSMI_TEMP_CRIT_MIN_HYST,
|
||||
"Temperature hysteresis value for critical min value");
|
||||
print_temp_metric(RSMI_TEMP_OFFSET, "Temperature offset");
|
||||
print_temp_metric(RSMI_TEMP_LOWEST, "Historical minimum temperature");
|
||||
print_temp_metric(RSMI_TEMP_HIGHEST, "Historical maximum temperature");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Referencia en una nueva incidencia
Block a user