Merge 'master' into 'amd-master'

Change-Id: I3b26f926aafd63fa403a46e93d1889f8560fa160
Este commit está contenido en:
Jenkins
2019-05-27 02:12:53 -07:00
Se han modificado 6 ficheros con 128 adiciones y 42 borrados
Archivo binario no mostrado.
+22 -7
Ver fichero
@@ -238,6 +238,21 @@ typedef enum {
typedef rsmi_temperature_metric_t rsmi_temperature_metric;
/// \endcond
/**
 * @brief This enumeration identifies the part of the device from which a
 * temperature reading should be obtained.
 */
typedef enum {
  RSMI_TEMP_TYPE_FIRST = 0,                    //!< First valid sensor type

  RSMI_TEMP_TYPE_EDGE = RSMI_TEMP_TYPE_FIRST,  //!< Edge GPU temperature
  RSMI_TEMP_TYPE_JUNCTION = 1,                 //!< Junction/hotspot
                                               //!< temperature
  RSMI_TEMP_TYPE_MEMORY = 2,                   //!< VRAM temperature

  RSMI_TEMP_TYPE_LAST = RSMI_TEMP_TYPE_MEMORY  //!< Last valid sensor type
} rsmi_temperature_type_t;
/**
* @brief Pre-set Profile Selections. These bitmasks can be AND'd with the
* ::rsmi_power_profile_status_t.available_profiles returned from
@@ -1096,15 +1111,15 @@ rsmi_status_t rsmi_dev_fan_speed_max_get(uint32_t dv_ind,
* @brief Get the temperature metric value for the specified metric, from the
* specified temperature sensor on the specified device.
*
* @details Given a device index @p dv_ind, a 0-based sensor index
* @p sensor_ind, a ::rsmi_temperature_metric_t @p metric and a pointer to an
* int64_t @p temperature, this function will write the value of the metric
* indicated by @p metric to the memory location @p temperature.
* @details Given a device index @p dv_ind, a sensor type @p sensor_type, a
* ::rsmi_temperature_metric_t @p metric and a pointer to an int64_t @p
* temperature, this function will write the value of the metric indicated by
* @p metric and @p sensor_type to the memory location @p temperature.
*
* @param[in] dv_ind a device index
*
* @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0.
* If a device has more than one sensor, it could be greater than 0.
* @param[in] sensor_type part of device from which temperature should be
* obtained. This should come from the enum ::rsmi_temperature_type_t
*
* @param[in] metric enum indicating which temperature value should be
* retrieved
@@ -1115,7 +1130,7 @@ rsmi_status_t rsmi_dev_fan_speed_max_get(uint32_t dv_ind,
* @retval ::RSMI_STATUS_SUCCESS is returned upon successful call.
*
*/
rsmi_status_t rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_ind,
rsmi_status_t rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_type,
rsmi_temperature_metric_t metric, int64_t *temperature);
/** @} */ // end of PhysQuer
+7
Ver fichero
@@ -47,8 +47,10 @@
#include <string>
#include <cstdint>
#include <map>
#include "rocm_smi/rocm_smi_common.h"
#include "rocm_smi/rocm_smi.h"
namespace amd {
namespace smi {
@@ -77,6 +79,7 @@ enum MonitorTypes {
kMonTempOffset,
kMonTempLowest,
kMonTempHighest,
kMonTempLabel,
kMonInvalid = 0xFFFFFFFF,
};
@@ -89,10 +92,14 @@ class Monitor {
const std::string path(void) const {return path_;}
int readMonitor(MonitorTypes type, uint32_t sensor_ind, std::string *val);
int writeMonitor(MonitorTypes type, uint32_t sensor_ind, std::string val);
uint32_t setSensorLabelMap(void);
uint32_t getSensorIndex(rsmi_temperature_type_t type);
private:
std::string MakeMonitorPath(MonitorTypes type, int32_t sensor_id);
std::string path_;
const RocmSMI_env_vars *env_;
std::map<rsmi_temperature_type_t, uint32_t> temp_type_index_map_;
};
} // namespace smi
+17 -14
Ver fichero
@@ -1180,8 +1180,7 @@ get_id_name_str_from_line(uint64_t id, std::string ln,
return ret_str;
}
static rsmi_status_t get_backup_name(uint16_t id, char *name,
size_t len, eNameStrType typ) {
static rsmi_status_t get_backup_name(uint16_t id, char *name, size_t len) {
std::string name_str;
name_str += "0x";
@@ -1291,7 +1290,7 @@ static rsmi_status_t get_dev_name_from_id(uint32_t dv_ind, char *name,
val_str.clear();
return get_backup_name(typ == NAME_STR_DEVICE ?
device_id : subsys_id, name, len, typ);
device_id : subsys_id, name, len);
}
val_str = get_id_name_str_from_line(vendor_id, ln, &ln_str);
@@ -1315,7 +1314,7 @@ static rsmi_status_t get_dev_name_from_id(uint32_t dv_ind, char *name,
// We should have already returned if we were looking for
// device or subdevice
assert(typ == NAME_STR_VENDOR);
return get_backup_name(vendor_id, name, len, typ);
return get_backup_name(vendor_id, name, len);
}
size_t ct = val_str.copy(name, len);
@@ -1467,7 +1466,7 @@ rsmi_dev_pci_throughput_get(uint32_t dv_ind, uint64_t *sent,
}
rsmi_status_t
rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_ind,
rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_type,
rsmi_temperature_metric_t metric, int64_t *temperature) {
TRY
@@ -1478,14 +1477,6 @@ rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_ind,
rsmi_status_t ret;
amd::smi::MonitorTypes mon_type;
// Make any adjustments to sensor_ind here, if index is not a 0 based. For
// rocm_smi we are using a 0-based index. However, most of the Linux sysfs
// monitor files are 1-based, so we will increment by 1 and make adjustments
// for exceptions later.
// See https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface
++sensor_ind;
switch (metric) {
case RSMI_TEMP_CURRENT:
mon_type = amd::smi::kMonTemp;
@@ -1535,7 +1526,19 @@ rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_ind,
DEVICE_MUTEX
ret = get_dev_mon_value(mon_type, dv_ind, sensor_ind, temperature);
GET_DEV_FROM_INDX
assert(dev->monitor() != nullptr);
std::shared_ptr<amd::smi::Monitor> m = dev->monitor();
uint32_t err = m->setSensorLabelMap();
if (err) {
return errno_to_rsmi_status(err);
}
uint32_t sensor_index =
m->getSensorIndex(static_cast<rsmi_temperature_type_t>(sensor_type));
ret = get_dev_mon_value(mon_type, dv_ind, sensor_index, temperature);
return ret;
CATCH
+46
Ver fichero
@@ -86,6 +86,18 @@ static const char *kMonTempCritMinHystName = "temp#_lcrit_hyst";
static const char *kMonTempOffsetName = "temp#_offset";
static const char *kMonTempLowestName = "temp#_lowest";
static const char *kMonTempHighestName = "temp#_highest";
static const char *kMonTempLabelName = "temp#_label";
// Label strings that identify each supported temperature sensor. These are
// the keys matched against the text read through kMonTempLabel.
static const char *kTempSensorTypeEdgeName = "edge";
static const char *kTempSensorTypeJunctionName = "junction";
static const char *kTempSensorTypeMemoryName = "mem";

// Translates a sensor label string into its rsmi_temperature_type_t value.
static const std::map<std::string, rsmi_temperature_type_t>
    kTempSensorNameMap = {
        {kTempSensorTypeEdgeName, RSMI_TEMP_TYPE_EDGE},
        {kTempSensorTypeJunctionName, RSMI_TEMP_TYPE_JUNCTION},
        {kTempSensorTypeMemoryName, RSMI_TEMP_TYPE_MEMORY},
};
static const std::map<MonitorTypes, const char *> kMonitorNameMap = {
{kMonName, kMonNameFName},
@@ -111,6 +123,7 @@ static const std::map<MonitorTypes, const char *> kMonitorNameMap = {
{kMonTempOffset, kMonTempOffsetName},
{kMonTempLowest, kMonTempLowestName},
{kMonTempHighest, kMonTempHighestName},
{kMonTempLabel, kMonTempLabelName},
};
Monitor::Monitor(std::string path, RocmSMI_env_vars const *e) :
@@ -152,6 +165,39 @@ int Monitor::readMonitor(MonitorTypes type, uint32_t sensor_id,
return ReadSysfsStr(sysfs_path, val);
}
uint32_t
Monitor::setSensorLabelMap(void) {
std::string type_str;
int ret;
if (temp_type_index_map_.size() > 0) {
return 0; // We've already filled in the map
}
auto add_temp_sensor_entry = [&](uint32_t file_index) {
ret = readMonitor(kMonTempLabel, file_index, &type_str);
if (ret) {
return ret;
}
rsmi_temperature_type_t t_type = kTempSensorNameMap.at(type_str);
temp_type_index_map_.insert({t_type, file_index});
return 0;
};
for (uint32_t i = 1; i <= 3; ++i) {
ret = add_temp_sensor_entry(i);
if (ret) {
return ret;
}
}
return 0;
}
/**
 * @brief Look up the monitor file index recorded for a temperature sensor
 * type.
 *
 * @param[in] type the temperature sensor type to look up
 *
 * @return the file index stored in temp_type_index_map_ for @p type.
 *
 * @note std::map::at() throws std::out_of_range if @p type has no entry in
 * temp_type_index_map_.
 */
uint32_t
Monitor::getSensorIndex(rsmi_temperature_type_t type) {
  const uint32_t file_index = temp_type_index_map_.at(type);
  return file_index;
}
} // namespace smi
} // namespace amd
+36 -21
Ver fichero
@@ -48,12 +48,19 @@
#include <iostream>
#include <string>
#include <map>
#include "gtest/gtest.h"
#include "rocm_smi/rocm_smi.h"
#include "rocm_smi_test/functional/temp_read.h"
#include "rocm_smi_test/test_common.h"
static const std::map<uint32_t, std::string> kTempSensorNameMap = {
{RSMI_TEMP_TYPE_MEMORY, "Memory"},
{RSMI_TEMP_TYPE_JUNCTION, "Junction"},
{RSMI_TEMP_TYPE_EDGE, "Edge"},
};
TestTempRead::TestTempRead() : TestBase() {
set_title("RSMI Temp Read Test");
set_description("The Temperature Read tests verifies that the temperature "
@@ -91,12 +98,14 @@ void TestTempRead::Run(void) {
TestBase::Run();
uint32_t type;
for (uint32_t i = 0; i < num_monitor_devs(); ++i) {
PrintDeviceHeader(i);
auto print_temp_metric = [&](rsmi_temperature_metric_t met,
std::string label) {
err = rsmi_dev_temp_metric_get(i, 0, met, &val_i64);
err = rsmi_dev_temp_metric_get(i, type, met, &val_i64);
if (err != RSMI_STATUS_SUCCESS) {
if (err == RSMI_STATUS_NOT_SUPPORTED) {
@@ -115,25 +124,31 @@ void TestTempRead::Run(void) {
"C" << std::endl;
}
};
print_temp_metric(RSMI_TEMP_CURRENT, "Current Temp.");
print_temp_metric(RSMI_TEMP_MAX, "Temperature max value");
print_temp_metric(RSMI_TEMP_MIN, "Temperature min value");
print_temp_metric(RSMI_TEMP_MAX_HYST,
"Temperature hysteresis value for max limit");
print_temp_metric(RSMI_TEMP_MIN_HYST,
"Temperature hysteresis value for min limit");
print_temp_metric(RSMI_TEMP_CRITICAL, "Temperature critical max value");
print_temp_metric(RSMI_TEMP_CRITICAL_HYST,
"Temperature hysteresis value for critical limit");
print_temp_metric(RSMI_TEMP_EMERGENCY,
"Temperature emergency max value");
print_temp_metric(RSMI_TEMP_EMERGENCY_HYST,
"Temperature hysteresis value for emergency limit");
print_temp_metric(RSMI_TEMP_CRIT_MIN, "Temperature critical min value");
print_temp_metric(RSMI_TEMP_CRIT_MIN_HYST,
"Temperature hysteresis value for critical min value");
print_temp_metric(RSMI_TEMP_OFFSET, "Temperature offset");
print_temp_metric(RSMI_TEMP_LOWEST, "Historical minimum temperature");
print_temp_metric(RSMI_TEMP_HIGHEST, "Historical maximum temperature");
for (type = RSMI_TEMP_TYPE_FIRST; type <= RSMI_TEMP_TYPE_LAST; ++type) {
IF_VERB(STANDARD) {
std::cout << "\t** **********" << kTempSensorNameMap.at(type) <<
" Temperatures **********" << std::endl;
}
print_temp_metric(RSMI_TEMP_CURRENT, "Current Temp.");
print_temp_metric(RSMI_TEMP_MAX, "Temperature max value");
print_temp_metric(RSMI_TEMP_MIN, "Temperature min value");
print_temp_metric(RSMI_TEMP_MAX_HYST,
"Temperature hysteresis value for max limit");
print_temp_metric(RSMI_TEMP_MIN_HYST,
"Temperature hysteresis value for min limit");
print_temp_metric(RSMI_TEMP_CRITICAL, "Temperature critical max value");
print_temp_metric(RSMI_TEMP_CRITICAL_HYST,
"Temperature hysteresis value for critical limit");
print_temp_metric(RSMI_TEMP_EMERGENCY,
"Temperature emergency max value");
print_temp_metric(RSMI_TEMP_EMERGENCY_HYST,
"Temperature hysteresis value for emergency limit");
print_temp_metric(RSMI_TEMP_CRIT_MIN, "Temperature critical min value");
print_temp_metric(RSMI_TEMP_CRIT_MIN_HYST,
"Temperature hysteresis value for critical min value");
print_temp_metric(RSMI_TEMP_OFFSET, "Temperature offset");
print_temp_metric(RSMI_TEMP_LOWEST, "Historical minimum temperature");
print_temp_metric(RSMI_TEMP_HIGHEST, "Historical maximum temperature");
}
}
}