Files
rocm-systems/projects/amdsmi/tests/amd_smi_test/functional/temp_read.cc
T
Mario Limonciello (AMD) c9eddf75e7 Remove unnecessary includes
Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>


[ROCm/amdsmi commit: 924a06d1e1]
2025-09-05 17:44:17 -05:00

198 wiersze
9.3 KiB
C++

/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <cstdint>
#include <iostream>
#include <string>
#include <map>
#include <gtest/gtest.h>
#include "amd_smi/amdsmi.h"
#include "temp_read.h"
// Display names for the temperature sensor types exercised by this test,
// keyed by the numeric value of the temperature-type enumerator.
// TestTempRead::Run() skips every sensor type that has no entry here.
//
// Keep the entry order: std::map keeps the first entry inserted for a key, so
// if two enumerators share a numeric value the earlier name is the one used.
static const std::map<uint32_t, std::string> kTempSensorNameMap{
    {AMDSMI_TEMPERATURE_TYPE_VRAM, "Memory"},
    {AMDSMI_TEMPERATURE_TYPE_HOTSPOT, "Hotspot"},
    {AMDSMI_TEMPERATURE_TYPE_JUNCTION, "Junction"},
    {AMDSMI_TEMPERATURE_TYPE_EDGE, "Edge"},
    {AMDSMI_TEMPERATURE_TYPE_HBM_0, "HBM_0"},
    {AMDSMI_TEMPERATURE_TYPE_HBM_1, "HBM_1"},
    {AMDSMI_TEMPERATURE_TYPE_HBM_2, "HBM_2"},
    {AMDSMI_TEMPERATURE_TYPE_HBM_3, "HBM_3"},
    {AMDSMI_TEMPERATURE_TYPE_PLX, "PLX"},

    // GPU Board Node Temperature Types (100-149)
    {AMDSMI_TEMPERATURE_TYPE_GPUBOARD_NODE_RETIMER_X,
     "GPU Board Node Retimer X"},
    {AMDSMI_TEMPERATURE_TYPE_GPUBOARD_NODE_OAM_X_IBC,
     "GPU Board Node OAM X IBC"},
    {AMDSMI_TEMPERATURE_TYPE_GPUBOARD_NODE_OAM_X_IBC_2,
     "GPU Board Node OAM X IBC 2"},
    {AMDSMI_TEMPERATURE_TYPE_GPUBOARD_NODE_OAM_X_VDD18_VR,
     "GPU Board Node OAM X VDD18 VR"},
    {AMDSMI_TEMPERATURE_TYPE_GPUBOARD_NODE_OAM_X_04_HBM_B_VR,
     "GPU Board Node OAM X 04 HBM B VR"},
    {AMDSMI_TEMPERATURE_TYPE_GPUBOARD_NODE_OAM_X_04_HBM_D_VR,
     "GPU Board Node OAM X 04 HBM D VR"},

    // GPU Board VR Temperature Types (150-199)
    {AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_VDD0, "GPU Board VDDCR VDD0"},
    {AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_VDD1, "GPU Board VDDCR VDD1"},
    {AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_VDD2, "GPU Board VDDCR VDD2"},
    {AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_VDD3, "GPU Board VDDCR VDD3"},
    {AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_SOC_A, "GPU Board VDDCR SOC A"},
    {AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_SOC_C, "GPU Board VDDCR SOC C"},
    {AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_SOCIO_A, "GPU Board VDDCR SOCIO A"},
    {AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_SOCIO_C, "GPU Board VDDCR SOCIO C"},
    {AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDD_085_HBM, "GPU Board VDD 085 HBM"},
    {AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_11_HBM_B,
     "GPU Board VDDCR 11 HBM B"},
    {AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_11_HBM_D,
     "GPU Board VDDCR 11 HBM D"},
    {AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDD_USR, "GPU Board VDD USR"},
    {AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDIO_11_E32, "GPU Board VDDIO 11 E32"},

    // Baseboard System Temperature Types (200+)
    {AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_FPGA, "Baseboard UBB FPGA"},
    {AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_FRONT, "Baseboard UBB Front"},
    {AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_BACK, "Baseboard UBB Back"},
    {AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_OAM7, "Baseboard UBB OAM7"},
    {AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_IBC, "Baseboard UBB IBC"},
    {AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_UFPGA, "Baseboard UBB UFPGA"},
    {AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_OAM1, "Baseboard UBB OAM1"},
    {AMDSMI_TEMPERATURE_TYPE_BASEBOARD_OAM_0_1_HSC, "Baseboard OAM 0-1 HSC"},
    {AMDSMI_TEMPERATURE_TYPE_BASEBOARD_OAM_2_3_HSC, "Baseboard OAM 2-3 HSC"},
    {AMDSMI_TEMPERATURE_TYPE_BASEBOARD_OAM_4_5_HSC, "Baseboard OAM 4-5 HSC"},
    {AMDSMI_TEMPERATURE_TYPE_BASEBOARD_OAM_6_7_HSC, "Baseboard OAM 6-7 HSC"},
    {AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_FPGA_0V72_VR,
     "Baseboard UBB FPGA 0V72 VR"},
    {AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_FPGA_3V3_VR,
     "Baseboard UBB FPGA 3V3 VR"},
    {AMDSMI_TEMPERATURE_TYPE_BASEBOARD_RETIMER_0_1_2_3_1V2_VR,
     "Baseboard Retimer 0-1-2-3 1V2 VR"},
    {AMDSMI_TEMPERATURE_TYPE_BASEBOARD_RETIMER_4_5_6_7_1V2_VR,
     "Baseboard Retimer 4-5-6-7 1V2 VR"},
    {AMDSMI_TEMPERATURE_TYPE_BASEBOARD_RETIMER_0_1_0V9_VR,
     "Baseboard Retimer 0-1 0V9 VR"},
    {AMDSMI_TEMPERATURE_TYPE_BASEBOARD_RETIMER_4_5_0V9_VR,
     "Baseboard Retimer 4-5 0V9 VR"},
    {AMDSMI_TEMPERATURE_TYPE_BASEBOARD_RETIMER_2_3_0V9_VR,
     "Baseboard Retimer 2-3 0V9 VR"},
    {AMDSMI_TEMPERATURE_TYPE_BASEBOARD_RETIMER_6_7_0V9_VR,
     "Baseboard Retimer 6-7 0V9 VR"},
    {AMDSMI_TEMPERATURE_TYPE_BASEBOARD_OAM_0_1_2_3_3V3_VR,
     "Baseboard OAM 0-1-2-3 3V3 VR"},
    {AMDSMI_TEMPERATURE_TYPE_BASEBOARD_OAM_4_5_6_7_3V3_VR,
     "Baseboard OAM 4-5-6-7 3V3 VR"},
    {AMDSMI_TEMPERATURE_TYPE_BASEBOARD_IBC_HSC, "Baseboard IBC HSC"},
    {AMDSMI_TEMPERATURE_TYPE_BASEBOARD_IBC, "Baseboard IBC"}
};
// Constructs the test and sets the title and description it is reported
// under via set_title() / set_description().
TestTempRead::TestTempRead()
    : TestBase() {
  set_title("AMDSMI Temp Read Test");
  set_description(
      "The Temperature Read tests verifies that the temperature "
      "monitors can be read properly.");
}
// Nothing to release here: this definition performs no cleanup of its own.
TestTempRead::~TestTempRead() {}
// Runs the shared TestBase set-up; this test adds no set-up steps of its own.
void TestTempRead::SetUp() {
  TestBase::SetUp();
}
// Shows the test information by forwarding to TestBase::DisplayTestInfo().
void TestTempRead::DisplayTestInfo() {
  TestBase::DisplayTestInfo();
}
// Shows the test results by forwarding to TestBase::DisplayResults(); this
// test has no additional results to report.
void TestTempRead::DisplayResults() const {
  TestBase::DisplayResults();
}
// Tears the test down by delegating to TestBase::Close().
void TestTempRead::Close() {
// NOTE(review): the earlier comment here stated that TestBase::Close() closes
// the handles opened by the test utility calls and calls amdsmi_shut_down(),
// and must therefore run after any other cleanup. That behavior lives in
// TestBase and is not visible in this file - confirm before relying on it.
TestBase::Close();
}
void TestTempRead::Run(void) {
amdsmi_status_t err;
int64_t val_i64;
TestBase::Run();
if (setup_failed_) {
std::cout << "** SetUp Failed for this test. Skipping.**" << std::endl;
return;
}
uint32_t type(0);
for (uint32_t x = 0; x < num_iterations(); ++x) {
for (uint32_t i = 0; i < num_monitor_devs(); ++i) {
PrintDeviceHeader(processor_handles_[i]);
auto print_temp_metric = [&](amdsmi_temperature_metric_t met,
std::string label) {
err = amdsmi_get_temp_metric(processor_handles_[i], static_cast<amdsmi_temperature_type_t>(type), met, &val_i64);
if (err != AMDSMI_STATUS_SUCCESS) {
if (err == AMDSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout << "\t**" << label << ": " <<
"Not supported on this machine" << std::endl;
}
// Verify api support checking functionality is working
err = amdsmi_get_temp_metric(processor_handles_[i], static_cast<amdsmi_temperature_type_t>(type), met, nullptr);
ASSERT_EQ(err, AMDSMI_STATUS_INVAL);
return;
} else {
CHK_ERR_ASRT(err)
}
}
// Verify api support checking functionality is working
err = amdsmi_get_temp_metric(processor_handles_[i], static_cast<amdsmi_temperature_type_t>(type), met, nullptr);
ASSERT_EQ(err, AMDSMI_STATUS_INVAL);
IF_VERB(STANDARD) {
std::cout << "\t**" << label << ": " << val_i64 << "C" << std::endl;
}
};
for (type = AMDSMI_TEMPERATURE_TYPE_FIRST; type <= AMDSMI_TEMPERATURE_TYPE__MAX; ++type) {
if (kTempSensorNameMap.find(type) == kTempSensorNameMap.end()) {
continue;
}
IF_VERB(STANDARD) {
std::cout << "\t** **********" << kTempSensorNameMap.at(type) <<
" Temperatures **********" << std::endl;
}
print_temp_metric(AMDSMI_TEMP_CURRENT, "Current Temp.");
print_temp_metric(AMDSMI_TEMP_MAX, "Temperature max value");
print_temp_metric(AMDSMI_TEMP_MIN, "Temperature min value");
print_temp_metric(AMDSMI_TEMP_MAX_HYST,
"Temperature hysteresis value for max limit");
print_temp_metric(AMDSMI_TEMP_MIN_HYST,
"Temperature hysteresis value for min limit");
print_temp_metric(AMDSMI_TEMP_CRITICAL, "Temperature critical max value");
print_temp_metric(AMDSMI_TEMP_CRITICAL_HYST,
"Temperature hysteresis value for critical limit");
print_temp_metric(AMDSMI_TEMP_EMERGENCY,
"Temperature emergency max value");
print_temp_metric(AMDSMI_TEMP_EMERGENCY_HYST,
"Temperature hysteresis value for emergency limit");
print_temp_metric(AMDSMI_TEMP_CRIT_MIN, "Temperature critical min value");
print_temp_metric(AMDSMI_TEMP_CRIT_MIN_HYST,
"Temperature hysteresis value for critical min value");
print_temp_metric(AMDSMI_TEMP_OFFSET, "Temperature offset");
print_temp_metric(AMDSMI_TEMP_LOWEST, "Historical minimum temperature");
print_temp_metric(AMDSMI_TEMP_HIGHEST, "Historical maximum temperature");
}
}
} // x
}