[ROCm/amdsmi commit: 53b7ae1113]
Этот коммит содержится в:
Chris Freehill
2018-09-16 00:13:29 -05:00
родитель db1c389df1
Коммит 455470573e
18 изменённых файлов: 5527 добавлений и 186 удалений
+143 -30
Просмотреть файл
@@ -1,43 +1,98 @@
################################################################################
##
## Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
##
## MIT LICENSE:
## Permission is hereby granted, free of charge, to any person obtaining a copy of
## this software and associated documentation files (the "Software"), to deal in
## the Software without restriction, including without limitation the rights to
## use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
## of the Software, and to permit persons to whom the Software is furnished to do
## so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included in all
## copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
## SOFTWARE.
##
################################################################################
#
# Minimum version of cmake required
#
cmake_minimum_required(VERSION 2.8.0)
cmake_minimum_required(VERSION 3.5.0)
project(rocm_smi)
set(ROCM_SMI "rocm_smi")
set(RSMI_PACKAGE "rsmi")
set(ROCM_SMI_COMPONENT "lib${ROCM_SMI}")
set(ROCM_SMI_TARGET "${ROCM_SMI}64")
project(${ROCM_SMI_TARGET})
if("${ROCM_SMI_BLD_BITS}" STREQUAL 64)
set (ONLY64STR "64")
set (IS64BIT 1)
else()
set (ONLY64STR "")
set (IS64BIT 0)
## Set default module path if not already set
if(NOT DEFINED CMAKE_MODULE_PATH)
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules/")
endif()
string(TOLOWER "${ROCM_SMI_BUILD_TYPE}" tmp)
if("${tmp}" STREQUAL release)
set(BUILD_TYPE "Release")
set(ISDEBUG 0)
else()
set(BUILD_TYPE "Debug")
set(ISDEBUG 1)
## Include common cmake modules
include(utils)
## Setup the package version.
get_version ("1.0.0")
if (NOT DEFINED CPACK_PACKAGE_VENDOR)
set(CPACK_PACKAGE_VENDOR "AMD")
endif()
set(CMAKE_CXX_FLAGS "-std=c++11 ")
if(ISDEBUG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb -O0")
if (NOT DEFINED CPACK_PACKAGE_VERSION_MAJOR)
set(CPACK_PACKAGE_VERSION_MAJOR "1")
endif()
if (IS64BIT)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -msse -msse2")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")
if (NOT DEFINED CPACK_PACKAGE_VERSION_MINOR)
set(CPACK_PACKAGE_VERSION_MINOR "0")
endif()
set(ROCM_SMI "rocm_smi${ONLY64STR}")
if (NOT DEFINED CPACK_PACKAGE_VERSION_PATCH)
set(CPACK_PACKAGE_VERSION_PATCH "0")
endif()
if (NOT DEFINED CPACK_PACKAGE_CONTACT)
set(CPACK_PACKAGE_CONTACT "Advanced Micro Devices Inc.")
endif()
if (NOT DEFINED CPACK_PACKAGE_DESCRIPTION_SUMMARY)
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY
"ROCm System Management Interface library")
endif()
set(LIB_VERSION_STRING
"${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
# Debian package specific variables
# Full "major.minor.patch" version string for the build. The middle component
# must be the MINOR version; it previously repeated MAJOR.
set(BUILD_VERSION_STRING
  "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
## Verbose output.
set(CMAKE_VERBOSE_MAKEFILE on)
## Compiler flags
# Appended to whatever is already in CMAKE_CXX_FLAGS: warnings (-Wall -Wextra),
# RTTI disabled (-fno-rtti) and 64-bit code generation (-m64).
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -fno-rtti -m64")
# SSE/SSE2 instruction sets and the C++11 language standard.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse -msse2 -std=c++11 ")
# Use this instead of above for 32 bit
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")
# Release builds are optimized (-O2); every other build type gets debug
# symbols, no optimization and the DEBUG preprocessor definition.
if ("${CMAKE_BUILD_TYPE}" STREQUAL Release)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
else ()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb -O0 -DDEBUG")
endif ()
set(CMAKE_BUILD_TYPE Debug)
set(SRC_DIR "src")
set(INC_DIR "include/rocm_smi")
@@ -45,10 +100,15 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
## Implementation files compiled into the library.
set(SMI_SRC_LIST
  "${SRC_DIR}/rocm_smi_device.cc"
  "${SRC_DIR}/rocm_smi_main.cc"
  "${SRC_DIR}/rocm_smi_monitor.cc"
  "${SRC_DIR}/rocm_smi.cc"
  "${SRC_DIR}/rocm_smi_power_mon.cc"
  "${SRC_DIR}/rocm_smi_utils.cc")

## Headers listed alongside the sources of the library target.
set(SMI_INC_LIST
  "${INC_DIR}/rocm_smi_device.h"
  "${INC_DIR}/rocm_smi_main.h"
  "${INC_DIR}/rocm_smi_monitor.h"
  "${INC_DIR}/rocm_smi_power_mon.h"
  "${INC_DIR}/rocm_smi_utils.h")
# rocm_smi_device.h
@@ -57,10 +117,63 @@ set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_monitor.h")
set(SMI_EXAMPLE_EXE "rocm_smi_ex")
add_executable(${SMI_EXAMPLE_EXE} "example/rocm_smi_example.cc")
target_link_libraries(${SMI_EXAMPLE_EXE} ${ROCM_SMI})
add_library(${ROCM_SMI} SHARED ${SMI_SRC_LIST} ${SMI_INC_LIST})
install(TARGETS ${ROCM_SMI}
LIBRARY DESTINATION ${PROJECT_BINARY_DIR}
RUNTIME DESTINATION ${PROJECT_BINARY_DIR})
target_link_libraries(${SMI_EXAMPLE_EXE} ${ROCM_SMI_TARGET})
add_library(${ROCM_SMI_TARGET} SHARED ${SMI_SRC_LIST} ${SMI_INC_LIST})
## Library versioning: VERSION carries the full version string, SOVERSION the
## major version only.
set_target_properties(${ROCM_SMI_TARGET} PROPERTIES
  VERSION "${LIB_VERSION_STRING}"
  SOVERSION "${CPACK_PACKAGE_VERSION_MAJOR}")
## If the library is a release, strip the target library.
## $<TARGET_FILE:...> expands to the full path of the library file that was
## actually built, so the step no longer depends on the command's working
## directory or on the file being named exactly "lib<target>.so".
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
  add_custom_command(
    TARGET ${ROCM_SMI_TARGET}
    POST_BUILD
    COMMAND ${CMAKE_STRIP} $<TARGET_FILE:${ROCM_SMI_TARGET}>
    VERBATIM)
endif ()
## Define default variable and variables for the optional build target
## rocm_smi_lib-dev
set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}
  CACHE STRING "Location of rocm_smi source code.")

## CMAKE_INSTALL_PREFIX already exists in the cache once project() has run, so
## a plain (non-FORCE) cache set would be ignored. Override it only when the
## user did not choose a prefix themselves; an explicit
## -DCMAKE_INSTALL_PREFIX=... is left untouched.
if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
  set(CMAKE_INSTALL_PREFIX "/opt/rocm"
    CACHE PATH "Default installation directory." FORCE)
endif ()

set(CPACK_PACKAGING_INSTALL_PREFIX "/opt/rocm"
  CACHE STRING "Default packaging prefix.")
set(CPACK_GENERATOR "DEB;RPM" CACHE STRING "Default packaging generators.")
## Install rules: the shared library goes under <prefix>/${ROCM_SMI}/lib as
## part of the runtime component; the public header goes under
## <prefix>/rocm_smi/include/rocm_smi.
install(
  TARGETS ${ROCM_SMI_TARGET}
  LIBRARY
    DESTINATION ${ROCM_SMI}/lib
    COMPONENT ${ROCM_SMI_COMPONENT})
install(
  FILES ${SOURCE_DIR}/include/rocm_smi/rocm_smi.h
  DESTINATION rocm_smi/include/rocm_smi)
## Add the packaging directives for the runtime library.
# RSMI_PACKAGE is the package-name variable this file defines; the previously
# referenced ROCM_SMI_PACKAGE is never set in this file.
set(CPACK_PACKAGE_NAME ${RSMI_PACKAGE})
# TODO set(CPACK_DEBIAN_PACKAGE_HOMEPAGE <GITHUB URL> )

# Debian maintainer scripts. Each path is its own list element; a single quoted
# string broken across lines would embed the newline in the second path.
set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA
  "${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst"
  "${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm")

# RPM package specific variables
# rpm_post is the post-install scriptlet, so it belongs to the POST_INSTALL
# variable (it was previously assigned to PRE_INSTALL).
set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE
  "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post")
set(CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE
  "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_postun")

include (CPack)
# Optional "doc" target: when Doxygen is available, instantiate the Doxygen
# configuration template into the build tree and run Doxygen on it.
find_package(Doxygen)
if (NOT DOXYGEN_FOUND)
  message("Doxygen is not found. Will not generate documents.")
else ()
  configure_file(
    ${CMAKE_CURRENT_SOURCE_DIR}/docs/rsmi_doxygen.cfg
    ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
    @ONLY)
  add_custom_target(doc
    ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
    COMMENT "Generating API documentation with Doxygen"
    VERBATIM)
endif ()
+2 -1
Просмотреть файл
@@ -1,7 +1,7 @@
The University of Illinois/NCSA
Open Source License (NCSA)
Copyright (c) 2014-2017, Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2014-2018, Advanced Micro Devices, Inc. All rights reserved.
Developed by:
@@ -35,3 +35,4 @@ THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS WITH THE SOFTWARE.
+2 -1
Просмотреть файл
@@ -7,7 +7,7 @@ C++ Library interface for ROCm-SMI to allow you to monitor/trace GPU system atri
- GPU Temperature
- GPU Fan Speed - If you have active cooled device with a fan.
Example application is logging performance data like Kernel execution time vs GPU Temprature, GPU and Memory Clocks
Example application is logging performance data like Kernel execution time vs GPU Temperature, GPU and Memory Clocks
### To build library and example:
mkdir -p build
@@ -23,3 +23,4 @@ cd ..
The above commands will result in building the library librocm_smi.so and
an example, rocm_smi_ex, which links with this library.
+116
Просмотреть файл
@@ -0,0 +1,116 @@
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2014-2017, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
## AMD Research and AMD HSA Software Development
##
## Advanced Micro Devices, Inc.
##
## www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
## - Redistributions of source code must retain the above copyright notice,
## this list of conditions and the following disclaimers.
## - Redistributions in binary form must reproduce the above copyright
## notice, this list of conditions and the following disclaimers in
## the documentation and/or other materials provided with the distribution.
## - Neither the names of Advanced Micro Devices, Inc,
## nor the names of its contributors may be used to endorse or promote
## products derived from this Software without specific prior written
## permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################
## Parses VERSION_STRING (e.g. "1.2.3" or "1.2.3-4-gabcdef") and returns the
## pieces to the caller through PARENT_SCOPE:
##   VERSION_MAJOR / VERSION_MINOR / VERSION_PATCH - the first, second and
##       third runs of digits found in the string (each set only if present)
##   VERSION_BUILD  - everything after the first "-", when one is present
##   VERSION_STRING - the numeric components found, joined with "."
function( parse_version VERSION_STRING )

  # Expansions are quoted so an empty string, or one containing ";", is still
  # passed as exactly one argument.
  string ( FIND "${VERSION_STRING}" "-" STRING_INDEX )

  if ( STRING_INDEX GREATER -1 )
    # Skip past the "-" itself and keep the remainder as the build suffix.
    math ( EXPR STRING_INDEX "${STRING_INDEX} + 1" )
    string ( SUBSTRING "${VERSION_STRING}" ${STRING_INDEX} -1 VERSION_BUILD )
  endif ()

  string ( REGEX MATCHALL "[0-9]+" VERSIONS "${VERSION_STRING}" )
  list ( LENGTH VERSIONS VERSION_COUNT )

  # Start from a known value so a same-named variable in the caller's scope
  # cannot leak into the result when no numeric component is found.
  set ( TEMP_VERSION_STRING "" )

  if ( VERSION_COUNT GREATER 0 )
    list ( GET VERSIONS 0 MAJOR )
    set ( VERSION_MAJOR ${MAJOR} PARENT_SCOPE )
    set ( TEMP_VERSION_STRING "${MAJOR}" )
  endif ()

  if ( VERSION_COUNT GREATER 1 )
    list ( GET VERSIONS 1 MINOR )
    set ( VERSION_MINOR ${MINOR} PARENT_SCOPE )
    set ( TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${MINOR}" )
  endif ()

  if ( VERSION_COUNT GREATER 2 )
    list ( GET VERSIONS 2 PATCH )
    set ( VERSION_PATCH ${PATCH} PARENT_SCOPE )
    set ( TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${PATCH}" )
  endif ()

  if ( DEFINED VERSION_BUILD )
    set ( VERSION_BUILD "${VERSION_BUILD}" PARENT_SCOPE )
  endif ()

  set ( VERSION_STRING "${TEMP_VERSION_STRING}" PARENT_SCOPE )

endfunction ()
## Determines the version of the repository.
## DEFAULT_VERSION_STRING is parsed first and supplies the fallback values. If
## a git executable is found and `git describe` succeeds in the source
## directory, its output is parsed instead and replaces the fallback.
## Returns VERSION_STRING, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH and
## VERSION_BUILD to the caller through PARENT_SCOPE.
function ( get_version DEFAULT_VERSION_STRING )

  parse_version ( "${DEFAULT_VERSION_STRING}" )

  find_program ( GIT NAMES git )

  if ( GIT )

    # Run the executable that find_program located rather than relying on a
    # bare "git" being resolvable when the process is spawned.
    execute_process ( COMMAND "${GIT}" describe --dirty --long --match "[0-9]*"
                      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                      OUTPUT_VARIABLE GIT_TAG_STRING
                      OUTPUT_STRIP_TRAILING_WHITESPACE
                      RESULT_VARIABLE RESULT )

    # RESULT can be an error string rather than an exit code, so compare it
    # as a quoted string.
    if ( "${RESULT}" STREQUAL "0" )
      parse_version ( "${GIT_TAG_STRING}" )
    endif ()

  endif ()

  set( VERSION_STRING "${VERSION_STRING}" PARENT_SCOPE )
  set( VERSION_MAJOR  "${VERSION_MAJOR}" PARENT_SCOPE )
  set( VERSION_MINOR  "${VERSION_MINOR}" PARENT_SCOPE )
  set( VERSION_PATCH  "${VERSION_PATCH}" PARENT_SCOPE )
  set( VERSION_BUILD  "${VERSION_BUILD}" PARENT_SCOPE )

endfunction()
Разница между файлами не показана из-за своего большого размера Загрузить разницу
+437 -80
Просмотреть файл
@@ -44,107 +44,464 @@
*/
#include <assert.h>
#include <stdint.h>
#include <unistd.h>
#include <vector>
#include <iostream>
#include <bitset>
#include "rocm_smi/rocm_smi.h"
// Call-back function to append to a vector of Devices
static bool GetMonitorDevices(const std::shared_ptr<amd::smi::Device> &d,
void *p) {
std::string val_str;
assert(p != nullptr);
std::vector<std::shared_ptr<amd::smi::Device>> *device_list =
reinterpret_cast<std::vector<std::shared_ptr<amd::smi::Device>> *>(p);
if (d->monitor() != nullptr) {
device_list->push_back(d);
}
return false;
// Checks an rsmi_status_t result. On anything other than RSMI_STATUS_SUCCESS
// it prints the numeric status and the source line, prints the status
// description when one can be looked up, and returns the status from the
// enclosing function. Call sites use it without a trailing semicolon.
// RET is expanded several times, so pass a plain variable, not an expression
// with side effects.
#define CHK_RSMI_RET(RET) { \
  if ((RET) != RSMI_STATUS_SUCCESS) { \
    const char *err_str = nullptr; \
    std::cout << "RSMI call returned " << (RET) \
      << " at line " << __LINE__ << std::endl; \
    if (rsmi_status_string((RET), &err_str) == RSMI_STATUS_SUCCESS && \
        err_str != nullptr) { \
      std::cout << err_str << std::endl; \
    } \
    return (RET); \
  } \
}
// Variant of CHK_RSMI_RET for calls that may need elevated privileges:
// RSMI_STATUS_PERMISSION only prints a hint and lets execution continue; any
// other status is handled by CHK_RSMI_RET.
#define CHK_RSMI_PERM_RET(RET) { \
if (RET == RSMI_STATUS_PERMISSION) { \
std::cout << "This command requires root access." << std::endl; \
} else { \
CHK_RSMI_RET(RET) \
} \
}
// Prints a banner naming the test that is about to run, followed by the index
// of the device it runs against.
static void print_test_header(const char *str, uint32_t dv_ind) {
  static const char kBanner[] = "********************************";

  std::cout << kBanner << std::endl
            << "*** " << str << std::endl
            << kBanner << std::endl
            << "Device index: " << dv_ind << std::endl;
}
// Maps a single power-profile preset mask to a printable label; any value
// that is not one of the known presets yields "UNKNOWN".
static const char *
power_profile_string(rsmi_power_profile_preset_masks profile) {
  const char *label = "UNKNOWN";

  switch (profile) {
    case RSMI_PWR_PROF_PRST_CUSTOM_MASK:       label = "CUSTOM";         break;
    case RSMI_PWR_PROF_PRST_VIDEO_MASK:        label = "VIDEO";          break;
    case RSMI_PWR_PROF_PRST_POWER_SAVING_MASK: label = "POWER SAVING";   break;
    case RSMI_PWR_PROF_PRST_COMPUTE_MASK:      label = "COMPUTE";        break;
    case RSMI_PWR_PROF_PRST_VR_MASK:           label = "VR";             break;
    case RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK:  label = "3D FULL SCREEN"; break;
    default:                                                             break;
  }
  return label;
}
// Maps a performance level to a printable label; any value that is not one of
// the four known levels yields "UNKNOWN".
static const char *
perf_level_string(rsmi_dev_perf_level perf_lvl) {
  const char *label = "UNKNOWN";

  switch (perf_lvl) {
    case RSMI_DEV_PERF_LEVEL_AUTO:   label = "AUTO";   break;
    case RSMI_DEV_PERF_LEVEL_LOW:    label = "LOW";    break;
    case RSMI_DEV_PERF_LEVEL_HIGH:   label = "HIGH";   break;
    case RSMI_DEV_PERF_LEVEL_MANUAL: label = "MANUAL"; break;
    default:                                           break;
  }
  return label;
}
// Exercises the power-profile API for device dv_ind: lists the available
// presets, switches to a different non-custom preset when one is available,
// then sets the performance level back to auto, reporting the performance
// level and the active profile after each change. Returns the first failing
// status, or the status of the last call made.
static rsmi_status_t test_power_profile(uint32_t dv_ind) {
  rsmi_status_t ret;
  rsmi_power_profile_status status;
  rsmi_dev_perf_level pfl;

  // Small printers for the two values that are reported repeatedly.
  auto show_current_profile = [&status]() {
    std::cout << "The current power profile is: " <<
                         power_profile_string(status.current) << std::endl;
  };
  auto show_perf_level = [&pfl]() {
    std::cout << "Performance Level is now " <<
                                      perf_level_string(pfl) << std::endl;
  };

  print_test_header("Power Profile", dv_ind);

  ret = rsmi_dev_power_profile_presets_get(dv_ind, 0, &status);
  CHK_RSMI_RET(ret)

  std::cout << "The available power profiles are:" << std::endl;
  for (uint64_t mask = 1; mask <= RSMI_PWR_PROF_PRST_LAST; mask <<= 1) {
    if ((mask & status.available_profiles) != mask) {
      continue;
    }
    std::cout << "\t" << power_profile_string(
         static_cast<rsmi_power_profile_preset_masks>(mask)) << std::endl;
  }
  show_current_profile();

  // Pick the first preset, in this order of preference, that is available and
  // is not the one currently active.
  static const rsmi_power_profile_preset_masks kCandidates[] = {
    RSMI_PWR_PROF_PRST_COMPUTE_MASK,
    RSMI_PWR_PROF_PRST_VIDEO_MASK,
    RSMI_PWR_PROF_PRST_VR_MASK,
    RSMI_PWR_PROF_PRST_POWER_SAVING_MASK,
    RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK,
  };
  const rsmi_bit_field selectable =
                           status.available_profiles & (~status.current);
  rsmi_power_profile_preset_masks new_prof = RSMI_PWR_PROF_PRST_INVALID;

  for (rsmi_power_profile_preset_masks candidate : kCandidates) {
    if (selectable & candidate) {
      new_prof = candidate;
      break;
    }
  }
  if (new_prof == RSMI_PWR_PROF_PRST_INVALID) {
    std::cout << "No other non-custom power profiles to set to" << std::endl;
    return ret;
  }

  std::cout << "Setting power profile to " << power_profile_string(new_prof)
                                                    << "..." << std::endl;
  ret = rsmi_dev_power_profile_set(dv_ind, 0, new_prof);
  CHK_RSMI_RET(ret)
  std::cout << "Done." << std::endl;

  ret = rsmi_dev_perf_level_get(dv_ind, &pfl);
  CHK_RSMI_RET(ret)
  show_perf_level();

  ret = rsmi_dev_power_profile_presets_get(dv_ind, 0, &status);
  CHK_RSMI_RET(ret)
  show_current_profile();

  std::cout << "Resetting perf level to auto..." << std::endl;
  ret = rsmi_dev_perf_level_set(dv_ind, RSMI_DEV_PERF_LEVEL_AUTO);
  CHK_RSMI_RET(ret)
  std::cout << "Done." << std::endl;

  ret = rsmi_dev_perf_level_get(dv_ind, &pfl);
  CHK_RSMI_RET(ret)
  show_perf_level();

  ret = rsmi_dev_power_profile_presets_get(dv_ind, 0, &status);
  CHK_RSMI_RET(ret)
  show_current_profile();

  return ret;
}
// Exercises the power-cap API for device dv_ind: reads the allowed range and
// the current cap, sets the cap to the midpoint of the range, reads it back,
// then restores the original cap and reads it back again. Returns the first
// failing status, or the status of the last call made.
static rsmi_status_t test_power_cap(uint32_t dv_ind) {
  rsmi_status_t ret;
  uint64_t orig, min, max, new_cap;

  print_test_header("Power Control", dv_ind);

  // The range query reports the maximum first, then the minimum.
  ret = rsmi_dev_power_cap_range_get(dv_ind, 0, &max, &min);
  CHK_RSMI_RET(ret)

  ret = rsmi_dev_power_cap_get(dv_ind, 0, &orig);
  CHK_RSMI_RET(ret)

  std::cout << "Original Power Cap: " << orig << " uW" << std::endl;
  // Print the range low-to-high, matching the order used when main() reports
  // the same range.
  std::cout << "Power Cap Range: " << min << " uW to " << max <<
                                                       " uW" << std::endl;

  new_cap = (max + min)/2;
  std::cout << "Setting new cap to " << new_cap << "..." << std::endl;
  ret = rsmi_dev_power_cap_set(dv_ind, 0, new_cap);
  CHK_RSMI_RET(ret)

  ret = rsmi_dev_power_cap_get(dv_ind, 0, &new_cap);
  CHK_RSMI_RET(ret)
  std::cout << "New Power Cap: " << new_cap << " uW" << std::endl;

  std::cout << "Resetting cap to " << orig << "..." << std::endl;
  ret = rsmi_dev_power_cap_set(dv_ind, 0, orig);
  CHK_RSMI_RET(ret)

  ret = rsmi_dev_power_cap_get(dv_ind, 0, &new_cap);
  CHK_RSMI_RET(ret)
  std::cout << "Current Power Cap: " << new_cap << " uW" << std::endl;

  return ret;
}
// Exercises the overdrive API for device dv_ind: sets the level to 0, raises
// it to 10 and reads it back, then resets it to 0 and reads it back again.
// Returns the first failing status, or the status of the last call made.
static rsmi_status_t test_set_overdrive(uint32_t dv_ind) {
  uint32_t od_level;
  rsmi_status_t ret;

  print_test_header("Overdrive Control", dv_ind);

  // Start from a known state.
  std::cout << "Set Overdrive level to 0%..." << std::endl;
  ret = rsmi_dev_overdrive_level_set(dv_ind, 0);
  CHK_RSMI_RET(ret)

  // Raise the level and confirm what the device reports.
  std::cout << "Set Overdrive level to 10%..." << std::endl;
  ret = rsmi_dev_overdrive_level_set(dv_ind, 10);
  CHK_RSMI_RET(ret)
  ret = rsmi_dev_overdrive_level_get(dv_ind, &od_level);
  CHK_RSMI_RET(ret)
  std::cout << "\t**New OverDrive Level:" << od_level << std::endl;

  // Put the level back to 0 and confirm again.
  std::cout << "Reset Overdrive level to 0%..." << std::endl;
  ret = rsmi_dev_overdrive_level_set(dv_ind, 0);
  CHK_RSMI_RET(ret)
  ret = rsmi_dev_overdrive_level_get(dv_ind, &od_level);
  CHK_RSMI_RET(ret)
  std::cout << "\t**New OverDrive Level:" << od_level << std::endl;

  return ret;
}
// Exercises the fan API for device dv_ind: reads the current speed, requests
// a speed 10% higher (never above RSMI_MAX_FAN_SPEED), checks that the fan
// ended up near the requested value, then hands control back to automatic
// mode. The test is skipped when the initial speed reads as 0. Returns the
// first failing status, or the status of the last call made.
static rsmi_status_t test_set_fan_speed(uint32_t dv_ind) {
  rsmi_status_t ret;
  int64_t orig_speed;
  int64_t new_speed;
  int64_t cur_speed;

  print_test_header("Fan Speed Control", dv_ind);

  ret = rsmi_dev_fan_speed_get(dv_ind, 0, &orig_speed);
  CHK_RSMI_RET(ret)

  std::cout << "Original fan speed: " << orig_speed << std::endl;

  if (orig_speed == 0) {
    std::cout << "***System fan speed value is 0. Skip fan test." << std::endl;
    return RSMI_STATUS_SUCCESS;
  }

  new_speed = static_cast<int64_t>(1.1 * orig_speed);
  // A fan already running near full speed would otherwise be asked for a
  // value above the maximum the interface defines.
  if (new_speed > RSMI_MAX_FAN_SPEED) {
    new_speed = RSMI_MAX_FAN_SPEED;
  }

  std::cout << "Setting fan speed to " << new_speed << std::endl;

  ret = rsmi_dev_fan_speed_set(dv_ind, 0, new_speed);
  CHK_RSMI_RET(ret)

  // Pause before reading the speed back.
  sleep(4);

  ret = rsmi_dev_fan_speed_get(dv_ind, 0, &cur_speed);
  CHK_RSMI_RET(ret)

  std::cout << "New fan speed: " << cur_speed << std::endl;

  // Accept a reading close to the request, or a fan that is effectively at
  // its maximum.
  assert((cur_speed > 0.95 * new_speed && cur_speed < 1.1 * new_speed) ||
      (cur_speed > 0.95 * RSMI_MAX_FAN_SPEED));

  std::cout << "Resetting fan control to auto..." << std::endl;

  ret = rsmi_dev_fan_reset(dv_ind, 0);
  CHK_RSMI_RET(ret)

  sleep(3);

  ret = rsmi_dev_fan_speed_get(dv_ind, 0, &cur_speed);
  CHK_RSMI_RET(ret)

  std::cout << "End fan speed: " << cur_speed << std::endl;

  return ret;
}
// Exercises the performance-level API for device dv_ind: reads the current
// level, switches to the next level (wrapping around after the last one),
// reads it back, then restores the original level and reads it back again.
// Returns the first failing status, or the status of the last call made.
static rsmi_status_t test_set_perf_level(uint32_t dv_ind) {
  rsmi_dev_perf_level orig_pfl;
  rsmi_dev_perf_level level;
  rsmi_status_t ret;

  print_test_header("Performance Level Control", dv_ind);

  ret = rsmi_dev_perf_level_get(dv_ind, &orig_pfl);
  CHK_RSMI_RET(ret)
  std::cout << "\t**Original Perf Level:" << perf_level_string(orig_pfl)
            << std::endl;

  // Next level after the current one, modulo the number of levels.
  level = static_cast<rsmi_dev_perf_level>(
                          (orig_pfl + 1) % (RSMI_DEV_PERF_LEVEL_LAST + 1));

  std::cout << "Set Performance Level to " << static_cast<uint32_t>(level)
            << " ..." << std::endl;
  ret = rsmi_dev_perf_level_set(dv_ind, level);
  CHK_RSMI_RET(ret)

  ret = rsmi_dev_perf_level_get(dv_ind, &level);
  CHK_RSMI_RET(ret)
  std::cout << "\t**New Perf Level:" << perf_level_string(level) << std::endl;

  std::cout << "Reset Perf level to " << orig_pfl << " ..." << std::endl;
  ret = rsmi_dev_perf_level_set(dv_ind, orig_pfl);
  CHK_RSMI_RET(ret)

  ret = rsmi_dev_perf_level_get(dv_ind, &level);
  CHK_RSMI_RET(ret)
  std::cout << "\t**New Perf Level:" << perf_level_string(level) << std::endl;

  return ret;
}
// Exercises the clock-frequency API for device dv_ind. For every clock type
// it prints the current frequency index, restricts the clock to the
// frequencies selected by a fixed bitmask, prints the index now in use, then
// re-enables all frequencies and sets the performance level back to auto.
// Returns the first failing status, or RSMI_STATUS_SUCCESS.
static rsmi_status_t test_set_freq(uint32_t dv_ind) {
  rsmi_status_t ret;
  rsmi_frequencies f;
  rsmi_clk_type rsmi_clk;

  // Set clocks to something other than the usual default of the lowest
  // frequency: bits 2 and 3, i.e. the 3rd and 4th clocks. Written in hex
  // because binary literals are not part of C++11.
  const uint32_t freq_bitmask = 0x0C;

  // Printable form of the mask with leading zeros removed. It is the same for
  // every clock type, so it is built once outside the loop.
  std::string freq_bm_str =
            std::bitset<RSMI_MAX_NUM_FREQUENCIES>(freq_bitmask).to_string();
  freq_bm_str.erase(0, std::min(freq_bm_str.find_first_not_of('0'),
                                                   freq_bm_str.size()-1));

  print_test_header("Clock Frequency Control", dv_ind);

  for (uint32_t clk = (uint32_t)RSMI_CLK_TYPE_FIRST;
                                           clk <= RSMI_CLK_TYPE_LAST; ++clk) {
    rsmi_clk = (rsmi_clk_type)clk;

    ret = rsmi_dev_gpu_clk_freq_get(dv_ind, rsmi_clk, &f);
    CHK_RSMI_RET(ret)

    std::cout << "Initial frequency for clock " << rsmi_clk << " is " <<
                                                     f.current << std::endl;

    std::cout << "Setting frequency mask for clock " << rsmi_clk <<
        " to 0b" << freq_bm_str << " ..." << std::endl;

    ret = rsmi_dev_gpu_clk_freq_set(dv_ind, rsmi_clk, freq_bitmask);
    CHK_RSMI_RET(ret)

    ret = rsmi_dev_gpu_clk_freq_get(dv_ind, rsmi_clk, &f);
    CHK_RSMI_RET(ret)

    std::cout << "Frequency is now index " << f.current << std::endl;
    std::cout << "Resetting mask to all frequencies." << std::endl;
    ret = rsmi_dev_gpu_clk_freq_set(dv_ind, rsmi_clk, 0xFFFFFFFF);
    CHK_RSMI_RET(ret)

    ret = rsmi_dev_perf_level_set(dv_ind, RSMI_DEV_PERF_LEVEL_AUTO);
    CHK_RSMI_RET(ret)
  }
  return RSMI_STATUS_SUCCESS;
}
// Prints one line per supported frequency in *f as "index: value", marking
// the entry whose index equals f->current with a trailing " *".
static void print_frequencies(rsmi_frequencies *f) {
  assert(f != nullptr);

  for (uint32_t idx = 0; idx < f->num_supported; ++idx) {
    const char *marker = (idx == f->current) ? " *" : "";

    std::cout << "\t**  " << idx << ": " << f->frequency[idx];
    std::cout << marker << std::endl;
  }
}
int main() {
amd::smi::RocmSMI hw;
std::vector<std::shared_ptr<amd::smi::Device>> monitor_devices;
rsmi_status_t ret;
// DiscoverDevices() will seach for devices and monitors and update internal
// data structures.
hw.DiscoverDevices();
// IterateSMIDevices will iterate through all the known devices and apply
// the provided call-back to each device found.
hw.IterateSMIDevices(GetMonitorDevices,
reinterpret_cast<void *>(&monitor_devices));
ret = rsmi_init(0);
CHK_RSMI_RET(ret)
std::string val_str;
std::vector<std::string> val_vec;
uint32_t value;
uint32_t value2;
int ret;
uint64_t val_ui64, val2_ui64;
int64_t val_i64;
uint32_t val_ui32;
rsmi_dev_perf_level pfl;
rsmi_frequencies f;
uint32_t num_monitor_devs = 0;
// Iterate through the list of devices and print out information related to
// that device.
for (auto dev : monitor_devices) {
dev->readDevInfo(amd::smi::kDevDevID, &val_str);
std::cout << "\t**Device ID:" << val_str << std::endl;
rsmi_num_monitor_devices(&num_monitor_devs);
for (uint32_t i = 0; i< num_monitor_devs; ++i) {
ret = rsmi_dev_id_get(i, &val_ui64);
CHK_RSMI_RET(ret)
std::cout << "\t**Device ID: 0x" << std::hex << val_ui64 << std::endl;
dev->readDevInfo(amd::smi::kDevPerfLevel, &val_str);
std::cout << "\t**Performance Level:" << val_str << std::endl;
ret = rsmi_dev_perf_level_get(i, &pfl);
CHK_RSMI_RET(ret)
std::cout << "\t**Performance Level:" <<
perf_level_string(pfl) << std::endl;
dev->readDevInfo(amd::smi::kDevOverDriveLevel, &val_str);
std::cout << "\t**OverDrive Level:" << val_str << std::endl;
ret = rsmi_dev_overdrive_level_get(i, &val_ui32);
CHK_RSMI_RET(ret)
std::cout << "\t**OverDrive Level:" << val_ui32 << std::endl;
dev->readDevInfo(amd::smi::kDevGPUMClk, &val_vec);
std::cout << "\t**Supported GPU Memory clock frequencies:" << std::endl;
for (auto vs : val_vec) {
std::cout << "\t** " << vs << std::endl;
ret = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_MEM, &f);
CHK_RSMI_RET(ret)
std::cout << "\t**Supported GPU Memory clock frequencies: ";
std::cout << f.num_supported << std::endl;
print_frequencies(&f);
ret = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_SYS, &f);
CHK_RSMI_RET(ret)
std::cout << "\t**Supported GPU clock frequencies: ";
std::cout << f.num_supported << std::endl;
print_frequencies(&f);
char name[20];
ret = rsmi_dev_name_get(i, name, 20);
CHK_RSMI_RET(ret)
std::cout << "\t**Monitor name: " << name << std::endl;
ret = rsmi_dev_temp_metric_get(i, 0, RSMI_TEMP_CURRENT, &val_i64);
CHK_RSMI_RET(ret)
std::cout << "\t**Temperature: " << val_i64/1000 << "C" << std::endl;
ret = rsmi_dev_fan_speed_get(i, 0, &val_i64);
CHK_RSMI_RET(ret)
ret = rsmi_dev_fan_speed_max_get(i, 0, &val_ui64);
CHK_RSMI_RET(ret)
std::cout << "\t**Current Fan Speed: ";
std::cout << val_i64/static_cast<float>(val_ui64)*100;
std::cout << "% ("<< val_i64 << "/" << val_ui64 << ")" << std::endl;
ret = rsmi_dev_fan_rpms_get(i, 0, &val_i64);
CHK_RSMI_RET(ret)
std::cout << "\t**Current fan RPMs: " << val_i64 << std::endl;
ret = rsmi_dev_power_max_get(i, 0, &val_ui64);
if (ret == RSMI_STATUS_NOT_SUPPORTED) {
const char *s_str;
ret = rsmi_status_string(RSMI_STATUS_NOT_SUPPORTED, &s_str);
CHK_RSMI_RET(ret)
std::cout << "\t**rsmi_dev_power_max_get(): " << s_str << std::endl;
} else {
CHK_RSMI_PERM_RET(ret)
std::cout << "\t**Max Power Usage: ";
std::cout << static_cast<float>(val_ui64)/1000 << " W" << std::endl;
}
val_vec.clear();
dev->readDevInfo(amd::smi::kDevGPUSClk, &val_vec);
std::cout << "\t**Supported GPU clock frequencies:" << val_str << std::endl;
for (auto vs : val_vec) {
std::cout << "\t** " << vs << std::endl;
}
val_vec.clear();
ret = rsmi_dev_power_cap_get(i, 0, &val_ui64);
CHK_RSMI_PERM_RET(ret)
std::cout << "\t**Current Power Cap: " << val_ui64 << "uW" <<std::endl;
// See if there is a monitor associated with the current device, and if so,
// print out the associated monitor information.
if (dev->monitor() != nullptr) {
ret = dev->monitor()->readMonitor(amd::smi::kMonName, &val_str);
std::cout << "\t**Monitor name: ";
ret = rsmi_dev_power_cap_range_get(i, 0, &val_ui64, &val2_ui64);
CHK_RSMI_PERM_RET(ret)
std::cout << "\t**Power Cap Range: " << val2_ui64 << " to " <<
val_ui64 << " uW" << std::endl;
if (ret != -1) {
std::cout << val_str << std::endl;
} else {
std::cout << "Not available" << std::endl;
}
std::cout << "\t**Temperature: ";
ret = dev->monitor()->readMonitor(amd::smi::kMonTemp, &value);
if (ret != -1) {
std::cout << static_cast<float>(value)/1000.0 << "C" << std::endl;
} else {
std::cout << "Not available" << std::endl;
}
std::cout << "\t**Current Fan Speed: ";
ret = dev->monitor()->readMonitor(amd::smi::kMonMaxFanSpeed, &value);
if (ret == 0) {
ret = dev->monitor()->readMonitor(amd::smi::kMonFanSpeed, &value2);
if (ret != -1) {
std::cout.setf(std::ios::dec, std::ios::basefield);
std::cout << value2/static_cast<float>(value) * 100 << "% (" <<
value2 << "/" << value << ")" << std::endl;
} else {
std::cout << "Not available" << std::endl;
}
}
}
ret = rsmi_dev_power_ave_get(i, 0, &val_ui64);
CHK_RSMI_PERM_RET(ret)
std::cout << "\t**Averge Power Usage: ";
std::cout << static_cast<float>(val_ui64)/1000 << " W" <<
std::endl;
std::cout << "\t=======" << std::endl;
}
std::cout << "***** Testing write api's" << std::endl;
for (uint32_t i = 0; i< num_monitor_devs; ++i) {
ret = test_set_overdrive(i);
CHK_RSMI_RET(ret)
ret = test_set_perf_level(i);
CHK_RSMI_RET(ret)
ret = test_set_freq(i);
CHK_RSMI_RET(ret)
ret = test_set_fan_speed(i);
CHK_RSMI_RET(ret)
ret = test_power_cap(i);
CHK_RSMI_RET(ret)
ret = test_power_profile(i);
CHK_RSMI_RET(ret)
}
return 0;
}
+719 -3
Просмотреть файл
@@ -44,8 +44,724 @@
*/
#ifndef ROCM_SMI_LIB_INCLUDE_ROCM_SMI_ROCM_SMI_H_
#define ROCM_SMI_LIB_INCLUDE_ROCM_SMI_ROCM_SMI_H_
#include "rocm_smi/rocm_smi_monitor.h"
#include "rocm_smi/rocm_smi_device.h"
#include "rocm_smi/rocm_smi_main.h"
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
/** \file rocm_smi.h
* Main header file for the ROCm SMI library.
* All required function, structure, enum, etc. definitions should be defined
* in this file.
*/
//! Guaranteed maximum possible number of supported frequencies
#define RSMI_MAX_NUM_FREQUENCIES 32
//! Maximum possible value for fan speed. Should be used as the denominator when
//! determining fan speed percentage.
#define RSMI_MAX_FAN_SPEED 255
/**
* @brief Error codes returned by rocm_smi_lib functions
*/
typedef enum {
RSMI_STATUS_SUCCESS = 0x0, //!< Operation was successful
RSMI_STATUS_INVALID_ARGS, //!< Passed in arguments are not valid
RSMI_STATUS_NOT_SUPPORTED, //!< The requested information or
//!< action is not available for the
//!< given input
RSMI_STATUS_FILE_ERROR, //!< Problem accessing a file. This
//!< may be because the operation is not
//!< supported by the Linux kernel
//!< version running on the executing
//!< machine
RSMI_STATUS_PERMISSION, //!< Permission denied/EACCES file
//!< error
RSMI_STATUS_OUT_OF_RESOURCES, //!< Unable to acquire memory or other
//!< resource
RSMI_STATUS_INTERNAL_EXCEPTION, //!< An internal exception was caught
RSMI_STATUS_INPUT_OUT_OF_BOUNDS, //!< The provided input is out of
//!< allowable or safe range
RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF, //!< An unknown error occurred
} rsmi_status_t;
/**
* @brief PowerPlay performance levels
*/
typedef enum {
RSMI_DEV_PERF_LEVEL_AUTO = 0, //!< Performance level is "auto"
RSMI_DEV_PERF_LEVEL_FIRST = RSMI_DEV_PERF_LEVEL_AUTO, //!< Alias for the first level
RSMI_DEV_PERF_LEVEL_LOW, //!< Keep PowerPlay levels "low",
//!< regardless of workload
RSMI_DEV_PERF_LEVEL_HIGH, //!< Keep PowerPlay levels "high",
//!< regardless of workload
RSMI_DEV_PERF_LEVEL_MANUAL, //!< Only use values defined by manually
//!< setting the RSMI_CLK_TYPE_SYS speed
RSMI_DEV_PERF_LEVEL_LAST = RSMI_DEV_PERF_LEVEL_MANUAL, //!< Alias for the last level
RSMI_DEV_PERF_LEVEL_UNKNOWN = 0x100 //!< Unknown performance level
} rsmi_dev_perf_level;
/**
* @brief Available clock types.
*/
typedef enum {
RSMI_CLK_TYPE_SYS = 0x0, //!< System clock
RSMI_CLK_TYPE_FIRST = RSMI_CLK_TYPE_SYS, //!< Alias for the first clock type
RSMI_CLK_TYPE_MEM, //!< Memory clock
RSMI_CLK_TYPE_LAST = RSMI_CLK_TYPE_MEM //!< Alias for the last clock type
} rsmi_clk_type;
/**
* @brief Temperature Metrics. This enum is used to identify various
* temperature metrics. Corresponding values will be in millidegrees
* Celsius.
*/
typedef enum {
RSMI_TEMP_CURRENT = 0x0, //!< Temperature current value.
RSMI_TEMP_FIRST = RSMI_TEMP_CURRENT,
RSMI_TEMP_MAX, //!< Temperature max value.
RSMI_TEMP_MIN, //!< Temperature min value.
RSMI_TEMP_MAX_HYST, //!< Temperature hysteresis value for max limit.
RSMI_TEMP_MIN_HYST, //!< Temperature hysteresis value for min limit.
RSMI_TEMP_CRITICAL, //!< Temperature critical max value, typically
//!< greater than corresponding temp_max values.
RSMI_TEMP_CRITICAL_HYST, //!< Temperature hysteresis value for critical
//!< limit.
RSMI_TEMP_EMERGENCY, //!< Temperature emergency max value, for chips
//!< supporting more than two upper temperature
//!< limits. Must be equal or greater than
//!< corresponding temp_crit values.
RSMI_TEMP_EMERGENCY_HYST, //!< Temperature hysteresis value for emergency
//!< limit.
RSMI_TEMP_CRIT_MIN, //!< Temperature critical min value, typically
//!< lower than corresponding temperature
//!< minimum values.
RSMI_TEMP_CRIT_MIN_HYST, //!< Temperature hysteresis value for critical
//!< minimum limit.
RSMI_TEMP_OFFSET, //!< Temperature offset which is added to the
//!< temperature reading by the chip.
RSMI_TEMP_LOWEST, //!< Historical minimum temperature.
RSMI_TEMP_HIGHEST, //!< Historical maximum temperature.
RSMI_TEMP_LAST = RSMI_TEMP_HIGHEST
} rsmi_temperature_metric;
/**
* @brief Pre-set Profile Selections. These bitmasks can be AND'd with the
* rsmi_power_profile_status::available_profiles returned from
* rsmi_dev_power_profile_presets_get() to determine which power profiles
* are supported by the system.
*
* Each valid preset occupies a single bit. RSMI_PWR_PROF_PRST_LAST is an
* alias of the highest preset bit defined here.
*/
typedef enum {
RSMI_PWR_PROF_PRST_CUSTOM_MASK = 0x1, //!< Custom Power Profile
RSMI_PWR_PROF_PRST_VIDEO_MASK = 0x2, //!< Video Power Profile
RSMI_PWR_PROF_PRST_POWER_SAVING_MASK = 0x4, //!< Power Saving Profile
RSMI_PWR_PROF_PRST_COMPUTE_MASK = 0x8, //!< Compute Power Profile
RSMI_PWR_PROF_PRST_VR_MASK = 0x10, //!< VR Power Profile
//! 3D Full Screen Power Profile
RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK = 0x20,
//! Alias of the highest preset mask defined in this enum
RSMI_PWR_PROF_PRST_LAST = RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK,
//! Invalid power profile.
//! NOTE(review): this value does not fit in an int; ISO C before C23
//! requires enumerators to be representable as int, so this relies on
//! compiler support for wider enumerators -- confirm for all toolchains.
RSMI_PWR_PROF_PRST_INVALID = 0xFFFFFFFFFFFFFFFF
} rsmi_power_profile_preset_masks;
/**
* @brief Bitfield used in various RSMI calls
*/
typedef uint64_t rsmi_bit_field;
/**
* @brief Number of possible power profiles that a system could support
*
* Every power profile is represented by one bit of an ::rsmi_bit_field, so
* the limit is the width of that type in bits (its size in bytes times 8),
* not its size in bytes. The expansion is parenthesized so that it can be
* used safely inside larger expressions.
*/
#define RSMI_MAX_NUM_POWER_PROFILES (sizeof(rsmi_bit_field) * 8)
/**
* @brief This structure contains information about which power profiles are
* supported by the system for a given device, and which power profile is
* currently active.
*/
typedef struct {
//! Which profiles are supported by this system (one bit per
//! rsmi_power_profile_preset_masks value)
rsmi_bit_field available_profiles;
//! Which power profile is currently active
rsmi_power_profile_preset_masks current;
//! How many power profiles are available
uint32_t num_profiles;
} rsmi_power_profile_status;
/**
* @brief This structure holds information about clock frequencies.
*/
typedef struct {
/**
* The number of supported frequencies
*/
uint32_t num_supported;
/**
* The current frequency index
* (presumably an index into ::frequency -- confirm against the code that
* fills in this structure)
*/
uint32_t current;
/**
* List of frequencies.
* Only the first num_supported frequencies are valid.
*/
uint64_t frequency[RSMI_MAX_NUM_FREQUENCIES];
} rsmi_frequencies;
/**
* @brief Initialize Rocm SMI.
*
* @details When called, this initializes internal data structures,
* including those corresponding to sources of information that SMI provides.
*
* @param[in] init_flags Bit flags that tell SMI how to initialize. Not
* currently used.
*
* @retval RSMI_STATUS_SUCCESS is returned upon successful call.
*/
rsmi_status_t rsmi_init(uint64_t init_flags);
/**
* @brief Shutdown Rocm SMI.
*
* @details Do any necessary clean up.
*/
rsmi_status_t rsmi_shut_down(void);
/**
* @brief Get the number of devices that have monitor information.
*
* @details The number of devices which have monitors is returned. Monitors
* are referenced by the index which can be between 0 and @p num_devices - 1.
*
* @param[inout] num_devices Caller provided pointer to uint32_t. Upon
* successful call, the value num_devices will contain the number of monitor
* devices.
*
* @retval RSMI_STATUS_SUCCESS is returned upon successful call.
*/
rsmi_status_t rsmi_num_monitor_devices(uint32_t *num_devices);
/**
* @brief Get the device id associated with the device with provided device
* index.
*
* @details Given a device index @p dv_ind and a pointer to a uint64_t @p id,
* this function will write the device id value to the uint64_t pointed to by
* @p id
*
* @param[in] dv_ind a device index
*
* @param[inout] id a pointer to uint64_t to which the device id will be
* written
*
* @retval RSMI_STATUS_SUCCESS is returned upon successful call.
*
*/
rsmi_status_t rsmi_dev_id_get(uint32_t dv_ind, uint64_t *id);
/**
* @brief Get the performance level of the device with provided
* device index.
*
* @details Given a device index @p dv_ind and a pointer to an
* rsmi_dev_perf_level @p perf, this function will write the performance
* level to the rsmi_dev_perf_level pointed to by @p perf
*
* @param[in] dv_ind a device index
*
* @param[inout] perf a pointer to rsmi_dev_perf_level to which the
* performance level will be written
*
* @retval RSMI_STATUS_SUCCESS is returned upon successful call.
*
*/
rsmi_status_t rsmi_dev_perf_level_get(uint32_t dv_ind,
rsmi_dev_perf_level *perf);
/**
* @brief Set the PowerPlay performance level associated with the device with
* provided device index with the provided value.
*
* @details Given a device index @p dv_ind and an rsmi_dev_perf_level @p
* perf_lvl, this function will set the PowerPlay performance level for the
* device to the value @p perf_lvl.
*
* @param[in] dv_ind a device index
*
* @param[in] perf_lvl the value to which the performance level should be set
*
* @retval RSMI_STATUS_SUCCESS is returned upon successful call.
*
*/
rsmi_status_t
rsmi_dev_perf_level_set(int32_t dv_ind, rsmi_dev_perf_level perf_lvl);
/**
* @brief Get the overdrive percent associated with the device with provided
* device index.
*
* @details Given a device index @p dv_ind and a pointer to a uint32_t @p od,
* this function will write the overdrive percentage to the uint32_t pointed
* to by @p od
*
* @param[in] dv_ind a device index
*
* @param[inout] od a pointer to uint32_t to which the overdrive percentage
* will be written
*
* @retval RSMI_STATUS_SUCCESS is returned upon successful call.
*
*/
rsmi_status_t rsmi_dev_overdrive_level_get(uint32_t dv_ind, uint32_t *od);
/**
* @brief Set the overdrive percent associated with the device with provided
* device index with the provided value. See details for WARNING.
*
* @details Given a device index @p dv_ind and an overdrive level @p od,
* this function will set the overdrive level for the device to the value
* @p od. The overdrive level is an integer value between 0 and 20, inclusive,
* which represents the overdrive percentage; e.g., a value of 5 specifies
* an overclocking of 5%.
*
* The overdrive level is specific to the gpu system clock.
*
* The overdrive level is the percentage above the maximum Performance Level
* to which overclocking will be limited. The overclocking percentage does
* not apply to clock speeds other than the maximum. This percentage is
* limited to 20%.
*
* ******WARNING******
* Operating your AMD GPU outside of official AMD specifications or outside of
* factory settings, including but not limited to the conducting of
* overclocking (including use of this overclocking software, even if such
* software has been directly or indirectly provided by AMD or otherwise
* affiliated in any way with AMD), may cause damage to your AMD GPU, system
* components and/or result in system failure, as well as cause other problems.
* DAMAGES CAUSED BY USE OF YOUR AMD GPU OUTSIDE OF OFFICIAL AMD SPECIFICATIONS
* OR OUTSIDE OF FACTORY SETTINGS ARE NOT COVERED UNDER ANY AMD PRODUCT
* WARRANTY AND MAY NOT BE COVERED BY YOUR BOARD OR SYSTEM MANUFACTURER'S
* WARRANTY. Please use this utility with caution.
*
* @param[in] dv_ind a device index
*
* @param[in] od the value to which the overdrive level should be set
*
* @retval RSMI_STATUS_SUCCESS is returned upon successful call.
*
*/
rsmi_status_t rsmi_dev_overdrive_level_set(int32_t dv_ind, uint32_t od);
/**
* @brief Get the list of possible system clock speeds of device for a
* specified clock type.
*
* @details Given a device index @p dv_ind, a clock type @p clk_type, and a
* pointer to an rsmi_frequencies structure @p f, this function will
* fill in @p f with the possible clock speeds, and an indication of the
* current clock speed selection.
*
* @param[in] dv_ind a device index
*
* @param[in] clk_type the type of clock for which the frequency is desired
*
* @param[inout] f a pointer to a caller provided rsmi_frequencies structure
* to which the frequency information will be written
*
* @retval RSMI_STATUS_SUCCESS is returned upon successful call.
*
*/
rsmi_status_t rsmi_dev_gpu_clk_freq_get(uint32_t dv_ind,
rsmi_clk_type clk_type, rsmi_frequencies *f);
/**
* @brief Control the set of allowed frequencies that can be used for the
* specified clock.
*
* @details Given a device index @p dv_ind, a clock type @p clk_type, and a
* 64 bit bitmask @p freq_bitmask, this function will limit the set of
* allowable frequencies. If a bit in @p freq_bitmask has a value of 1, then
* the frequency (as ordered in an rsmi_frequencies returned by
* rsmi_dev_gpu_clk_freq_get()) corresponding to that bit index will be
* allowed.
*
* This function will change the performance level to
* ::RSMI_DEV_PERF_LEVEL_MANUAL in order to modify the set of allowable
* frequencies. Caller will need to set to ::RSMI_DEV_PERF_LEVEL_AUTO in order
* to get back to default state.
*
* All bits with indices greater than or equal to
* rsmi_frequencies::num_supported will be ignored.
*
* @param[in] dv_ind a device index
*
* @param[in] clk_type the type of clock for which the set of frequencies
* will be modified
*
* @param[in] freq_bitmask A bitmask indicating the indices of the
* frequencies that are to be enabled (1) and disabled (0). Only the lowest
* rsmi_frequencies.num_supported bits of this mask are relevant.
*/
rsmi_status_t rsmi_dev_gpu_clk_freq_set(uint32_t dv_ind,
rsmi_clk_type clk_type, uint64_t freq_bitmask);
/**
* @brief Get the name of a gpu device.
*
* @details Given a device index @p dv_ind, a pointer to a caller provided
* char buffer @p name, and a length of this buffer @p len, this function
* will write the name of the device (up to @p len characters) to the buffer
* @p name.
*
* @param[in] dv_ind a device index
*
* @param[inout] name a pointer to a caller provided char buffer to which the
* name will be written
*
* @param[in] len the length of the caller provided buffer @p name.
*
* @retval RSMI_STATUS_SUCCESS is returned upon successful call.
*
*/
rsmi_status_t rsmi_dev_name_get(uint32_t dv_ind, char *name, size_t len);
/**
* @brief Get the temperature metric value for the specified metric, from the
* specified temperature sensor on the specified device.
*
* @details Given a device index @p dv_ind, a 0-based sensor index
* @p sensor_ind, a metric @p metric and a pointer to an int64_t
* @p temperature, this function will write the temperature value for that
* metric in millidegrees Celsius to the int64_t pointed to by @p temperature.
*
* @param[in] dv_ind a device index
*
* @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0.
* If a device has more than one sensor, it could be greater than 0.
*
* @param[in] metric enum indicating which temperature value should be
* retrieved
*
* @param[inout] temperature a pointer to int64_t to which the temperature
* will be written, in millidegrees Celsius.
*
* @retval RSMI_STATUS_SUCCESS is returned upon successful call.
*
*/
rsmi_status_t rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_ind,
rsmi_temperature_metric metric, int64_t *temperature);
/**
* @brief Reset the fan to automatic driver control
*
* @details This function returns control of the fan to the system
*
* @param[in] dv_ind a device index
*
* @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0.
* If a device has more than one sensor, it could be greater than 0.
*
* @retval RSMI_STATUS_SUCCESS is returned upon successful call.
*/
rsmi_status_t rsmi_dev_fan_reset(uint32_t dv_ind, uint32_t sensor_ind);
/**
* @brief Get the fan speed in RPMs of the device with the specified device
* index and 0-based sensor index.
*
* @details Given a device index @p dv_ind and a pointer to an int64_t
* @p speed, this function will write the current fan speed in RPMs to the
* int64_t pointed to by @p speed
*
* @param[in] dv_ind a device index
*
* @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0.
* If a device has more than one sensor, it could be greater than 0.
*
* @param[inout] speed a pointer to int64_t to which the speed will be
* written
*
* @retval RSMI_STATUS_SUCCESS is returned upon successful call.
*
*/
rsmi_status_t rsmi_dev_fan_rpms_get(uint32_t dv_ind, uint32_t sensor_ind,
int64_t *speed);
/**
* @brief Get the fan speed for the specified device as a value relative to
* RSMI_MAX_FAN_SPEED.
*
* @details Given a device index @p dv_ind and a pointer to an int64_t
* @p speed, this function will write the current fan speed (a value
* between 0 and 255) to the int64_t pointed to by @p speed. The maximum
* value is RSMI_MAX_FAN_SPEED and the minimum is 0.
*
* @param[in] dv_ind a device index
*
* @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0.
* If a device has more than one sensor, it could be greater than 0.
*
* @param[inout] speed a pointer to int64_t to which the speed will be
* written
*
* @retval RSMI_STATUS_SUCCESS is returned upon successful call.
*
*/
rsmi_status_t rsmi_dev_fan_speed_get(uint32_t dv_ind,
uint32_t sensor_ind, int64_t *speed);
/**
* @brief Get the max. fan speed of the device with provided device index.
*
* @details Given a device index @p dv_ind and a pointer to a uint64_t
* @p max_speed, this function will write the maximum fan speed possible to
* the uint64_t pointed to by @p max_speed
*
* @param[in] dv_ind a device index
*
* @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0.
* If a device has more than one sensor, it could be greater than 0.
*
* @param[inout] max_speed a pointer to uint64_t to which the maximum speed
* will be written
*
* @retval RSMI_STATUS_SUCCESS is returned upon successful call.
*
*/
rsmi_status_t rsmi_dev_fan_speed_max_get(uint32_t dv_ind,
uint32_t sensor_ind, uint64_t *max_speed);
/**
* @brief Set the fan speed for the specified device with the provided speed,
* in RPMs.
*
* @details Given a device index @p dv_ind and a integer value indicating
* speed @p speed, this function will attempt to set the fan speed to @p speed.
* An error will be returned if the specified speed is outside the allowable
* range for the device. The maximum value is 255 and the minimum is 0.
*
* @param[in] dv_ind a device index
*
* @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0.
* If a device has more than one sensor, it could be greater than 0.
*
* @param[in] speed the speed to which the function will attempt to set the fan
*
* @retval RSMI_STATUS_SUCCESS is returned upon successful call.
*/
rsmi_status_t rsmi_dev_fan_speed_set(uint32_t dv_ind, uint32_t sensor_ind,
uint64_t speed);
/**
* @brief Get the average power consumption of the device with provided
* device index.
*
* @details Given a device index @p dv_ind and a pointer to a uint64_t
* @p power, this function will write the current average power consumption to
* the uint64_t in milliwatts pointed to by @p power. This function requires
* root privilege.
*
* @param[in] dv_ind a device index
*
* @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0.
* If a device has more than one sensor, it could be greater than 0.
*
* @param[inout] power a pointer to uint64_t to which the average power
* consumption will be written
*
* @retval RSMI_STATUS_SUCCESS is returned upon successful call.
*
*/
rsmi_status_t
rsmi_dev_power_ave_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power);
/**
* @brief Get the cap on power which, when reached, causes the system to take
* action to reduce power.
*
* @details When power use rises above the value @p cap, the system will
* take action to reduce power use. The power level returned through
* @p cap will be in microwatts.
*
* @param[in] dv_ind a device index
*
* @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0.
* If a device has more than one sensor, it could be greater than 0.
*
* @param[inout] cap a pointer to a uint64_t that indicates the power cap,
* in microwatts
*
* @retval RSMI_STATUS_SUCCESS is returned upon successful call.
*
*/
rsmi_status_t
rsmi_dev_power_cap_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *cap);
/**
* @brief Get the range of valid values for the power cap
*
* @details This function will return the maximum possible valid power cap
* @p max and the minimum possible valid power cap @p min
*
* @param[in] dv_ind a device index
*
* @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0.
* If a device has more than one sensor, it could be greater than 0.
*
* @param[inout] max a pointer to a uint64_t that indicates the maximum
* possible power cap, in microwatts
*
* @param[inout] min a pointer to a uint64_t that indicates the minimum
* possible power cap, in microwatts
*
* @retval RSMI_STATUS_SUCCESS is returned upon successful call.
*
*/
rsmi_status_t
rsmi_dev_power_cap_range_get(uint32_t dv_ind, uint32_t sensor_ind,
uint64_t *max, uint64_t *min);
/**
* @brief Set the power cap value
*
* @details This function will set the power cap to the provided value @p cap.
* @p cap must be between the minimum and maximum power cap values set by the
* system, which can be obtained from ::rsmi_dev_power_cap_range_get.
*
* @param[in] dv_ind a device index
*
* @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0.
* If a device has more than one sensor, it could be greater than 0.
*
* @param[in] cap a uint64_t that indicates the desired power cap, in
* microwatts
*
* @retval RSMI_STATUS_SUCCESS is returned upon successful call.
*
*/
rsmi_status_t
rsmi_dev_power_cap_set(uint32_t dv_ind, uint32_t sensor_ind, uint64_t cap);
/**
* @brief Get the maximum power consumption of the device with provided
* device index.
*
* @details Given a device index @p dv_ind and a pointer to a uint64_t
* @p power, this function will write the current maximum power consumption to
* the uint64_t in milliwatts pointed to by @p power. This function requires
* root privilege.
*
* @param[in] dv_ind a device index
*
* @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0.
* If a device has more than one sensor, it could be greater than 0.
*
* @param[inout] power a pointer to uint64_t to which the maximum power
* consumption will be written
*
* @retval RSMI_STATUS_SUCCESS is returned upon successful call.
*
*/
rsmi_status_t
rsmi_dev_power_max_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power);
/**
* @brief Get the list of available preset power profiles and an indication of
* which profile is currently active.
*
* @details Given a device index @p dv_ind and a pointer to a
* rsmi_power_profile_status @p status, this function will set the bits of
* the rsmi_power_profile_status.available_profiles bit field of @p status to
* 1 if the profile corresponding to the respective
* rsmi_power_profile_preset_masks profiles are enabled. For example, if both
* the VIDEO and VR power profiles are available selections, then
* RSMI_PWR_PROF_PRST_VIDEO_MASK AND'ed with
* rsmi_power_profile_status.available_profiles will be non-zero as will
* RSMI_PWR_PROF_PRST_VR_MASK AND'ed with
* rsmi_power_profile_status.available_profiles. Additionally,
* rsmi_power_profile_status.current will be set to the
* rsmi_power_profile_preset_masks of the profile that is currently active.
*
* @param[in] dv_ind a device index
*
* @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0.
* If a device has more than one sensor, it could be greater than 0.
*
* @param[inout] status a pointer to rsmi_power_profile_status that will be
* populated by a call to this function
*
* @retval RSMI_STATUS_SUCCESS is returned upon successful call.
*
*/
rsmi_status_t
rsmi_dev_power_profile_presets_get(uint32_t dv_ind, uint32_t sensor_ind,
rsmi_power_profile_status *status);
/**
* @brief Set the power profile
*
* @details Given a device index @p dv_ind, a sensor index @p sensor_ind, and
* a @p profile, this function will attempt to set the current profile to the
* provided profile. The provided profile must be one of the currently
* supported profiles, as indicated by a call to
* ::rsmi_dev_power_profile_presets_get().
*
* @param[in] dv_ind a device index
*
* @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0.
* If a device has more than one sensor, it could be greater than 0.
*
* @param[in] profile a rsmi_power_profile_preset_masks that holds the mask
* of the desired new power profile
*
* @retval RSMI_STATUS_SUCCESS is returned upon successful call.
*
*/
rsmi_status_t
rsmi_dev_power_profile_set(uint32_t dv_ind, uint32_t sensor_ind,
rsmi_power_profile_preset_masks profile);
/**
* @brief Get a description of a provided RSMI error status
*
* @details Set the provided pointer to a const char *, @p status_string, to
* a string containing a description of the provided error code @p status.
*
* @param[in] status The error status for which a description is desired
*
* @param[inout] status_string A pointer to a const char * which will be made
* to point to a description of the provided error code
*
* @retval RSMI_STATUS_SUCCESS is returned upon successful call
*
*/
rsmi_status_t
rsmi_status_string(rsmi_status_t status, const char **status_string);
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // ROCM_SMI_LIB_INCLUDE_ROCM_SMI_ROCM_SMI_H_
+12 -3
Просмотреть файл
@@ -51,6 +51,7 @@
#include <vector>
#include "rocm_smi/rocm_smi_monitor.h"
#include "rocm_smi/rocm_smi_power_mon.h"
namespace amd {
namespace smi {
@@ -60,7 +61,8 @@ enum DevInfoTypes {
kDevOverDriveLevel,
kDevDevID,
kDevGPUMClk,
kDevGPUSClk
kDevGPUSClk,
kDevPowerProfileMode
};
class Device {
@@ -71,19 +73,26 @@ class Device {
void set_monitor(std::shared_ptr<Monitor> m) {monitor_ = m;}
std::string path(void) const {return path_;}
const std::shared_ptr<Monitor>& monitor() {return monitor_;}
const std::shared_ptr<PowerMon>& power_monitor() {return power_monitor_;}
void set_power_monitor(std::shared_ptr<PowerMon> pm) {power_monitor_ = pm;}
int readDevInfo(DevInfoTypes type, uint32_t *val);
int readDevInfo(DevInfoTypes type, std::string *val);
int readDevInfo(DevInfoTypes type, std::vector<std::string> *retVec);
int writeDevInfo(DevInfoTypes type, uint64_t val);
int writeDevInfo(DevInfoTypes type, std::string val);
uint32_t index(void) const {return index_;}
void set_index(uint32_t index) {index_ = index;}
private:
std::shared_ptr<Monitor> monitor_;
std::shared_ptr<PowerMon> power_monitor_;
std::string path_;
uint32_t index_;
int readDevInfoStr(DevInfoTypes type, std::string *retStr);
int readDevInfoMultiLineStr(DevInfoTypes type,
std::vector<std::string> *retVec);
int writeDevInfoStr(DevInfoTypes type, std::string valStr);
};
} // namespace smi
+16 -2
Просмотреть файл
@@ -50,31 +50,45 @@
#include <functional>
#include <set>
#include <string>
#include <cstdint>
#include "rocm_smi/rocm_smi_device.h"
#include "rocm_smi/rocm_smi_monitor.h"
#include "rocm_smi/rocm_smi_power_mon.h"
namespace amd {
namespace smi {
class RocmSMI {
public:
RocmSMI(void);
RocmSMI(void); // direct use of this constructor is deprecated; use
// getInstance()
~RocmSMI(void);
static RocmSMI& getInstance(void);
static std::vector<std::shared_ptr<amd::smi::Device>>&
monitor_devices() {return s_monitor_devices;}
uint32_t DiscoverDevices(void);
uint32_t DiscoverAMDPowerMonitors(bool force_update = false);
// Will execute "func" for every Device object known about, or until func
// returns true;
void IterateSMIDevices(
std::function<bool(std::shared_ptr<Device>&, void *)> func, void *);
private:
// temporarily make public RocmSMI(void); // force use getInstance()
std::vector<std::shared_ptr<Device>> devices_;
std::vector<std::shared_ptr<Monitor>> monitors_;
std::vector<std::shared_ptr<PowerMon>> power_mons_;
std::set<std::string> amd_monitor_types_;
void AddToDeviceList(std::string dev_name);
uint32_t DiscoverAMDMonitors(void);
static std::vector<std::shared_ptr<amd::smi::Device>> s_monitor_devices;
};
} // namespace smi
+23 -4
Просмотреть файл
@@ -56,6 +56,26 @@ enum MonitorTypes {
kMonTemp, // Temperature in millidegrees
kMonFanSpeed,
kMonMaxFanSpeed,
kMonFanRPMs,
kMonFanCntrlEnable,
kMonPowerCap,
kMonPowerCapMax,
kMonPowerCapMin,
kMonTempMax,
kMonTempMin,
kMonTempMaxHyst,
kMonTempMinHyst,
kMonTempCritical,
kMonTempCriticalHyst,
kMonTempEmergency,
kMonTempEmergencyHyst,
kMonTempCritMin,
kMonTempCritMinHyst,
kMonTempOffset,
kMonTempLowest,
kMonTempHighest,
kMonInvalid = 0xFFFFFFFF,
};
@@ -64,12 +84,11 @@ class Monitor {
explicit Monitor(std::string path);
~Monitor(void);
const std::string path(void) const {return path_;}
int readMonitor(MonitorTypes type, uint32_t *val);
int readMonitor(MonitorTypes type, std::string *val);
int readMonitor(MonitorTypes type, uint32_t sensor_ind, std::string *val);
int writeMonitor(MonitorTypes type, uint32_t sensor_ind, std::string val);
private:
std::string MakeMonitorPath(MonitorTypes type, int32_t sensor_id);
std::string path_;
int readMonitorStr(MonitorTypes type, std::string *retStr);
};
} // namespace smi
+78
Просмотреть файл
@@ -0,0 +1,78 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef ROCM_SMI_LIB_INCLUDE_ROCM_SMI_ROCM_SMI_POWER_MON_H_
#define ROCM_SMI_LIB_INCLUDE_ROCM_SMI_ROCM_SMI_POWER_MON_H_
#include <string>
#include <cstdint>
namespace amd {
namespace smi {
// Selects which power value PowerMon::readPowerValue() should read.
// NOTE(review): judging by the names these are the maximum and average GPU
// power readings -- confirm against the readPowerValue() implementation.
enum PowerMonTypes {
kPowerMaxGPUPower,
kPowerAveGPUPower,
};
// Associates a path with a device index and reads power values of the kinds
// listed in PowerMonTypes.
// NOTE(review): presumably the path refers to a sysfs power directory for one
// device -- confirm against the constructor and readPowerValue() definitions.
class PowerMon {
public:
explicit PowerMon(std::string path);
~PowerMon(void);
// Returns a copy of path_.
const std::string path(void) const {return path_;}
// Returns the device index previously stored with set_dev_index().
uint32_t dev_index(void) const {return dev_index_;}
// Records the index of the device this power monitor belongs to.
void set_dev_index(uint32_t ind) {dev_index_ = ind;}
// Reads the value selected by |type| into |*power|.
// NOTE(review): the meaning of the int return value (0 on success, errno
// otherwise?) is not visible in this header -- confirm.
int readPowerValue(PowerMonTypes type, uint64_t *power);
private:
// Not initialized in this header; set through set_dev_index().
uint32_t dev_index_;
std::string path_;
};
} // namespace smi
} // namespace amd
#endif // ROCM_SMI_LIB_INCLUDE_ROCM_SMI_ROCM_SMI_POWER_MON_H_
+71
Просмотреть файл
@@ -0,0 +1,71 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2018, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef ROCM_SMI_LIB_INCLUDE_ROCM_SMI_ROCM_SMI_UTILS_H_
#define ROCM_SMI_LIB_INCLUDE_ROCM_SMI_ROCM_SMI_UTILS_H_
#include <cstdint>
#include <cstdio>
#include <string>
// debug_print(fmt, ...): printf-style diagnostic output.
//
// When NDEBUG is defined the macro expands to an empty statement, so its
// arguments are not evaluated. Otherwise the arguments are forwarded to
// fprintf(stderr, ...). The do { } while (false) wrapper makes the macro
// behave like a single statement (e.g. inside an unbraced if/else).
// "##__VA_ARGS__" drops the preceding comma when no variadic arguments are
// given; this is a compiler extension rather than standard C++.
#ifdef NDEBUG
#define debug_print(fmt, ...) \
do { \
} while (false)
#else
#define debug_print(fmt, ...) \
do { \
fprintf(stderr, fmt, ##__VA_ARGS__); \
} while (false)
#endif
namespace amd {
namespace smi {
int ReadSysfsStr(std::string path, std::string *retStr);
int WriteSysfsStr(std::string path, std::string val);
} // namespace smi
} // namespace amd
#endif // ROCM_SMI_LIB_INCLUDE_ROCM_SMI_ROCM_SMI_UTILS_H_
+969 -1
Просмотреть файл
@@ -43,5 +43,973 @@
*
*/
#include "rocm_smi/rocm_smi.h"
#include <assert.h>
#include <errno.h>
#include <sstream>
#include <algorithm>
#include <cerrno>
#include <bitset>
#include <cstdint>
#include <unordered_map>
#include <map>
#include "rocm_smi/rocm_smi.h"
#include "rocm_smi/rocm_smi_main.h"
#include "rocm_smi/rocm_smi_device.h"
#include "rocm_smi/rocm_smi_utils.h"
static const uint32_t kMaxOverdriveLevel = 20;
// Translates the exception currently being handled into an rsmi_status_t.
//
// Must only be called from inside a catch block: the bare "throw;" re-throws
// the in-flight exception so that the handlers below can classify it.
//   - std::bad_alloc         -> RSMI_STATUS_OUT_OF_RESOURCES
//   - other std::exception   -> asserts (when asserts are enabled), otherwise
//                               RSMI_STATUS_INTERNAL_EXCEPTION
//   - std::nested_exception  -> the nested exception is re-thrown to the caller
//   - anything else          -> asserts (when enabled), then abort()
// The order of the handlers matters: the first matching handler wins.
static rsmi_status_t handleException() {
try {
throw;
} catch (const std::bad_alloc& e) {
debug_print("RSMI exception: BadAlloc\n");
return RSMI_STATUS_OUT_OF_RESOURCES;
} catch (const std::exception& e) {
debug_print("Unhandled exception: %s\n", e.what());
assert(false && "Unhandled exception.");
return RSMI_STATUS_INTERNAL_EXCEPTION;
} catch (const std::nested_exception& e) {
debug_print("Callback threw, forwarding.\n");
// rethrow_nested() does not return; the return below only keeps every
// control path returning a value.
e.rethrow_nested();
return RSMI_STATUS_INTERNAL_EXCEPTION;
} catch (...) {
assert(false && "Unhandled exception.");
// abort() does not return; the return below is never reached.
abort();
return RSMI_STATUS_INTERNAL_EXCEPTION;
}
}
// TRY / CATCH bracket a function body so that any exception thrown inside it
// is converted into an rsmi_status_t by handleException() and returned from
// the enclosing function. They must always be used as a pair, because TRY
// opens a block that CATCH closes.
#define TRY try {
#define CATCH } catch (...) {return handleException();}
// Resolves the enclosing function's device index "dv_ind" into a local
// std::shared_ptr<amd::smi::Device> named "dev" (and a local "smi" referring
// to the RocmSMI singleton).
//
// If dv_ind is not a valid index into the monitor device list, the enclosing
// function returns RSMI_STATUS_INVALID_ARGS.
//
// The singleton is bound by reference: binding it by value would copy the
// whole RocmSMI object (and run its destructor on the copy) on every call.
#define GET_DEV_FROM_INDX  \
  amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance(); \
  if (dv_ind >= smi.monitor_devices().size()) { \
    return RSMI_STATUS_INVALID_ARGS; \
  } \
  std::shared_ptr<amd::smi::Device> dev = smi.monitor_devices()[dv_ind]; \
  assert(dev != nullptr);
// Maps an errno-style error number onto the corresponding rsmi_status_t.
// Error numbers without a dedicated mapping are reported as
// RSMI_STATUS_UNKNOWN_ERROR.
static rsmi_status_t errno_to_rsmi_status(uint32_t err) {
  if (err == 0) {
    return RSMI_STATUS_SUCCESS;
  }
  if (err == EACCES) {
    return RSMI_STATUS_PERMISSION;
  }
  if (err == EPERM) {
    return RSMI_STATUS_NOT_SUPPORTED;
  }
  if (err == ENOENT) {
    return RSMI_STATUS_FILE_ERROR;
  }
  return RSMI_STATUS_UNKNOWN_ERROR;
}
/**
 * Parse a string of the form "<int index>: <int freq><freq. unit string> <|*>"
 *
 * @param freq_line one frequency line, e.g. "1: 608Mhz *"
 * @param is_curr if not nullptr, set to true when @p freq_line contains a
 * '*' (the marker for the currently selected frequency), false otherwise
 *
 * @return the frequency in Hz. The result is 64 bits wide because a 32 bit
 * product wraps around for frequencies above ~4.29 GHz. If the units are not
 * one of "Hz", "Khz", "Mhz" or "Ghz", an assert fires when asserts are
 * enabled and 0 is returned otherwise.
 */
static uint64_t freq_string_to_int(std::string freq_line, bool *is_curr) {
  std::istringstream fs(freq_line);

  // Zero-initialized: once one extraction fails, later extractions leave
  // their targets untouched, which would otherwise be an uninitialized read.
  uint32_t ind = 0;
  uint64_t freq = 0;
  std::string junk;
  std::string units_str;

  fs >> ind;
  fs >> junk;  // colon
  fs >> freq;
  fs >> units_str;

  if (is_curr != nullptr) {
    *is_curr = (freq_line.find("*") != std::string::npos);
  }

  uint64_t multiplier = 0;

  if (units_str == "Mhz") {
    multiplier = 1000000;
  } else if (units_str == "Ghz") {
    multiplier = 1000000000;
  } else if (units_str == "Khz") {
    multiplier = 1000;
  } else if (units_str == "Hz") {
    multiplier = 1;
  } else {
    assert(!"Unexpected units for frequency");
  }

  return freq * multiplier;
}
/**
 *  Parse a string of the form "<int index> <mode name string> <|*>"
 *
 *  @param pow_prof_line one line of the power-profile table
 *  @param is_curr       optional; when non-null it is set to true if the line
 *                       contains a '*' and to false otherwise
 *  @return the preset mask matching the mode name, or
 *          RSMI_PWR_PROF_PRST_INVALID when the name is not recognised
 */
static rsmi_power_profile_preset_masks
power_prof_string_to_int(std::string pow_prof_line, bool *is_curr) {
  struct ModeEntry {
    const char *name;
    rsmi_power_profile_preset_masks mask;
  };
  static const ModeEntry kModes[] = {
    {"3D_FULL_SCREEN", RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK},
    {"POWER_SAVING", RSMI_PWR_PROF_PRST_POWER_SAVING_MASK},
    {"VIDEO", RSMI_PWR_PROF_PRST_VIDEO_MASK},
    {"VR", RSMI_PWR_PROF_PRST_VR_MASK},
    {"COMPUTE", RSMI_PWR_PROF_PRST_COMPUTE_MASK},
    {"CUSTOM", RSMI_PWR_PROF_PRST_CUSTOM_MASK},
  };

  std::istringstream line_stream(pow_prof_line);
  uint32_t index;
  std::string name;

  line_stream >> index;
  line_stream >> name;

  // Cut the token at every '*', ' ' or ':' until none is left, which strips
  // the decorations that follow the mode name.
  for (size_t cut = name.find_last_of("* :"); cut != std::string::npos;
                                          cut = name.find_last_of("* :")) {
    name = name.substr(0, cut);
  }

  if (is_curr != nullptr) {
    *is_curr = (pow_prof_line.find("*") != std::string::npos);
  }

  for (const ModeEntry &entry : kModes) {
    if (name == entry.name) {
      return entry.mask;
    }
  }
  return RSMI_PWR_PROF_PRST_INVALID;
}
// Reads the device attribute "type" of device dv_ind as a string into
// *val_str and converts the device layer's result code to an rsmi_status_t.
static rsmi_status_t get_dev_value_str(amd::smi::DevInfoTypes type,
                                  uint32_t dv_ind, std::string *val_str) {
  GET_DEV_FROM_INDX

  return errno_to_rsmi_status(dev->readDevInfo(type, val_str));
}
// Writes the numeric value "val" to the device attribute "type" of device
// dv_ind and converts the device layer's result code to an rsmi_status_t.
static rsmi_status_t set_dev_value(amd::smi::DevInfoTypes type,
                                           uint32_t dv_ind, uint64_t val) {
  GET_DEV_FROM_INDX

  return errno_to_rsmi_status(dev->writeDevInfo(type, val));
}
// Reads monitor attribute "type" for sensor "sensor_ind" of device dv_ind and
// stores it in *val as a signed 64-bit integer.
//
// Returns the translated read status on failure; *val is only written on
// success. The conversion uses std::stoll so that the full int64_t range of
// the out-parameter is accepted (std::stoi would throw std::out_of_range for
// anything beyond the range of int).
static rsmi_status_t get_dev_mon_value(amd::smi::MonitorTypes type,
                         uint32_t dv_ind, uint32_t sensor_ind, int64_t *val) {
  GET_DEV_FROM_INDX

  assert(dev->monitor() != nullptr);

  std::string val_str;

  int ret = dev->monitor()->readMonitor(type, sensor_ind, &val_str);
  if (ret) {
    return errno_to_rsmi_status(ret);
  }

  *val = std::stoll(val_str);

  return RSMI_STATUS_SUCCESS;
}
// Unsigned overload: reads monitor attribute "type" for sensor "sensor_ind"
// of device dv_ind and stores it in *val as an unsigned 64-bit integer.
//
// Returns the translated read status on failure; *val is only written on
// success. std::stoull is used so the conversion is 64 bits wide regardless
// of the width of unsigned long on the target.
static rsmi_status_t get_dev_mon_value(amd::smi::MonitorTypes type,
                        uint32_t dv_ind, uint32_t sensor_ind, uint64_t *val) {
  GET_DEV_FROM_INDX

  assert(dev->monitor() != nullptr);

  std::string val_str;

  int ret = dev->monitor()->readMonitor(type, sensor_ind, &val_str);
  if (ret) {
    return errno_to_rsmi_status(ret);
  }

  *val = std::stoull(val_str);

  return RSMI_STATUS_SUCCESS;
}
// Writes "val" (formatted with std::to_string) to monitor attribute "type"
// for sensor "sensor_ind" of device dv_ind.
template <typename T>
static rsmi_status_t set_dev_mon_value(amd::smi::MonitorTypes type,
                                uint32_t dv_ind, int32_t sensor_ind, T val) {
  GET_DEV_FROM_INDX

  assert(dev->monitor() != nullptr);

  const std::string val_str = std::to_string(val);
  return errno_to_rsmi_status(
                  dev->monitor()->writeMonitor(type, sensor_ind, val_str));
}
// Reads the power value "type" for device dv_ind into *val.
//
// Power monitors are discovered on demand here rather than at init time.
// The singleton is bound by reference so that the discovery results are
// stored on the real RocmSMI instance instead of on a temporary copy.
//
// Returns RSMI_STATUS_INVALID_ARGS for a bad index or null "val",
// RSMI_STATUS_PERMISSION / RSMI_STATUS_FILE_ERROR when discovery fails,
// RSMI_STATUS_NOT_SUPPORTED when the device has no power monitor attached,
// and otherwise the translated result of the read.
static rsmi_status_t get_power_mon_value(amd::smi::PowerMonTypes type,
                                      uint32_t dv_ind, uint64_t *val) {
  amd::smi::RocmSMI &smi = amd::smi::RocmSMI::getInstance();

  if (dv_ind >= smi.monitor_devices().size() || val == nullptr) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  uint32_t ret = smi.DiscoverAMDPowerMonitors();
  if (ret == EACCES) {
    return RSMI_STATUS_PERMISSION;
  } else if (ret != 0) {
    return RSMI_STATUS_FILE_ERROR;
  }

  std::shared_ptr<amd::smi::Device> dev = smi.monitor_devices()[dv_ind];
  assert(dev != nullptr);

  // Discovery only attaches a power monitor to devices it could match, so
  // this pointer may legitimately be null; do not dereference it blindly.
  if (dev->power_monitor() == nullptr) {
    return RSMI_STATUS_NOT_SUPPORTED;
  }

  ret = dev->power_monitor()->readPowerValue(type, val);

  return errno_to_rsmi_status(ret);
}
// Reads monitor attribute "type" for sensor "sensor_ind" of device dv_ind
// into *val_str without any numeric conversion.
static rsmi_status_t get_dev_mon_value_str(amd::smi::MonitorTypes type,
                uint32_t dv_ind, int32_t sensor_ind, std::string *val_str) {
  GET_DEV_FROM_INDX

  assert(dev->monitor() != nullptr);

  return errno_to_rsmi_status(
                   dev->monitor()->readMonitor(type, sensor_ind, val_str));
}
// Reads the multi-line device attribute "type" of device dv_ind into
// *val_vec (one element per line, as filled in by the device layer).
static rsmi_status_t get_dev_value_vec(amd::smi::DevInfoTypes type,
                         uint32_t dv_ind, std::vector<std::string> *val_vec) {
  GET_DEV_FROM_INDX

  return errno_to_rsmi_status(dev->readDevInfo(type, val_vec));
}
// A call to rsmi_init is not technically necessary at this time, but may be
// in the future.
//
// Touches the RocmSMI singleton so that it is constructed now. The result of
// getInstance() is deliberately not stored: assigning it to a by-value local
// would copy-construct a second RocmSMI object for no purpose.
rsmi_status_t
rsmi_init(uint64_t init_flags) {
  TRY
  (void)init_flags;  // unused for now; for future use

  (void)amd::smi::RocmSMI::getInstance();

  return RSMI_STATUS_SUCCESS;
  CATCH
}
// A call to rsmi_shut_down is not technically necessary at this time,
// but may be in the future.
//
// Currently performs no work and always reports RSMI_STATUS_SUCCESS.
rsmi_status_t
rsmi_shut_down(void) {
TRY
return RSMI_STATUS_SUCCESS;
CATCH
}
// Stores the number of monitored devices in *num_devices.
//
// Returns RSMI_STATUS_INVALID_ARGS when num_devices is null. The singleton
// is accessed through a reference; it must not be copied just to read a size.
rsmi_status_t
rsmi_num_monitor_devices(uint32_t *num_devices) {
  TRY
  if (num_devices == nullptr) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  amd::smi::RocmSMI &smi = amd::smi::RocmSMI::getInstance();

  *num_devices = static_cast<uint32_t>(smi.monitor_devices().size());
  return RSMI_STATUS_SUCCESS;
  CATCH
}
// Reads the device id of device dv_ind into *id.
//
// The attribute is read as text and parsed as a hexadecimal number.
// Returns RSMI_STATUS_INVALID_ARGS when id is null (matching the other
// getters in this file), or the read status when the attribute cannot be
// read.
rsmi_status_t
rsmi_dev_id_get(uint32_t dv_ind, uint64_t *id) {
  TRY
  if (id == nullptr) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  std::string val_str;

  rsmi_status_t ret = get_dev_value_str(amd::smi::kDevDevID, dv_ind, &val_str);
  if (ret != RSMI_STATUS_SUCCESS) {
    return ret;
  }

  // strtoul reports range errors through errno, so clear it first.
  errno = 0;
  *id = strtoul(val_str.c_str(), nullptr, 16);
  assert(errno == 0);

  return RSMI_STATUS_SUCCESS;
  CATCH
}
// Reads the performance level of device dv_ind into *perf.
//
// The attribute text is matched against "auto", "low", "high" and "manual";
// any other text yields RSMI_DEV_PERF_LEVEL_UNKNOWN (still with a success
// status). Returns RSMI_STATUS_INVALID_ARGS when perf is null, or the read
// status when the attribute cannot be read.
rsmi_status_t
rsmi_dev_perf_level_get(uint32_t dv_ind, rsmi_dev_perf_level *perf) {
  TRY
  if (perf == nullptr) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  std::string val_str;

  rsmi_status_t ret = get_dev_value_str(amd::smi::kDevPerfLevel, dv_ind,
                                                                    &val_str);
  if (ret != RSMI_STATUS_SUCCESS) {
    return ret;
  }

  if (val_str == "auto") {
    *perf = RSMI_DEV_PERF_LEVEL_AUTO;
  } else if (val_str == "low") {
    *perf = RSMI_DEV_PERF_LEVEL_LOW;
  } else if (val_str == "high") {
    *perf = RSMI_DEV_PERF_LEVEL_HIGH;
  } else if (val_str == "manual") {
    *perf = RSMI_DEV_PERF_LEVEL_MANUAL;
  } else {
    *perf = RSMI_DEV_PERF_LEVEL_UNKNOWN;
  }

  return ret;
  CATCH
}
// Reads the overdrive level of device dv_ind into *od.
//
// The attribute is read as text and parsed as a decimal number.
// Returns RSMI_STATUS_INVALID_ARGS when od is null, or the read status when
// the attribute cannot be read.
rsmi_status_t
rsmi_dev_overdrive_level_get(uint32_t dv_ind, uint32_t *od) {
  TRY
  if (od == nullptr) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  std::string val_str;
  rsmi_status_t ret = get_dev_value_str(amd::smi::kDevOverDriveLevel, dv_ind,
                                                                    &val_str);
  if (ret != RSMI_STATUS_SUCCESS) {
    return ret;
  }

  // strtoul reports range errors through errno, so clear it first.
  errno = 0;
  *od = strtoul(val_str.c_str(), nullptr, 10);
  assert(errno == 0);

  return RSMI_STATUS_SUCCESS;
  CATCH
}
// Sets the overdrive level of device dv_ind to "od". Values above
// kMaxOverdriveLevel are rejected with RSMI_STATUS_INVALID_ARGS before
// anything is written.
rsmi_status_t
rsmi_dev_overdrive_level_set(int32_t dv_ind, uint32_t od) {
  TRY
  const bool in_range = (od <= kMaxOverdriveLevel);
  if (in_range) {
    return set_dev_value(amd::smi::kDevOverDriveLevel, dv_ind, od);
  }
  return RSMI_STATUS_INVALID_ARGS;
  CATCH
}
// Sets the performance level of device dv_ind. Levels beyond
// RSMI_DEV_PERF_LEVEL_LAST are rejected with RSMI_STATUS_INVALID_ARGS before
// anything is written.
rsmi_status_t
rsmi_dev_perf_level_set(int32_t dv_ind, rsmi_dev_perf_level perf_level) {
  TRY
  const bool in_range = !(perf_level > RSMI_DEV_PERF_LEVEL_LAST);
  if (in_range) {
    return set_dev_value(amd::smi::kDevPerfLevel, dv_ind, perf_level);
  }
  return RSMI_STATUS_INVALID_ARGS;
  CATCH
}
// Fills *f with the frequency table of the given clock attribute.
//
// Each line of the attribute becomes one entry of f->frequency (in Hz, via
// freq_string_to_int); f->current receives the index of the line marked as
// current. f->current is initialised to RSMI_MAX_NUM_FREQUENCIES + 1 so that
// "no current entry" is distinguishable.
//
// Returns RSMI_STATUS_INVALID_ARGS when f is null, the read status when the
// attribute cannot be read, and RSMI_STATUS_UNKNOWN_ERROR when the table has
// more lines than f->frequency can hold.
static rsmi_status_t get_frequencies(amd::smi::DevInfoTypes type,
                                    uint32_t dv_ind, rsmi_frequencies *f) {
  TRY
  std::vector<std::string> val_vec;
  rsmi_status_t ret;

  if (f == nullptr) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  ret = get_dev_value_vec(type, dv_ind, &val_vec);
  if (ret != RSMI_STATUS_SUCCESS) {
    return ret;
  }

  assert(val_vec.size() <= RSMI_MAX_NUM_FREQUENCIES);
  // The assert above vanishes in release builds; without this runtime check
  // an oversized table would write past the end of f->frequency.
  if (val_vec.size() > RSMI_MAX_NUM_FREQUENCIES) {
    return RSMI_STATUS_UNKNOWN_ERROR;
  }

  f->num_supported = static_cast<uint32_t>(val_vec.size());
  bool current = false;
  f->current = RSMI_MAX_NUM_FREQUENCIES + 1;  // init to an invalid value

  for (uint32_t i = 0; i < f->num_supported; ++i) {
    f->frequency[i] = freq_string_to_int(val_vec[i], &current);

    if (current) {
      // Should only be 1 current frequency
      assert(f->current == RSMI_MAX_NUM_FREQUENCIES + 1);
      f->current = i;
    }
  }

  assert(f->current < f->num_supported);

  return RSMI_STATUS_SUCCESS;
  CATCH
}
// Reads the power-profile table of device dv_ind.
//
// The first line of the table is treated as a header and skipped. For each
// remaining line the preset mask is OR-ed into p->available_profiles, the
// preset flagged as current is stored in p->current and, when ind_map is
// non-null, the mapping preset -> zero-based table row is recorded in it.
//
// Returns RSMI_STATUS_INVALID_ARGS when p is null, the read status when the
// attribute cannot be read, and RSMI_STATUS_NOT_SUPPORTED when the table is
// empty or contains a mode name this library does not know.
static rsmi_status_t get_power_profiles(uint32_t dv_ind,
                                        rsmi_power_profile_status *p,
               std::map<rsmi_power_profile_preset_masks, uint32_t> *ind_map) {
  TRY
  std::vector<std::string> val_vec;
  rsmi_status_t ret;

  if (p == nullptr) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  ret = get_dev_value_vec(amd::smi::kDevPowerProfileMode, dv_ind, &val_vec);
  if (ret != RSMI_STATUS_SUCCESS) {
    return ret;
  }

  // An empty table has no header line to discount; bail out before
  // "size() - 1" wraps around.
  if (val_vec.empty()) {
    return RSMI_STATUS_NOT_SUPPORTED;
  }

  assert(val_vec.size() <= RSMI_MAX_NUM_POWER_PROFILES);

  p->num_profiles = val_vec.size() - 1;  // -1 for the header line
  bool current = false;
  p->current = RSMI_PWR_PROF_PRST_INVALID;  // init to an invalid value
  p->available_profiles = 0;

  rsmi_power_profile_preset_masks prof;

  for (uint32_t i = 1; i < val_vec.size(); ++i) {
    prof = power_prof_string_to_int(val_vec[i], &current);

    if (prof == RSMI_PWR_PROF_PRST_INVALID) {
      return RSMI_STATUS_NOT_SUPPORTED;
    }

    if (ind_map != nullptr) {
      (*ind_map)[prof] = i-1;
    }

    p->available_profiles |= prof;
    if (current) {
      // Should only be 1 current profile
      assert(p->current == RSMI_PWR_PROF_PRST_INVALID);
      p->current = prof;
    }
  }

  assert(p->current != RSMI_PWR_PROF_PRST_INVALID);

  return RSMI_STATUS_SUCCESS;
  CATCH
}
// True when exactly one bit of n is set. Zero is not a power of two.
static bool is_power_of_2(uint64_t n) {
  if (n == 0) {
    return false;
  }
  // Clearing the lowest set bit leaves nothing iff only one bit was set.
  return (n & (n - 1)) == 0;
}
// Selects the power profile "profile" on device dv_ind.
//
// The request must name exactly one preset that the device lists as
// available; otherwise RSMI_STATUS_INPUT_OUT_OF_BOUNDS is returned. The
// performance level is switched to manual before the table row of the
// preset is written.
static rsmi_status_t set_power_profile(uint32_t dv_ind,
                                    rsmi_power_profile_preset_masks profile) {
  TRY
  // TODO(cf): test if it is valid to OR profiles; if not the following is
  // not necessary:
  // Determine if the provided profile is valid
  if (!is_power_of_2(profile)) {
    return RSMI_STATUS_INPUT_OUT_OF_BOUNDS;
  }

  rsmi_power_profile_status profile_info = {0, RSMI_PWR_PROF_PRST_INVALID, 0};
  std::map<rsmi_power_profile_preset_masks, uint32_t> preset_to_row;

  rsmi_status_t status =
                    get_power_profiles(dv_ind, &profile_info, &preset_to_row);
  if (status != RSMI_STATUS_SUCCESS) {
    return status;
  }

  if ((profile & profile_info.available_profiles) == 0) {
    return RSMI_STATUS_INPUT_OUT_OF_BOUNDS;
  }

  assert(preset_to_row.find(profile) != preset_to_row.end());

  // Set perf. level to manual so that we can then set the power profile
  status = rsmi_dev_perf_level_set(dv_ind, RSMI_DEV_PERF_LEVEL_MANUAL);
  if (status != RSMI_STATUS_SUCCESS) {
    return status;
  }

  // Write the new profile
  return set_dev_value(amd::smi::kDevPowerProfileMode, dv_ind,
                                                      preset_to_row[profile]);
  CATCH
}
// Fills *f with the frequency table of the requested clock of device dv_ind.
// Clock types other than RSMI_CLK_TYPE_SYS and RSMI_CLK_TYPE_MEM are
// rejected with RSMI_STATUS_INVALID_ARGS.
rsmi_status_t
rsmi_dev_gpu_clk_freq_get(uint32_t dv_ind, rsmi_clk_type clk_type,
                                                        rsmi_frequencies *f) {
  TRY
  if (clk_type == RSMI_CLK_TYPE_SYS) {
    return get_frequencies(amd::smi::kDevGPUSClk, dv_ind, f);
  }
  if (clk_type == RSMI_CLK_TYPE_MEM) {
    return get_frequencies(amd::smi::kDevGPUMClk, dv_ind, f);
  }
  return RSMI_STATUS_INVALID_ARGS;
  CATCH
}
static std::string bitfield_to_freq_string(uint64_t bitf,
uint32_t num_supported) {
std::string bf_str("");
std::bitset<RSMI_MAX_NUM_FREQUENCIES> bs(bitf);
for (uint32_t i = 0; i < num_supported; ++i) {
if (bs[i]) {
bf_str += std::to_string(i);
bf_str += " ";
}
}
return bf_str;
}
// Restricts the given clock of device dv_ind to the frequency levels whose
// bits are set in freq_bitmask (bit i <-> level i of the frequency table).
//
// The current table is read first to learn how many levels exist and to
// validate dv_ind / clk_type; the performance level is then switched to
// manual and the list of enabled level indices is written to the clock
// attribute. Any failing step's status is returned as-is.
rsmi_status_t
rsmi_dev_gpu_clk_freq_set(uint32_t dv_ind,
                              rsmi_clk_type clk_type, uint64_t freq_bitmask) {
  rsmi_status_t ret;
  rsmi_frequencies freqs;

  TRY
  ret = rsmi_dev_gpu_clk_freq_get(dv_ind, clk_type, &freqs);

  if (ret != RSMI_STATUS_SUCCESS) {
    return ret;
  }

  assert(freqs.num_supported <= RSMI_MAX_NUM_FREQUENCIES);

  // Bind by reference: getInstance() hands out the singleton, which must not
  // be copied.
  amd::smi::RocmSMI &smi = amd::smi::RocmSMI::getInstance();

  // Above call to rsmi_dev_gpu_clk_freq_get should have emitted an error if
  // assert below is not true
  assert(dv_ind < smi.monitor_devices().size());

  std::string freq_enable_str =
                   bitfield_to_freq_string(freq_bitmask, freqs.num_supported);

  std::shared_ptr<amd::smi::Device> dev = smi.monitor_devices()[dv_ind];
  assert(dev != nullptr);

  ret = rsmi_dev_perf_level_set(dv_ind, RSMI_DEV_PERF_LEVEL_MANUAL);
  if (ret != RSMI_STATUS_SUCCESS) {
    return ret;
  }

  int ret_i;
  switch (clk_type) {
    case RSMI_CLK_TYPE_SYS:
      ret_i = dev->writeDevInfo(amd::smi::kDevGPUSClk, freq_enable_str);
      return errno_to_rsmi_status(ret_i);

    case RSMI_CLK_TYPE_MEM:
      ret_i = dev->writeDevInfo(amd::smi::kDevGPUMClk, freq_enable_str);
      return errno_to_rsmi_status(ret_i);

    default:
      return RSMI_STATUS_INVALID_ARGS;
  }
  CATCH
}
// Copies the monitor name of device dv_ind into the caller's buffer "name"
// of capacity "len". The result is always NUL-terminated and is truncated to
// len - 1 characters when the buffer is too small.
// Returns RSMI_STATUS_INVALID_ARGS for a null buffer or zero length.
rsmi_status_t
rsmi_dev_name_get(uint32_t dv_ind, char *name, size_t len) {
  TRY
  if (len == 0 || name == nullptr) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  std::string mon_name;
  const rsmi_status_t status =
           get_dev_mon_value_str(amd::smi::kMonName, dv_ind, -1, &mon_name);
  if (status != RSMI_STATUS_SUCCESS) {
    return status;
  }

  // std::string::copy() does not terminate the destination, so place the
  // terminator ourselves -- on the last byte if the name filled the buffer.
  const size_t copied = mon_name.copy(name, len);
  const size_t terminator = (copied < len - 1) ? copied : len - 1;
  name[terminator] = '\0';

  return RSMI_STATUS_SUCCESS;
  CATCH
}
// Reads the temperature metric "metric" of sensor "sensor_ind" (0-based) on
// device dv_ind into *temperature.
//
// Returns RSMI_STATUS_INVALID_ARGS when temperature is null or when metric is
// not one of the RSMI_TEMP_* values handled below. Rejecting an unknown
// metric here avoids handing amd::smi::kMonInvalid to the monitor layer,
// which has no file name registered for it.
rsmi_status_t
rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_ind,
                       rsmi_temperature_metric metric, int64_t *temperature) {
  TRY
  if (temperature == nullptr) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  rsmi_status_t ret;
  amd::smi::MonitorTypes mon_type;

  // Make any adjustments to sensor_ind here, if index is not a 0 based. For
  // rocm_smi we are using a 0-based index. However, most of the Linux sysfs
  // monitor files are 1-based, so we will increment by 1 and make adjustments
  // for exceptions later.
  // See https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface
  ++sensor_ind;

  switch (metric) {
    case RSMI_TEMP_CURRENT:
      mon_type = amd::smi::kMonTemp;
      break;
    case RSMI_TEMP_MAX:
      mon_type = amd::smi::kMonTempMax;
      break;
    case RSMI_TEMP_MIN:
      mon_type = amd::smi::kMonTempMin;
      break;
    case RSMI_TEMP_MAX_HYST:
      mon_type = amd::smi::kMonTempMaxHyst;
      break;
    case RSMI_TEMP_MIN_HYST:
      mon_type = amd::smi::kMonTempMinHyst;
      break;
    case RSMI_TEMP_CRITICAL:
      mon_type = amd::smi::kMonTempCritical;
      break;
    case RSMI_TEMP_CRITICAL_HYST:
      mon_type = amd::smi::kMonTempCriticalHyst;
      break;
    case RSMI_TEMP_EMERGENCY:
      mon_type = amd::smi::kMonTempEmergency;
      break;
    case RSMI_TEMP_EMERGENCY_HYST:
      mon_type = amd::smi::kMonTempEmergencyHyst;
      break;
    case RSMI_TEMP_CRIT_MIN:
      mon_type = amd::smi::kMonTempCritMin;
      break;
    case RSMI_TEMP_CRIT_MIN_HYST:
      mon_type = amd::smi::kMonTempCritMinHyst;
      break;
    case RSMI_TEMP_OFFSET:
      mon_type = amd::smi::kMonTempOffset;
      break;
    case RSMI_TEMP_LOWEST:
      mon_type = amd::smi::kMonTempLowest;
      break;
    case RSMI_TEMP_HIGHEST:
      mon_type = amd::smi::kMonTempHighest;
      break;
    default:
      // Unknown metric: this is a caller error, not an internal one.
      return RSMI_STATUS_INVALID_ARGS;
  }

  ret = get_dev_mon_value(mon_type, dv_ind, sensor_ind, temperature);

  return ret;
  CATCH
}
// Reads the fan speed value of fan "sensor_ind" (0-based) on device dv_ind
// into *speed. Returns RSMI_STATUS_INVALID_ARGS when speed is null.
rsmi_status_t
rsmi_dev_fan_speed_get(uint32_t dv_ind, uint32_t sensor_ind, int64_t *speed) {
  TRY
  if (speed == nullptr) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  // fan sysfs files have 1-based indices
  const uint32_t sysfs_ind = sensor_ind + 1;

  return get_dev_mon_value(amd::smi::kMonFanSpeed, dv_ind, sysfs_ind, speed);
  CATCH
}
// Reads the RPM value of fan "sensor_ind" (0-based) on device dv_ind into
// *speed. Returns RSMI_STATUS_INVALID_ARGS when speed is null.
rsmi_status_t
rsmi_dev_fan_rpms_get(uint32_t dv_ind, uint32_t sensor_ind, int64_t *speed) {
  TRY
  if (speed == nullptr) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  // fan sysfs files have 1-based indices
  const uint32_t sysfs_ind = sensor_ind + 1;

  return get_dev_mon_value(amd::smi::kMonFanRPMs, dv_ind, sysfs_ind, speed);
  CATCH
}
// Writes the value 2 to the fan-control-enable attribute of fan
// "sensor_ind" (0-based) on device dv_ind.
// NOTE(review): 2 presumably selects automatic fan control -- confirm
// against the hwmon pwm#_enable documentation.
rsmi_status_t
rsmi_dev_fan_reset(uint32_t dv_ind, uint32_t sensor_ind) {
  TRY
  // fan sysfs files have 1-based indices
  const uint32_t sysfs_ind = sensor_ind + 1;

  return set_dev_mon_value<uint64_t>(amd::smi::kMonFanCntrlEnable,
                                                       dv_ind, sysfs_ind, 2);
  CATCH
}
// Sets the speed of fan "sensor_ind" (0-based) on device dv_ind.
//
// The requested speed is checked against the fan's reported maximum
// (RSMI_STATUS_INPUT_OUT_OF_BOUNDS when exceeded). Fan control is then
// switched to manual before the speed itself is written.
rsmi_status_t
rsmi_dev_fan_speed_set(uint32_t dv_ind, uint32_t sensor_ind, uint64_t speed) {
  TRY
  uint64_t speed_limit;

  // The public getter takes the 0-based index, so query it before adjusting.
  rsmi_status_t status =
                rsmi_dev_fan_speed_max_get(dv_ind, sensor_ind, &speed_limit);
  if (status != RSMI_STATUS_SUCCESS) {
    return status;
  }

  if (speed > speed_limit) {
    return RSMI_STATUS_INPUT_OUT_OF_BOUNDS;
  }

  // fan sysfs files have 1-based indices
  const uint32_t sysfs_ind = sensor_ind + 1;

  // First need to set fan mode (pwm1_enable) to 1 (aka, "manual")
  status = set_dev_mon_value<uint64_t>(amd::smi::kMonFanCntrlEnable, dv_ind,
                                                               sysfs_ind, 1);
  if (status == RSMI_STATUS_SUCCESS) {
    status = set_dev_mon_value<uint64_t>(amd::smi::kMonFanSpeed, dv_ind,
                                                           sysfs_ind, speed);
  }

  return status;
  CATCH
}
// Reads the maximum speed value of fan "sensor_ind" (0-based) on device
// dv_ind into *max_speed. Returns RSMI_STATUS_INVALID_ARGS when max_speed is
// null.
//
// The uint64_t overload of get_dev_mon_value is called directly; there is no
// need to reinterpret the caller's uint64_t storage as int64_t.
rsmi_status_t
rsmi_dev_fan_speed_max_get(uint32_t dv_ind, uint32_t sensor_ind,
                                                        uint64_t *max_speed) {
  TRY
  if (max_speed == nullptr) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  ++sensor_ind;  // fan sysfs files have 1-based indices

  rsmi_status_t ret;

  ret = get_dev_mon_value(amd::smi::kMonMaxFanSpeed, dv_ind, sensor_ind,
                                                                   max_speed);

  return ret;
  CATCH
}
// Reads the maximum GPU power value of device dv_ind into *power.
// sensor_ind is accepted for interface symmetry but is not used yet.
// Returns RSMI_STATUS_INVALID_ARGS when power is null.
rsmi_status_t
rsmi_dev_power_max_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power) {
  TRY
  (void)sensor_ind;  // Not used yet

  if (power == nullptr) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  return get_power_mon_value(amd::smi::kPowerMaxGPUPower, dv_ind, power);
  CATCH
}
// Reads the average GPU power value of device dv_ind into *power.
// sensor_ind is accepted for interface symmetry but is not used yet.
// Returns RSMI_STATUS_INVALID_ARGS when power is null.
rsmi_status_t
rsmi_dev_power_ave_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power) {
  TRY
  (void)sensor_ind;  // Not used yet

  if (power == nullptr) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  return get_power_mon_value(amd::smi::kPowerAveGPUPower, dv_ind, power);
  CATCH
}
// Reads the power cap of power sensor "sensor_ind" (0-based) on device
// dv_ind into *cap. Returns RSMI_STATUS_INVALID_ARGS when cap is null.
rsmi_status_t
rsmi_dev_power_cap_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *cap) {
  TRY
  if (cap == nullptr) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  // power sysfs files have 1-based indices
  const uint32_t sysfs_ind = sensor_ind + 1;

  return get_dev_mon_value(amd::smi::kMonPowerCap, dv_ind, sysfs_ind, cap);
  CATCH
}
// Reads the allowed power-cap range of power sensor "sensor_ind" (0-based)
// on device dv_ind. *max is read first; *min is only read when that
// succeeded. Returns RSMI_STATUS_INVALID_ARGS when either pointer is null.
rsmi_status_t
rsmi_dev_power_cap_range_get(uint32_t dv_ind, uint32_t sensor_ind,
                                               uint64_t *max, uint64_t *min) {
  TRY
  if (max == nullptr || min == nullptr) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  // power sysfs files have 1-based indices
  const uint32_t sysfs_ind = sensor_ind + 1;

  const rsmi_status_t max_status =
      get_dev_mon_value(amd::smi::kMonPowerCapMax, dv_ind, sysfs_ind, max);
  if (max_status != RSMI_STATUS_SUCCESS) {
    return max_status;
  }

  return get_dev_mon_value(amd::smi::kMonPowerCapMin, dv_ind,
                                                              sysfs_ind, min);
  CATCH
}
// Sets the power cap of power sensor "sensor_ind" (0-based) on device
// dv_ind. The requested cap must lie within the range reported by
// rsmi_dev_power_cap_range_get(); otherwise RSMI_STATUS_INVALID_ARGS is
// returned and nothing is written.
rsmi_status_t
rsmi_dev_power_cap_set(uint32_t dv_ind, uint32_t sensor_ind, uint64_t cap) {
  TRY
  uint64_t lower, upper;

  // All rsmi_* calls that use sensor_ind should use the 0-based value,
  // so the range query happens before the index is adjusted.
  const rsmi_status_t range_status =
         rsmi_dev_power_cap_range_get(dv_ind, sensor_ind, &upper, &lower);
  if (range_status != RSMI_STATUS_SUCCESS) {
    return range_status;
  }

  // power sysfs files have 1-based indices
  const uint32_t sysfs_ind = sensor_ind + 1;

  const bool within_range = (cap >= lower) && (cap <= upper);
  if (!within_range) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  return set_dev_mon_value<uint64_t>(amd::smi::kMonPowerCap, dv_ind,
                                                             sysfs_ind, cap);
  CATCH
}
// Fills *status with the power-profile presets of device dv_ind (count,
// availability mask and current preset). sensor_ind does not influence the
// lookup.
rsmi_status_t
rsmi_dev_power_profile_presets_get(uint32_t dv_ind, uint32_t sensor_ind,
                                          rsmi_power_profile_status *status) {
  TRY
  (void)sensor_ind;  // not consulted by the profile lookup

  return get_power_profiles(dv_ind, status, nullptr);
  CATCH
}
// Selects the power-profile preset "profile" on device dv_ind. sensor_ind
// does not influence the operation.
rsmi_status_t
rsmi_dev_power_profile_set(uint32_t dv_ind, uint32_t sensor_ind,
                                    rsmi_power_profile_preset_masks profile) {
  TRY
  (void)sensor_ind;  // not consulted when selecting a profile

  return set_power_profile(dv_ind, profile);
  CATCH
}
// Stores in *status_string a pointer to a static, human-readable description
// of "status".
//
// Returns RSMI_STATUS_INVALID_ARGS when status_string is null. For a status
// value without a dedicated description a generic message is stored and
// RSMI_STATUS_UNKNOWN_ERROR is returned; otherwise RSMI_STATUS_SUCCESS.
rsmi_status_t
rsmi_status_string(rsmi_status_t status, const char **status_string) {
  TRY
  if (status_string == nullptr) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  const char *description = nullptr;

  switch (static_cast<size_t>(status)) {
    case RSMI_STATUS_SUCCESS:
      description = "RSMI_STATUS_SUCCESS: The function has been executed"
                    " successfully.";
      break;

    case RSMI_STATUS_INVALID_ARGS:
      description =
          "RSMI_STATUS_INVALID_ARGS: The provided arguments do not"
          " meet the preconditions required for calling this function.";
      break;

    case RSMI_STATUS_NOT_SUPPORTED:
      description = "RSMI_STATUS_NOT_SUPPORTED: This function is not"
                    " supported in the current environment.";
      break;

    case RSMI_STATUS_FILE_ERROR:
      description =
          "RSMI_STATUS_FILE_ERROR: There was an error in finding or"
          " opening a file or directory. The operation may not be supported by "
          "this Linux kernel version.";
      break;

    case RSMI_STATUS_PERMISSION:
      description = "RSMI_STATUS_PERMISSION: The user ID of the calling"
          " process does not have sufficient permission to execute a command."
          " Often this is fixed by running as root (sudo).";
      break;

    case RSMI_STATUS_OUT_OF_RESOURCES:
      description = "Unable to acquire memory or other resource";
      break;

    case RSMI_STATUS_INTERNAL_EXCEPTION:
      description = "An internal exception was caught";
      break;

    case RSMI_STATUS_INPUT_OUT_OF_BOUNDS:
      description = "The provided input is out of allowable or safe range";
      break;

    default:
      *status_string = "An unknown error occurred";
      return RSMI_STATUS_UNKNOWN_ERROR;
  }

  *status_string = description;
  return RSMI_STATUS_SUCCESS;
  CATCH
}
+99 -16
Просмотреть файл
@@ -43,7 +43,7 @@
#include <assert.h>
#include <sys/stat.h>
#include <stdint.h>
#include <string>
#include <map>
#include <fstream>
@@ -54,6 +54,7 @@
#include "rocm_smi/rocm_smi_main.h"
#include "rocm_smi/rocm_smi_device.h"
#include "rocm_smi/rocm_smi.h"
namespace amd {
namespace smi {
@@ -63,6 +64,12 @@ static const char *kDevDevIDFName = "device";
static const char *kDevOverDriveLevelFName = "pp_sclk_od";
static const char *kDevGPUSClkFName = "pp_dpm_sclk";
static const char *kDevGPUMClkFName = "pp_dpm_mclk";
static const char *kDevPowerProfileModeName = "pp_power_profile_mode";
static const char *kDevPerfLevelAutoStr = "auto";
static const char *kDevPerfLevelLowStr = "low";
static const char *kDevPerfLevelHighStr = "high";
static const char *kDevPerfLevelManualStr = "manual";
static const char *kDevPerfLevelUnknownStr = "unknown";
static const std::map<DevInfoTypes, const char *> kDevAttribNameMap = {
{kDevPerfLevel, kDevPerfLevelFName},
@@ -70,6 +77,15 @@ static const std::map<DevInfoTypes, const char *> kDevAttribNameMap = {
{kDevDevID, kDevDevIDFName},
{kDevGPUMClk, kDevGPUMClkFName},
{kDevGPUSClk, kDevGPUSClkFName},
{kDevPowerProfileMode, kDevPowerProfileModeName},
};
static const std::map<rsmi_dev_perf_level, const char *> kDevPerfLvlMap = {
{RSMI_DEV_PERF_LEVEL_AUTO, kDevPerfLevelAutoStr},
{RSMI_DEV_PERF_LEVEL_LOW, kDevPerfLevelLowStr},
{RSMI_DEV_PERF_LEVEL_HIGH, kDevPerfLevelHighStr},
{RSMI_DEV_PERF_LEVEL_MANUAL, kDevPerfLevelManualStr},
{RSMI_DEV_PERF_LEVEL_UNKNOWN, kDevPerfLevelUnknownStr},
};
static bool isRegularFile(std::string fname) {
@@ -78,6 +94,10 @@ static bool isRegularFile(std::string fname) {
return S_ISREG(file_stat.st_mode);
}
#define RET_IF_NONZERO(X) { \
if (X) return X; \
}
Device::Device(std::string p) : path_(p) {
monitor_ = nullptr;
}
@@ -94,18 +114,84 @@ int Device::readDevInfoStr(DevInfoTypes type, std::string *retStr) {
tempPath += "/device/";
tempPath += kDevAttribNameMap.at(type);
if (!isRegularFile(tempPath)) {
return EISDIR;
}
std::ifstream fs;
fs.open(tempPath);
if (!fs.is_open() || !isRegularFile(tempPath)) {
return -1;
if (!fs.is_open()) {
return errno;
}
fs >> *retStr;
fs.close();
return 0;
}
// Writes valStr to the attribute file that backs "type" for this device
// (<path_>/device/<attribute file name>).
//
// Returns 0 on success, EISDIR when the target is not a regular file, or the
// errno left behind by a failed open.
//
// The regular-file check is done BEFORE the stream is opened, matching
// readDevInfoStr(): opening an output stream is itself a side effect (it
// creates or truncates the target), so the path has to be validated first.
int Device::writeDevInfoStr(DevInfoTypes type, std::string valStr) {
  auto tempPath = path_;

  tempPath += "/device/";
  tempPath += kDevAttribNameMap.at(type);

  if (!isRegularFile(tempPath)) {
    return EISDIR;
  }

  std::ofstream fs;
  fs.open(tempPath);

  if (!fs.is_open()) {
    return errno;
  }

  fs << valStr;
  fs.close();

  return 0;
}
// Numeric write entry point.
//
// kDevOverDriveLevel and kDevPowerProfileMode are written as the decimal
// text of "val"; kDevPerfLevel is written as the level name looked up in
// kDevPerfLvlMap. Every other attribute type is not writable through this
// overload and yields -1.
// The caller is responsible for making sure "val" is within a valid range.
int Device::writeDevInfo(DevInfoTypes type, uint64_t val) {
  if (type == kDevOverDriveLevel || type == kDevPowerProfileMode) {
    return writeDevInfoStr(type, std::to_string(val));
  }

  if (type == kDevPerfLevel) {
    const rsmi_dev_perf_level level = static_cast<rsmi_dev_perf_level>(val);
    return writeDevInfoStr(type, kDevPerfLvlMap.at(level));
  }

  // kDevGPUMClk / kDevGPUSClk take a string of indices, kDevDevID is
  // read-only, and anything else is unknown here.
  return -1;
}
// String write entry point. Only the clock attributes (kDevGPUMClk and
// kDevGPUSClk) are written through this overload; every other attribute
// type yields -1.
int Device::writeDevInfo(DevInfoTypes type, std::string val) {
  const bool is_clock_attr = (type == kDevGPUMClk) || (type == kDevGPUSClk);

  if (!is_clock_attr) {
    return -1;
  }
  return writeDevInfoStr(type, val);
}
int Device::readDevInfoMultiLineStr(DevInfoTypes type,
std::vector<std::string> *retVec) {
auto tempPath = path_;
@@ -121,7 +207,7 @@ int Device::readDevInfoMultiLineStr(DevInfoTypes type,
if (!isRegularFile(tempPath)) {
return -1;
return EISDIR;
}
while (std::getline(fs, line)) {
@@ -134,19 +220,18 @@ int Device::readDevInfo(DevInfoTypes type, uint32_t *val) {
assert(val != nullptr);
std::string tempStr;
int ret;
switch (type) {
case kDevDevID:
if (readDevInfoStr(type, &tempStr)) {
return -1;
}
ret = readDevInfoStr(type, &tempStr);
RET_IF_NONZERO(ret);
*val = std::stoi(tempStr, 0, 16);
break;
case kDevOverDriveLevel:
if (readDevInfoStr(type, &tempStr)) {
return -1;
}
ret = readDevInfoStr(type, &tempStr);
RET_IF_NONZERO(ret);
*val = std::stoi(tempStr, 0);
break;
@@ -162,9 +247,8 @@ int Device::readDevInfo(DevInfoTypes type, std::vector<std::string> *val) {
switch (type) {
case kDevGPUMClk:
case kDevGPUSClk:
if (readDevInfoMultiLineStr(type, val)) {
return -1;
}
case kDevPowerProfileMode:
return readDevInfoMultiLineStr(type, val);
break;
default:
@@ -181,9 +265,7 @@ int Device::readDevInfo(DevInfoTypes type, std::string *val) {
case kDevPerfLevel:
case kDevOverDriveLevel:
case kDevDevID:
if (readDevInfoStr(type, val)) {
return -1;
}
return readDevInfoStr(type, val);
break;
default:
@@ -192,5 +274,6 @@ int Device::readDevInfo(DevInfoTypes type, std::string *val) {
return 0;
}
#undef RET_IF_NONZERO
} // namespace smi
} // namespace amd
+120 -4
Просмотреть файл
@@ -46,6 +46,8 @@
#include <dirent.h>
#include <assert.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <string>
#include <cstdint>
@@ -55,12 +57,15 @@
#include <set>
#include <utility>
#include <functional>
#include <cerrno>
#include "rocm_smi/rocm_smi.h"
#include "rocm_smi/rocm_smi_main.h"
static const char *kPathDRMRoot = "/sys/class/drm";
static const char *kPathHWMonRoot = "/sys/class/hwmon";
static const char *kPathPowerRoot = "/sys/kernel/debug/dri";
static const char *kDeviceNamePrefix = "card";
static const char *kAMDMonitorTypes[] = {"radeon", "amdgpu", ""};
@@ -73,6 +78,14 @@ static bool FileExists(char const *filename) {
return (stat(filename, &buf) == 0);
}
// Extracts the trailing decimal number of "s" (e.g. "card12" -> 12).
// Throws std::invalid_argument (from std::stoi) when "s" does not end in a
// digit.
static uint32_t GetDeviceIndex(const std::string s) {
  // npos + 1 wraps to 0, so an all-digit string is kept whole.
  const size_t last_non_digit = s.find_last_not_of("0123456789");
  const std::string digits = s.substr(last_non_digit + 1);

  return std::stoi(digits);
}
// Return 0 if same file, 1 if not, and -1 for error
static int SameFile(const std::string fileA, const std::string fileB) {
struct stat aStat;
@@ -104,15 +117,40 @@ static int SameDevice(const std::string fileA, const std::string fileB) {
return SameFile(fileA + "/device", fileB + "/device");
}
void ShowAllTemperatures();
// Call-back function to append to a vector of Devices
static bool GetMonitorDevices(const std::shared_ptr<amd::smi::Device> &d,
void *p) {
std::string val_str;
RocmSMI::RocmSMI() {
assert(p != nullptr);
std::vector<std::shared_ptr<amd::smi::Device>> *device_list =
reinterpret_cast<std::vector<std::shared_ptr<amd::smi::Device>> *>(p);
if (d->monitor() != nullptr) {
device_list->push_back(d);
}
return false;
}
std::vector<std::shared_ptr<amd::smi::Device>> RocmSMI::s_monitor_devices;
// Registers the known AMD monitor type names, discovers devices and their
// monitors, and collects every device that has a monitor into
// s_monitor_devices.
RocmSMI::RocmSMI(void) {
  // kAMDMonitorTypes is terminated by an empty string.
  for (auto i = 0; std::string(kAMDMonitorTypes[i]) != ""; ++i) {
    amd_monitor_types_.insert(kAMDMonitorTypes[i]);
  }

  // DiscoverDevices() will search for devices and monitors and update
  // internal data structures.
  DiscoverDevices();

  // IterateSMIDevices will iterate through all the known devices and apply
  // the provided call-back to each device found.
  void *monitor_list = reinterpret_cast<void *>(&s_monitor_devices);
  IterateSMIDevices(GetMonitorDevices, monitor_list);
}
RocmSMI::~RocmSMI() {
@@ -120,6 +158,13 @@ RocmSMI::~RocmSMI() {
monitors_.clear();
}
// Returns a reference to the process-wide RocmSMI object, constructing it on
// first use. Callers should bind the result to a reference; assigning it to
// a by-value RocmSMI makes a copy of the object.
RocmSMI& RocmSMI::getInstance(void) {
// Assume c++11 or greater. static objects will be created by only 1 thread
// and creation will be thread-safe.
static RocmSMI singleton;
return singleton;
}
void
RocmSMI::AddToDeviceList(std::string dev_name) {
auto ret = 0;
@@ -137,22 +182,31 @@ RocmSMI::AddToDeviceList(std::string dev_name) {
if (ret == 0) {
dev->set_monitor(*m);
m = monitors_.erase(m);
break;
} else {
assert(ret == 1);
++m;
}
}
std::string d_name = dev_name;
uint32_t d_index = GetDeviceIndex(d_name);
dev->set_index(d_index);
devices_.push_back(dev);
return;
}
uint32_t RocmSMI::DiscoverDevices(void) {
auto ret = 0;
// If this gets called more than once, clear previous findings.
devices_.clear();
monitors_.clear();
ret = DiscoverAMDMonitors();
if (ret) {
@@ -219,9 +273,71 @@ uint32_t RocmSMI::DiscoverAMDMonitors(void) {
return 0;
}
// Since these sysfs files require sudo access, we won't discover them
// with rsmi_init() (and thus always require the user to use "sudo".
// Instead, we will discover() all the power monitors the first time
// they are needed and then check for previous discovery on each subsequent
// call.
uint32_t RocmSMI::DiscoverAMDPowerMonitors(bool force_update) {
if (force_update) {
power_mons_.clear();
}
if (power_mons_.size() != 0) {
return 0;
}
errno = 0;
auto dri_dir = opendir(kPathPowerRoot);
if (dri_dir == nullptr) {
return errno;
}
auto dentry = readdir(dri_dir);
std::string mon_name;
std::string tmp;
while (dentry != nullptr) {
if (dentry->d_name[0] == '.') {
dentry = readdir(dri_dir);
continue;
}
mon_name = kPathPowerRoot;
mon_name += "/";
mon_name += dentry->d_name;
tmp = mon_name + "/amdgpu_pm_info";
if (FileExists(tmp.c_str())) {
std::shared_ptr<PowerMon> mon =
std::shared_ptr<PowerMon>(new PowerMon(mon_name));
power_mons_.push_back(mon);
mon->set_dev_index(GetDeviceIndex(dentry->d_name));
}
dentry = readdir(dri_dir);
}
errno = 0;
if (closedir(dri_dir)) {
power_mons_.clear();
return errno;
}
for (auto m : power_mons_) {
for (auto d : devices_) {
if (m->dev_index() == d->index()) {
d->set_power_monitor(m);
break;
}
}
}
return 0;
}
void RocmSMI::IterateSMIDevices(
std::function<bool(std::shared_ptr<Device>&, void *)> func, void *p) {
if (func == nullptr) {
return;
}
+59 -41
Просмотреть файл
@@ -50,9 +50,11 @@
#include <cstdint>
#include <map>
#include <iostream>
#include <algorithm>
#include "rocm_smi/rocm_smi_main.h"
#include "rocm_smi/rocm_smi_monitor.h"
#include "rocm_smi/rocm_smi_utils.h"
namespace amd {
namespace smi {
@@ -63,16 +65,53 @@ struct MonitorNameEntry {
};
static const char *kMonTempFName = "temp1_input";
static const char *kMonFanSpeedFName = "pwm1";
static const char *kMonMaxFanSpeedFName = "pwm1_max";
static const char *kMonTempFName = "temp#_input";
static const char *kMonFanSpeedFName = "pwm#";
static const char *kMonMaxFanSpeedFName = "pwm#_max";
static const char *kMonFanRPMsName = "fan#_input";
static const char *kMonFanControlEnableName = "pwm#_enable";
static const char *kMonNameFName = "name";
static const char *kMonPowerCapName = "power#_cap";
static const char *kMonPowerCapMaxName = "power#_cap_max";
static const char *kMonPowerCapMinName = "power#_cap_min";
static const char *kMonTempMaxName = "temp#_max";
static const char *kMonTempMinName = "temp#_min";
static const char *kMonTempMaxHystName = "temp#_max_hyst";
static const char *kMonTempMinHystName = "temp#_min_hyst";
static const char *kMonTempCriticalName = "temp#_crit";
static const char *kMonTempCriticalHystName = "temp#_crit_hyst";
static const char *kMonTempEmergencyName = "temp#_emergency";
static const char *kMonTempEmergencyHystName = "temp#_emergency_hyst";
static const char *kMonTempCritMinName = "temp#_lcrit";
static const char *kMonTempCritMinHystName = "temp#_lcrit_hyst";
static const char *kMonTempOffsetName = "temp#_offset";
static const char *kMonTempLowestName = "temp#_lowest";
static const char *kMonTempHighestName = "temp#_highest";
static const std::map<MonitorTypes, const char *> kMonitorNameMap = {
{kMonName, kMonNameFName},
{kMonTemp, kMonTempFName},
{kMonFanSpeed, kMonFanSpeedFName},
{kMonMaxFanSpeed, kMonMaxFanSpeedFName}
{kMonFanCntrlEnable, kMonFanControlEnableName},
{kMonMaxFanSpeed, kMonMaxFanSpeedFName},
{kMonFanRPMs, kMonFanRPMsName},
{kMonPowerCap, kMonPowerCapName},
{kMonPowerCapMax, kMonPowerCapMaxName},
{kMonPowerCapMin, kMonPowerCapMinName},
{kMonTempMax, kMonTempMaxName},
{kMonTempMin, kMonTempMinName},
{kMonTempMaxHyst, kMonTempMaxHystName},
{kMonTempMinHyst, kMonTempMinHystName},
{kMonTempCritical, kMonTempCriticalName},
{kMonTempCriticalHyst, kMonTempCriticalHystName},
{kMonTempEmergency, kMonTempEmergencyName},
{kMonTempEmergencyHyst, kMonTempEmergencyHystName},
{kMonTempCritMin, kMonTempCritMinName},
{kMonTempCritMinHyst, kMonTempCritMinHystName},
{kMonTempOffset, kMonTempOffsetName},
{kMonTempLowest, kMonTempLowestName},
{kMonTempHighest, kMonTempHighestName},
};
Monitor::Monitor(std::string path) : path_(path) {
@@ -80,55 +119,34 @@ Monitor::Monitor(std::string path) : path_(path) {
// Destructor. The body is empty, so no explicit cleanup is performed here.
Monitor::~Monitor(void) {
}
int Monitor::readMonitorStr(MonitorTypes type, std::string *retStr) {
auto tempPath = path_;
std::string
Monitor::MakeMonitorPath(MonitorTypes type, int32_t sensor_id) {
std::string tempPath = path_;
std::string fn = kMonitorNameMap.at(type);
assert(retStr != nullptr);
std::replace(fn.begin(), fn.end(), '#', static_cast<char>('0' + sensor_id));
tempPath += "/";
tempPath += kMonitorNameMap.at(type);
tempPath += fn;
std::ifstream fs;
fs.open(tempPath);
if (!fs.is_open()) {
return -1;
}
fs >> *retStr;
fs.close();
return 0;
return tempPath;
}
int Monitor::readMonitor(MonitorTypes type, uint32_t *val) {
assert(val != nullptr);
std::string tempStr;
switch (type) {
case kMonTemp: // Temperature in millidegrees
case kMonFanSpeed:
case kMonMaxFanSpeed:
if (readMonitorStr(type, &tempStr)) {
return -1;
}
*val = std::stoi(tempStr);
return 0;
default:
return -1;
}
int Monitor::writeMonitor(MonitorTypes type, uint32_t sensor_id,
std::string val) {
std::string sysfs_path = MakeMonitorPath(type, sensor_id);
return WriteSysfsStr(sysfs_path, val);
}
// This string version should work for all valid monitor types
int Monitor::readMonitor(MonitorTypes type, std::string *val) {
int Monitor::readMonitor(MonitorTypes type, uint32_t sensor_id,
std::string *val) {
assert(val != nullptr);
if (readMonitorStr(type, val)) {
return -1;
}
std::string temp_str;
std::string sysfs_path = MakeMonitorPath(type, sensor_id);
return 0;
return ReadSysfsStr(sysfs_path, val);
}
Исполняемый файл
+152
Просмотреть файл
@@ -0,0 +1,152 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include <assert.h>
#include <fstream>
#include <string>
#include <cstdint>
#include <map>
#include <iostream>
#include <sstream>
#include "rocm_smi/rocm_smi_main.h"
#include "rocm_smi/rocm_smi_monitor.h"
#include "rocm_smi/rocm_smi_utils.h"
namespace amd {
namespace smi {
// Name of the file, relative to a PowerMon's path, whose text is parsed for
// power readings.
static const char *kPowerMonPMName = "amdgpu_pm_info";
// Using this map in case we add other files from dri directory to parse.
// Both power types currently resolve to the same file; the individual reading
// is picked out of that file's text by parse_power_str().
static const std::map<PowerMonTypes, const char *> kMonitorNameMap = {
{kPowerMaxGPUPower, kPowerMonPMName},
{kPowerAveGPUPower, kPowerMonPMName},
};
// Constructs a PowerMon whose files are looked up under |path|. The path is
// only stored; no file is opened or validated here.
PowerMon::PowerMon(std::string path) : path_(path) {
}
// Destructor. The body is empty, so no explicit cleanup is performed here.
PowerMon::~PowerMon(void) {
}
// Extracts one power reading from the text of the power-info file.
//
// The first line of |s| that contains the marker for |type| ("(max GPU)" or
// "(average GPU)") is expected to start with "<number> W". The number is
// converted from Watts to milliwatts (rounded to the nearest milliwatt) and
// stored in |*val|.
//
// Returns:
//   0       on success.
//   EINVAL  if |type| is not a supported power type, or the matching line
//           does not start with a non-negative number followed by "W".
//   EPERM   if no line of |s| contains the marker for |type|.
// |*val| is only written on success.
static int parse_power_str(std::string s, PowerMonTypes type, uint64_t *val) {
  assert(val != nullptr);

  std::string search_str;
  switch (type) {
    case kPowerMaxGPUPower:
      search_str = "(max GPU)";
      break;
    case kPowerAveGPUPower:
      search_str = "(average GPU)";
      break;
    default:
      assert(!"Invalid search Power type requested");
      return EINVAL;
  }

  // Locate the first line that carries the requested marker.
  std::stringstream ss(s);
  std::string ln;
  bool found = false;
  while (std::getline(ss, ln)) {
    if (ln.find(search_str) != std::string::npos) {
      found = true;
      break;
    }
  }
  if (!found) {
    return EPERM;
  }

  // Both supported types share the same "<number> <unit> <marker>" layout.
  std::stringstream l_ss(ln);
  double num_units = 0.0;
  std::string sz;
  l_ss >> num_units >> sz;

  // Validate at run time: an assert alone disappears in release builds and
  // would let an unparsed (indeterminate) value reach the caller. The
  // negated comparison also rejects NaN; negative values are rejected because
  // converting them to an unsigned type is undefined.
  if (l_ss.fail() || !(num_units >= 0.0)) {
    return EINVAL;
  }
  if (sz != "W") {  // We only expect Watts at this time
    return EINVAL;
  }

  // Convert Watts to milliwatts. Add 0.5 before truncating so that binary
  // floating-point error (e.g. 1.005 * 1000 == 1004.99...) cannot drop a
  // milliwatt.
  *val = static_cast<uint64_t>(num_units * 1000.0 + 0.5);
  return 0;
}
// Reads the file associated with |type| from under this monitor's path and
// parses the requested power reading out of it into |*power|.
//
// Returns 0 on success; otherwise the non-zero code reported by
// ReadSysfsStr() or parse_power_str(). A |type| with no entry in
// kMonitorNameMap makes std::map::at() throw std::out_of_range.
int PowerMon::readPowerValue(PowerMonTypes type, uint64_t *power) {
  assert(power != nullptr);

  // <monitor path>/<file name for this power type>
  std::string file_path(path_);
  file_path.append("/");
  file_path.append(kMonitorNameMap.at(type));

  std::string contents;
  const int status = ReadSysfsStr(file_path, &contents);
  if (status != 0) {
    return status;
  }

  return parse_power_str(contents, type, power);
}
} // namespace smi
} // namespace amd
Исполняемый файл
+95
Просмотреть файл
@@ -0,0 +1,95 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2018, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include <assert.h>
#include <errno.h>
#include <fstream>
#include <string>
#include <cstdint>
#include <iostream>
#include <sstream>
namespace amd {
namespace smi {
// Writes |val| to the file at |path|, replacing its previous contents.
//
// Returns 0 on success. On failure returns the errno value recorded for the
// failing operation, or EIO when the stream reports a failure but errno was
// left at 0. errno is reset to 0 before returning an error.
//
// The data is flushed explicitly and the stream state is checked afterwards:
// the stream buffers the value, so a rejected write only becomes visible at
// flush/close time and would otherwise be reported as success.
int WriteSysfsStr(std::string path, std::string val) {
  std::ofstream fs;
  int ret = 0;

  // Clear errno so a stale value from an earlier call is never reported.
  errno = 0;
  fs.open(path);
  if (!fs.is_open()) {
    ret = errno;
    errno = 0;
    return ret != 0 ? ret : EIO;
  }

  fs << val;
  fs.flush();
  if (fs.fail()) {
    // Capture errno before close() gets a chance to overwrite it.
    ret = errno;
    fs.close();
    errno = 0;
    return ret != 0 ? ret : EIO;
  }

  fs.close();
  if (fs.fail()) {
    ret = errno;
    errno = 0;
    return ret != 0 ? ret : EIO;
  }

  return 0;
}
// Reads the entire contents of the file at |path| into |*retStr|, including
// any trailing newline.
//
// Returns 0 on success. If the file cannot be opened, returns the errno value
// recorded for the failure (EIO when errno was left at 0), resets errno to 0
// and leaves |*retStr| untouched.
int ReadSysfsStr(std::string path, std::string *retStr) {
  assert(retStr != nullptr);

  // Clear errno so that an open failure can never be reported as success (or
  // as an unrelated stale error) when the library does not set it.
  errno = 0;
  std::ifstream fs;
  fs.open(path);
  if (!fs.is_open()) {
    const int err = errno;
    errno = 0;
    return err != 0 ? err : EIO;
  }

  std::stringstream ss;
  ss << fs.rdbuf();
  fs.close();

  *retStr = ss.str();
  return 0;
}
} // namespace smi
} // namespace amd