Merge amd-staging into amd-master 20231005

Change-Id: Ie217f139f63aa10ec5e9ce48797b7cb94864736d
Signed-off-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>
This commit is contained in:
Galantsev, Dmitrii
2023-10-05 16:22:31 -05:00
7 zmienionych plików z 154 dodań i 100 usunięć
+16 -14
Wyświetl plik
@@ -1,11 +1,11 @@
#
# Minimum version of cmake required
# Minimum version of cmake and C++ required
#
cmake_minimum_required(VERSION 3.6.3)
cmake_minimum_required(VERSION 3.14)
set(AMD_SMI_LIBS_TARGET "amd_smi_libraries")
set(ROCM_SMI_LIBS_TARGET "rocm_smi_libraries")
set ( BUILD_SHARED_LIBS ON CACHE BOOL "Build shared library (.so) or not.")
set(BUILD_SHARED_LIBS ON CACHE BOOL "Build shared library (.so) or not.")
## Set default module path if not already set
if(NOT DEFINED CMAKE_MODULE_PATH)
@@ -35,19 +35,19 @@ find_program (GIT NAMES git)
## Setup the package version based on git tags.
set(PKG_VERSION_GIT_TAG_PREFIX "rsmi_pkg_ver")
get_package_version_number("5.0.0" ${PKG_VERSION_GIT_TAG_PREFIX} GIT)
get_package_version_number("6.0.0" ${PKG_VERSION_GIT_TAG_PREFIX} GIT)
message("Package version: ${PKG_VERSION_STR}")
set(${AMD_SMI_LIBS_TARGET}_VERSION_MAJOR "${VERSION_MAJOR}")
set(${AMD_SMI_LIBS_TARGET}_VERSION_MINOR "${VERSION_MINOR}")
set(${AMD_SMI_LIBS_TARGET}_VERSION_PATCH "0")
set(${AMD_SMI_LIBS_TARGET}_VERSION_BUILD "0")
set(${ROCM_SMI_LIBS_TARGET}_VERSION_MAJOR "${VERSION_MAJOR}")
set(${ROCM_SMI_LIBS_TARGET}_VERSION_MINOR "${VERSION_MINOR}")
set(${ROCM_SMI_LIBS_TARGET}_VERSION_PATCH "0")
set(${ROCM_SMI_LIBS_TARGET}_VERSION_BUILD "0")
# The following default version values should be updated as appropriate for
# ABI breaks (update MAJOR and MINOR), and ABI/API additions (update MINOR).
# Until ABI stabilizes VERSION_MAJOR will be 0. This should be overridden
# by git tags (through "git describe") when they are present.
set(PKG_VERSION_MAJOR 1)
set(PKG_VERSION_MINOR 0)
set(PKG_VERSION_MAJOR "${VERSION_MAJOR}")
set(PKG_VERSION_MINOR "${VERSION_MINOR}")
set(PKG_VERSION_PATCH 0)
set(PKG_VERSION_NUM_COMMIT 0)
@@ -57,7 +57,9 @@ set(CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE STRING "Default installation director
set(COMMON_SRC_ROOT ${CMAKE_CURRENT_SOURCE_DIR} CACHE STRING "Location source code common root.")
set(ROCM_SMI_PACKAGE rocm-smi-lib)
project(${AMD_SMI_LIBS_TARGET})
project(${ROCM_SMI_LIBS_TARGET})
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
include(GNUInstallDirs)
set(COMMON_PROJ_ROOT ${PROJECT_SOURCE_DIR})
@@ -70,7 +72,7 @@ endif()
## Compiler flags
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -Wall -Wextra -fno-rtti -m64 -msse -msse2 -std=c++11 ")
"${CMAKE_CXX_FLAGS} -Wall -Wextra -fno-rtti -m64 -msse -msse2 ")
# Security options
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -Wconversion -Wcast-align ")
@@ -78,7 +80,7 @@ set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -Wformat=2 -fno-common -Wstrict-overflow ")
# Intentionally leave out -Wsign-promo. It causes spurious warnings.
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -Woverloaded-virtual -Wreorder ")
"${CMAKE_CXX_FLAGS} -Woverloaded-virtual -Wreorder ")
# Clang does not set the build-id
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+1 -1
Wyświetl plik
@@ -52,7 +52,7 @@ function( parse_version VERSION_STRING )
string ( SUBSTRING ${VERSION_STRING} ${STRING_INDEX} -1 VERSION_BUILD )
endif ()
string ( REGEX MATCHALL "[0123456789]+" VERSIONS ${VERSION_STRING} )
string ( REGEX MATCHALL "[0-9]+" VERSIONS ${VERSION_STRING} )
list ( LENGTH VERSIONS VERSION_COUNT )
if ( ${VERSION_COUNT} GREATER 0)
+38 -5
Wyświetl plik
@@ -30,8 +30,8 @@ from rsmiBindings import *
# Minor version - Increment when adding a new feature, set to 0 when major is incremented
# Patch version - Increment when adding a fix, set to 0 when minor is incremented
SMI_MAJ = 1
SMI_MIN = 4
SMI_PAT = 1
SMI_MIN = 5
SMI_PAT = 0
__version__ = '%s.%s.%s' % (SMI_MAJ, SMI_MIN, SMI_PAT)
# Set to 1 if an error occurs
@@ -1694,6 +1694,32 @@ def setNPSMode(deviceList, npsMode):
printErrLog(device, 'Failed to retrieve NPS mode, even though device supports it.')
printLogSpacer()
def showVersion(isCSV=False):
    """ Display the ROCM-SMI tool version and, when available, the library version

    The tool version always comes from __version__. The library version is
    queried through rocmsmi.rsmi_version_get() and is only included when that
    call returns status 0.

    Output format: CSV rows when isCSV is set, a JSON object when the global
    PRINT_JSON is set, otherwise plain 'name: value' lines.

    @param isCSV: If True, print the versions as 'name, value' CSV rows
    """
    values = { 'ROCM-SMI version': __version__ }
    version = rsmi_version_t()
    status = rocmsmi.rsmi_version_get(byref(version))
    if status == 0:
        version_string = "%u.%u.%u" % (version.major, version.minor, version.patch)
        values['ROCM-SMI-LIB version'] = version_string
    if isCSV:
        print('name, value')
        for k in values.keys():
            print('%s, %s' % (k, values[k]))
        return
    if PRINT_JSON:
        # Join the entries with ',\n' so no trailing comma is emitted and the
        # object is always closed, even when only the tool version is known
        # (i.e. the library version query failed).
        entries = [' "%s": "%s"' % (k, values[k]) for k in values.keys()]
        print('{\n' + ',\n'.join(entries) + '\n}')
        return
    for k in values.keys():
        print('%s: %s' % (k, values[k]))
def showAllConcise(deviceList):
""" Display critical info for all devices in a concise format
@@ -2071,7 +2097,7 @@ def showFwInfo(deviceList, fwType):
ret = rocmsmi.rsmi_dev_firmware_version_get(device, fw_block_names_l.index(fw_name), byref(fw_ver))
if rsmi_ret_ok(ret, device, 'get_firmware_version_' + str(fw_name)):
# The VCN, VCE, UVD, SOS and ASD firmware's value needs to be in hexadecimal
if fw_name in ['VCN', 'VCE', 'UVD', 'SOS', 'ASD']:
if fw_name in ['VCN', 'VCE', 'UVD', 'SOS', 'ASD', 'MES', 'MES KIQ']:
printLog(device, '%s firmware version' % (fw_name),
'\t0x%s' % (str(hex(fw_ver.value))[2:].zfill(8)))
# The TA XGMI, TA RAS, and SMC firmware's hex value looks like 0x12345678
@@ -2974,7 +3000,7 @@ def showTempGraph(deviceList):
printLogSpacer()
def showVersion(deviceList, component):
def showDriverVersion(deviceList, component):
""" Display the software version for the specified component
@param deviceList: List of DRM devices (can be a single-item list)
@@ -3633,6 +3659,7 @@ if __name__ == '__main__':
parser = argparse.ArgumentParser(
description='AMD ROCm System Management Interface | ROCM-SMI version: %s' % __version__,
formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=90, width=120))
groupVersion = parser.add_argument_group()
groupDev = parser.add_argument_group()
groupDisplayOpt = parser.add_argument_group('Display Options')
groupDisplayTop = parser.add_argument_group('Topology')
@@ -3646,6 +3673,7 @@ if __name__ == '__main__':
groupResponse = parser.add_argument_group('Auto-response options')
groupActionOutput = parser.add_argument_group('Output options')
groupVersion.add_argument('-V', '--version', help='Show version information', action='store_true')
groupDev.add_argument('-d', '--device', help='Execute command on specified device', type=int, nargs='+')
groupDisplayOpt.add_argument('--alldevices', action='store_true') # ------------- function deprecated, no help menu
groupDisplayOpt.add_argument('--showhw', help='Show Hardware details', action='store_true')
@@ -3794,11 +3822,16 @@ if __name__ == '__main__':
# Must set PRINT_JSON early so the prints can be silenced
if args.json or args.csv:
PRINT_JSON = True
# Initialize rsmiBindings
rocmsmi = initRsmiBindings(silent=PRINT_JSON)
# Initialize the rocm SMI library
initializeRsmi()
if args.version:
showVersion(isCSV=args.csv)
sys.exit()
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING)
if args.loglevel is not None:
numericLogLevel = getattr(logging, args.loglevel.upper(), logging.WARNING)
@@ -3897,7 +3930,7 @@ if __name__ == '__main__':
if args.showhw:
showAllConciseHw(deviceList)
if args.showdriverversion:
showVersion(deviceList, rsmi_sw_component_t.RSMI_SW_COMP_DRIVER)
showDriverVersion(deviceList, rsmi_sw_component_t.RSMI_SW_COMP_DRIVER)
if args.showtempgraph:
showTempGraph(deviceList)
if args.showid:
+17 -17
Wyświetl plik
@@ -37,8 +37,6 @@ def initRsmiBindings(silent=False):
print_silent('Using lib from %s' % path_librocm)
else:
print('Unable to find librocm_smi64.so.@VERSION_MAJOR@')
else:
print_silent('Library loaded from: %s ' % path_librocm)
# ----------> TODO: Support static libs as well as SO
try:
@@ -419,25 +417,27 @@ class rsmi_fw_block_t(c_int):
RSMI_FW_BLOCK_ME = 4
RSMI_FW_BLOCK_MEC = 5
RSMI_FW_BLOCK_MEC2 = 6
RSMI_FW_BLOCK_PFP = 7
RSMI_FW_BLOCK_RLC = 8
RSMI_FW_BLOCK_RLC_SRLC = 9
RSMI_FW_BLOCK_RLC_SRLG = 10
RSMI_FW_BLOCK_RLC_SRLS = 11
RSMI_FW_BLOCK_SDMA = 12
RSMI_FW_BLOCK_SDMA2 = 13
RSMI_FW_BLOCK_SMC = 14
RSMI_FW_BLOCK_SOS = 15
RSMI_FW_BLOCK_TA_RAS = 16
RSMI_FW_BLOCK_TA_XGMI = 17
RSMI_FW_BLOCK_UVD = 18
RSMI_FW_BLOCK_VCE = 19
RSMI_FW_BLOCK_VCN = 20
RSMI_FW_BLOCK_MES = 7
RSMI_FW_BLOCK_MES_KIQ = 8
RSMI_FW_BLOCK_PFP = 9
RSMI_FW_BLOCK_RLC = 10
RSMI_FW_BLOCK_RLC_SRLC = 11
RSMI_FW_BLOCK_RLC_SRLG = 12
RSMI_FW_BLOCK_RLC_SRLS = 13
RSMI_FW_BLOCK_SDMA = 14
RSMI_FW_BLOCK_SDMA2 = 15
RSMI_FW_BLOCK_SMC = 16
RSMI_FW_BLOCK_SOS = 17
RSMI_FW_BLOCK_TA_RAS = 18
RSMI_FW_BLOCK_TA_XGMI = 19
RSMI_FW_BLOCK_UVD = 20
RSMI_FW_BLOCK_VCE = 21
RSMI_FW_BLOCK_VCN = 22
RSMI_FW_BLOCK_LAST = RSMI_FW_BLOCK_VCN
# The following list corresponds to rsmi_fw_block_t
fw_block_names_l = ['ASD', 'CE', 'DMCU', 'MC', 'ME', 'MEC', 'MEC2', 'PFP',\
fw_block_names_l = ['ASD', 'CE', 'DMCU', 'MC', 'ME', 'MEC', 'MEC2', 'MES', 'MES KIQ', 'PFP',\
'RLC', 'RLC SRLC', 'RLC SRLG', 'RLC SRLS', 'SDMA', 'SDMA2',\
'SMC', 'SOS', 'TA RAS', 'TA XGMI', 'UVD', 'VCE', 'VCN']
+43 -27
Wyświetl plik
@@ -1232,16 +1232,16 @@ For the new format, GFXCLK field will show min and max values(0/1). If the curre
frequency is neither min nor max but lies within the range, this is indicated by
an additional value followed by * at index 1 and max value at index 2.
*/
static const uint32_t kOD_SCLK_label_array_index = 0;
static const uint32_t kOD_MCLK_label_array_index =
kOD_SCLK_label_array_index + 3;
static const uint32_t kOD_VDDC_CURVE_label_array_index =
kOD_MCLK_label_array_index + 2;
static const uint32_t kOD_OD_RANGE_label_array_index =
kOD_VDDC_CURVE_label_array_index + 4;
static const uint32_t kOD_VDDC_CURVE_start_index =
constexpr uint32_t kOD_SCLK_label_array_index = 0;
constexpr uint32_t kOD_MCLK_label_array_index =
kOD_SCLK_label_array_index + 3;
constexpr uint32_t kOD_VDDC_CURVE_label_array_index =
kOD_MCLK_label_array_index + 2;
constexpr uint32_t kOD_OD_RANGE_label_array_index =
kOD_VDDC_CURVE_label_array_index + 4;
constexpr uint32_t kOD_VDDC_CURVE_start_index =
kOD_OD_RANGE_label_array_index + 3;
// static const uint32_t kOD_VDDC_CURVE_num_lines =
// constexpr uint32_t kOD_VDDC_CURVE_num_lines =
// kOD_VDDC_CURVE_start_index + 4;
static rsmi_status_t get_od_clk_volt_info(uint32_t dv_ind,
@@ -1283,41 +1283,57 @@ static rsmi_status_t get_od_clk_volt_info(uint32_t dv_ind,
p->curr_sclk_range.upper_bound = freq_string_to_int(val_vec, nullptr,
nullptr, kOD_SCLK_label_array_index + 2);
if (val_vec.size() < (kOD_MCLK_label_array_index + 1)) {
return RSMI_STATUS_UNEXPECTED_SIZE;
}
// The condition below checks if it is the old style or new style format.
if (val_vec[kOD_MCLK_label_array_index] == "OD_MCLK:") {
p->curr_mclk_range.lower_bound = 0;
p->curr_mclk_range.upper_bound = freq_string_to_int(val_vec, nullptr,
nullptr, kOD_MCLK_label_array_index + 1);
p->curr_mclk_range.lower_bound = 0;
p->curr_mclk_range.upper_bound = freq_string_to_int(val_vec, nullptr,
nullptr, kOD_MCLK_label_array_index + 1);
} else if (val_vec[kOD_MCLK_label_array_index] == "MCLK:") {
p->curr_mclk_range.lower_bound = freq_string_to_int(val_vec, nullptr,
nullptr, kOD_MCLK_label_array_index + 1);
// the upper memory frequency is the last
p->curr_mclk_range.upper_bound = freq_string_to_int(val_vec, nullptr,
nullptr, last_item);
return RSMI_STATUS_SUCCESS;
} else if (val_vec[kOD_MCLK_label_array_index + 1] == "MCLK:") {
p->curr_sclk_range.upper_bound = freq_string_to_int(val_vec, nullptr,
nullptr, kOD_SCLK_label_array_index + 3);
p->curr_mclk_range.lower_bound = freq_string_to_int(val_vec, nullptr,
nullptr, kOD_MCLK_label_array_index + 2);
// the upper memory frequency is the last
p->curr_mclk_range.upper_bound = freq_string_to_int(val_vec, nullptr,
nullptr, last_item);
return RSMI_STATUS_SUCCESS;
p->curr_mclk_range.lower_bound = freq_string_to_int(val_vec, nullptr,
nullptr, kOD_MCLK_label_array_index + 1);
// the upper memory frequency is the last
p->curr_mclk_range.upper_bound = freq_string_to_int(val_vec, nullptr,
nullptr, last_item);
return RSMI_STATUS_SUCCESS;
} else {
if (val_vec.size() < (kOD_MCLK_label_array_index + 3)) {
return RSMI_STATUS_UNEXPECTED_SIZE;
}
if (val_vec[kOD_MCLK_label_array_index + 1] == "MCLK:") {
p->curr_sclk_range.upper_bound = freq_string_to_int(val_vec, nullptr,
nullptr, kOD_SCLK_label_array_index + 3);
p->curr_mclk_range.lower_bound = freq_string_to_int(val_vec, nullptr,
nullptr, kOD_MCLK_label_array_index + 2);
// the upper memory frequency is the last
p->curr_mclk_range.upper_bound = freq_string_to_int(val_vec, nullptr,
nullptr, last_item);
return RSMI_STATUS_SUCCESS;
}
return RSMI_STATUS_NOT_YET_IMPLEMENTED;
}
if (val_vec.size() < kOD_VDDC_CURVE_label_array_index) {
return RSMI_STATUS_UNEXPECTED_SIZE;
}
assert(val_vec[kOD_VDDC_CURVE_label_array_index] == "OD_VDDC_CURVE:");
if (val_vec[kOD_VDDC_CURVE_label_array_index] != "OD_VDDC_CURVE:") {
return RSMI_STATUS_UNEXPECTED_DATA;
}
uint32_t tmp = kOD_VDDC_CURVE_label_array_index + 1;
if (val_vec.size() < (tmp + RSMI_NUM_VOLTAGE_CURVE_POINTS)) {
return RSMI_STATUS_UNEXPECTED_SIZE;
}
for (uint32_t i = 0; i < RSMI_NUM_VOLTAGE_CURVE_POINTS; ++i) {
freq_volt_string_to_point(val_vec[tmp + i], &(p->curve.vc_points[i]));
}
if (val_vec.size() < (kOD_OD_RANGE_label_array_index + 2)) {
return RSMI_STATUS_UNEXPECTED_SIZE;
}
assert(val_vec[kOD_OD_RANGE_label_array_index] == "OD_RANGE:");
if (val_vec[kOD_OD_RANGE_label_array_index] != "OD_RANGE:") {
return RSMI_STATUS_UNEXPECTED_DATA;
+15 -20
Wyświetl plik
@@ -36,24 +36,13 @@ set(CMAKE_INSTALL_RPATH
${CMAKE_INSTALL_RPATH}
${RSMITST_RPATH})
# TODO: Try to find googletest
# DISABLED because we want to install gtest with rocm_smi_lib ourselves
#find_package(GTest 1.12.0)
# GTest_FOUND is set to TRUE if ANY version is found
# GTest_VERSION is set if 1.12.0 or newer version is found
if(NOT GTest_FOUND STREQUAL "TRUE" OR NOT DEFINED GTest_VERSION)
# Google Test wasn't found. Download and compile ourselves
include(FetchContent)
FetchContent_Declare(
googletest
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG release-1.12.0)
FetchContent_MakeAvailable(googletest)
install(TARGETS gtest gtest_main
DESTINATION ${SHARE_INSTALL_PREFIX}/rsmitst_tests
COMPONENT ${TESTS_COMPONENT})
endif()
# Download and compile googletest
include(FetchContent)
FetchContent_Declare(
googletest
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG v1.14.0)
FetchContent_MakeAvailable(googletest)
# Other source directories
aux_source_directory(${SRC_DIR}/functional functionalSources)
@@ -70,13 +59,13 @@ target_include_directories(${RSMITST} PUBLIC ${SRC_DIR}/..)
target_link_libraries(
${RSMITST}
PUBLIC ${ROCM_SMI_TARGET}
PUBLIC gtest
PUBLIC gtest_main
PUBLIC GTest::gtest_main
PUBLIC c
PUBLIC stdc++
PUBLIC pthread
PUBLIC dl)
# install tests
install(TARGETS ${RSMITST}
DESTINATION ${SHARE_INSTALL_PREFIX}/rsmitst_tests
COMPONENT ${TESTS_COMPONENT})
@@ -84,3 +73,9 @@ install(TARGETS ${RSMITST}
install(FILES rsmitst.exclude
DESTINATION ${SHARE_INSTALL_PREFIX}/rsmitst_tests
COMPONENT ${TESTS_COMPONENT})
# install googletest libraries with tests
install(TARGETS gtest gtest_main
DESTINATION ${SHARE_INSTALL_PREFIX}/rsmitst_tests
COMPONENT ${TESTS_COMPONENT})
@@ -43,9 +43,7 @@
*
*/
#include <stdint.h>
#include <stddef.h>
#include <cstdint>
#include <iostream>
#include <string>
@@ -87,15 +85,23 @@ void TestFrequenciesRead::Close() {
static void print_frequencies(rsmi_frequencies_t *f, uint32_t *l = nullptr) {
assert(f != nullptr);
for (uint32_t j = 0; j < f->num_supported; ++j) {
std::cout << "\t** " << j << ": " << f->frequency[j];
for (uint32_t clk_i = 0; clk_i < f->num_supported; ++clk_i) {
std::string clk_i_str;
if (f->has_deep_sleep) {
clk_i_str = (clk_i == 0) ? "S" : std::to_string(clk_i-1);
} else {
clk_i_str = std::to_string(clk_i);
}
std::cout << "\t** " <<
std::setw(2) << std::right << clk_i_str << ": " <<
std::setw(11) << std::right << f->frequency[clk_i];
if (l != nullptr) {
std::cout << "T/s; x" << l[j];
std::cout << "T/s; x" << l[clk_i];
} else {
std::cout << "Hz";
}
if (j == f->current) {
if (clk_i == f->current) {
std::cout << " *";
}
std::cout << std::endl;
@@ -123,12 +129,14 @@ void TestFrequenciesRead::Run(void) {
// Verify api support checking functionality is working
err = rsmi_dev_gpu_clk_freq_get(i, t, nullptr);
ASSERT_EQ(err, RSMI_STATUS_NOT_SUPPORTED);
return;
}
// special driver issue, shouldn't normally occur
if (err == RSMI_STATUS_UNEXPECTED_DATA) {
std::cerr << "WARN: Clock file [" << FreqEnumToStr(t) << "] exists on device [" << i << "] but empty!" << std::endl;
std::cerr << " Likely a driver issue!" << std::endl;
return;
}
CHK_ERR_ASRT(err)
@@ -158,15 +166,15 @@ void TestFrequenciesRead::Run(void) {
err = rsmi_dev_pci_bandwidth_get(i, nullptr);
ASSERT_EQ(err, RSMI_STATUS_NOT_SUPPORTED);
} else {
CHK_ERR_ASRT(err)
IF_VERB(STANDARD) {
std::cout << "\t**Supported PCIe bandwidths: ";
std::cout << b.transfer_rate.num_supported << std::endl;
print_frequencies(&b.transfer_rate, b.lanes);
// Verify api support checking functionality is working
err = rsmi_dev_pci_bandwidth_get(i, nullptr);
ASSERT_EQ(err, RSMI_STATUS_INVALID_ARGS);
}
CHK_ERR_ASRT(err)
IF_VERB(STANDARD) {
std::cout << "\t**Supported PCIe bandwidths: ";
std::cout << b.transfer_rate.num_supported << std::endl;
print_frequencies(&b.transfer_rate, b.lanes);
// Verify api support checking functionality is working
err = rsmi_dev_pci_bandwidth_get(i, nullptr);
ASSERT_EQ(err, RSMI_STATUS_INVALID_ARGS);
}
}
}
}