From 573620f586309fe070a65753bcb4ca057dbc8f49 Mon Sep 17 00:00:00 2001 From: "Oliveira, Daniel" Date: Mon, 17 Jul 2023 22:39:08 -0500 Subject: [PATCH] Add revision to --showhw Code changes related to the following: * Added 'rsmi_dev_revision_get()' related code * Test code * Functional tests Change-Id: I8c2097c65384a028c8c8437b717d05d52fe45250 Signed-off-by: Oliveira, Daniel --- include/rocm_smi/rocm_smi.h | 15 ++++++++++++++ include/rocm_smi/rocm_smi_device.h | 1 + python_smi_tools/rocm_smi.py | 20 +++++++++++++++++-- python_smi_tools/rsmiBindings.py.in | 16 +++++++++++---- rocm_smi/example/rocm_smi_example.cc | 3 +++ src/rocm_smi.cc | 17 +++++++++++++++- src/rocm_smi_device.cc | 6 +++++- src/rocm_smi_main.cc | 1 + .../rocm_smi_test/functional/id_info_read.cc | 18 +++++++++++++++++ tests/rocm_smi_test/test_base.cc | 6 ++++++ tests/rocm_smi_test/test_common.cc | 1 + 11 files changed, 96 insertions(+), 8 deletions(-) diff --git a/include/rocm_smi/rocm_smi.h b/include/rocm_smi/rocm_smi.h index f0a531f154..6c0e1b9d60 100755 --- a/include/rocm_smi/rocm_smi.h +++ b/include/rocm_smi/rocm_smi.h @@ -1088,6 +1088,21 @@ rsmi_status_t rsmi_num_monitor_devices(uint32_t *num_devices); */ rsmi_status_t rsmi_dev_id_get(uint32_t dv_ind, uint16_t *id); +/** + * @brief Get the device revision associated with the device + * + * @details Given a device index @p dv_ind and a pointer to a uint16_t + * @p revision, this function writes the device revision to @p revision + * + * @param[in] dv_ind a device index + * + * @param[inout] revision a pointer to uint16_t to which the device revision + * will be written + * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
+ * + */ +rsmi_status_t rsmi_dev_revision_get(uint32_t dv_ind, uint16_t *revision); /** * @brief Get the SKU for a desired device associated with the device with diff --git a/include/rocm_smi/rocm_smi_device.h b/include/rocm_smi/rocm_smi_device.h index c975baae55..3dcf7e1345 100755 --- a/include/rocm_smi/rocm_smi_device.h +++ b/include/rocm_smi/rocm_smi_device.h @@ -100,6 +100,7 @@ enum DevInfoTypes { kDevOverDriveLevel, kDevMemOverDriveLevel, kDevDevID, + kDevDevRevID, kDevDevProdName, kDevDevProdNum, kDevVendorID, diff --git a/python_smi_tools/rocm_smi.py b/python_smi_tools/rocm_smi.py index 2fe68ab518..f8755c1954 100755 --- a/python_smi_tools/rocm_smi.py +++ b/python_smi_tools/rocm_smi.py @@ -249,6 +249,17 @@ def getId(device): return hex(dv_id.value) +def getRev(device): + """ Return the hexadecimal value of a device's Revision + + @param device: DRM device identifier + """ + dv_rev = c_short() + ret = rocmsmi.rsmi_dev_revision_get(device, byref(dv_rev)) + if rsmi_ret_ok(ret, device, 'get_device_rev'): + return hex(dv_rev.value) + + def getMaxPower(device): """ Return the maximum power cap of a given device @@ -1601,19 +1612,23 @@ def showAllConciseHw(deviceList): print('ERROR: Cannot print JSON/CSV output for concise hardware output') sys.exit(1) printLogSpacer(' Concise Hardware Info ') - header = ['GPU', 'DID', 'GFX RAS', 'SDMA RAS', 'UMC RAS', 'VBIOS', 'BUS'] + header = ['GPU', 'DID', 'DREV', 'GFX RAS', 'SDMA RAS', 'UMC RAS', 'VBIOS', 'BUS'] head_widths = [len(head) + 2 for head in header] values = {} for device in deviceList: gpuid = getId(device) if str(gpuid).startswith('0x'): gpuid = str(gpuid)[2:] + gpurev = getRev(device) + if str(gpurev).startswith('0x'): + gpurev = str(gpurev)[2:] + gfxRas = getRasEnablement(device, 'GFX') sdmaRas = getRasEnablement(device, 'SDMA') umcRas = getRasEnablement(device, 'UMC') vbios = getVbiosVersion(device) bus = getBus(device) - values['card%s' % (str(device))] = [device, gpuid, gfxRas, sdmaRas, umcRas, vbios, bus] + 
values['card%s' % (str(device))] = [device, gpuid, gpurev, gfxRas, sdmaRas, umcRas, vbios, bus] val_widths = {} for device in deviceList: val_widths[device] = [len(str(val)) + 2 for val in values['card%s' % (str(device))]] @@ -1952,6 +1967,7 @@ def showId(deviceList): printLogSpacer(' ID ') for device in deviceList: printLog(device, 'GPU ID', getId(device)) + printLog(device, 'GPU Rev', getRev(device)) printLogSpacer() diff --git a/python_smi_tools/rsmiBindings.py.in b/python_smi_tools/rsmiBindings.py.in index b6e7f2474d..9ffcac138d 100644 --- a/python_smi_tools/rsmiBindings.py.in +++ b/python_smi_tools/rsmiBindings.py.in @@ -11,8 +11,16 @@ import os # Use ROCm installation path if running from standard installation # With File Reorg rsmiBindings.py will be installed in /opt/rocm/libexec/rocm_smi. -# relative path changed accordingly -path_librocm = os.path.dirname(os.path.realpath(__file__)) + '/../../@CMAKE_INSTALL_LIBDIR@/librocm_smi64.so.@VERSION_MAJOR@' +# relative path changed accordingly. +# if ROCM_SMI_LIB_PATH is set, we can load 'librocm_smi64.so' from that location +# +path_librocm = str() +rocm_smi_lib_path = os.getenv('ROCM_SMI_LIB_PATH') +if (rocm_smi_lib_path != None): + path_librocm = rocm_smi_lib_path +else: + path_librocm = os.path.dirname(os.path.realpath(__file__)) + '/../../@CMAKE_INSTALL_LIBDIR@/librocm_smi64.so.@VERSION_MAJOR@' + if not os.path.isfile(path_librocm): print('Unable to find %s . 
Trying /opt/rocm*' % path_librocm) for root, dirs, files in os.walk('/opt', followlinks=True): @@ -22,9 +30,10 @@ if not os.path.isfile(path_librocm): print('Using lib from %s' % path_librocm) else: print('Unable to find librocm_smi64.so.@VERSION_MAJOR@') +else: + print('Library loaded from: %s ' % path_librocm) # ----------> TODO: Support static libs as well as SO - try: cdll.LoadLibrary(path_librocm) rocmsmi = CDLL(path_librocm) @@ -36,7 +45,6 @@ except OSError: .format('\33[33m', '\033[0m')) exit() - # Device ID dv_id = c_uint64() # GPU ID diff --git a/rocm_smi/example/rocm_smi_example.cc b/rocm_smi/example/rocm_smi_example.cc index bb456f7a0e..9e9019e2b8 100755 --- a/rocm_smi/example/rocm_smi_example.cc +++ b/rocm_smi/example/rocm_smi_example.cc @@ -718,6 +718,9 @@ int main() { ret = rsmi_dev_id_get(i, &val_ui16); CHK_RSMI_RET_I(ret) std::cout << "\t**Device ID: 0x" << std::hex << val_ui16 << std::endl; + ret = rsmi_dev_revision_get(i, &val_ui16); + CHK_RSMI_RET_I(ret) + std::cout << "\t**Dev.Rev.ID: 0x" << std::hex << val_ui16 << std::endl; char current_compute_partition[256]; current_compute_partition[0] = '\0'; diff --git a/src/rocm_smi.cc b/src/rocm_smi.cc index 4cd359ce3a..4851e8e398 100755 --- a/src/rocm_smi.cc +++ b/src/rocm_smi.cc @@ -632,7 +632,7 @@ rsmi_status_t rsmi_dev_ecc_count_get(uint32_t dv_ind, rsmi_gpu_block_t block, rsmi_error_count_t *ec) { std::vector val_vec; - rsmi_status_t ret; + rsmi_status_t ret(RSMI_STATUS_NOT_SUPPORTED); std::ostringstream ss; TRY @@ -820,6 +820,21 @@ rsmi_dev_id_get(uint32_t dv_ind, uint16_t *id) { return ret; } +rsmi_status_t +rsmi_dev_revision_get(uint32_t dv_ind, uint16_t *revision) { + std::ostringstream outss; + rsmi_status_t ret; + outss << __PRETTY_FUNCTION__ << "| ======= start ======="; + LOG_TRACE(outss); + CHK_SUPPORT_NAME_ONLY(revision) + + ret = get_id(dv_ind, amd::smi::kDevDevRevID, revision); + outss << __PRETTY_FUNCTION__ << " | ======= end =======" + << ", reporting " << 
amd::smi::getRSMIStatusString(ret); + LOG_TRACE(outss); + return ret; +} + rsmi_status_t rsmi_dev_sku_get(uint32_t dv_ind, uint16_t *id) { TRY diff --git a/src/rocm_smi_device.cc b/src/rocm_smi_device.cc index 554b8c0eb8..bdc5984ce4 100755 --- a/src/rocm_smi_device.cc +++ b/src/rocm_smi_device.cc @@ -85,6 +85,7 @@ static const char *kDevPerfLevelFName = "power_dpm_force_performance_level"; static const char *kDevDevProdNameFName = "product_name"; static const char *kDevDevProdNumFName = "product_number"; static const char *kDevDevIDFName = "device"; +static const char *kDevDevRevIDFName = "revision"; static const char *kDevVendorIDFName = "vendor"; static const char *kDevSubSysDevIDFName = "subsystem_device"; static const char *kDevSubSysVendorIDFName = "subsystem_vendor"; @@ -238,6 +239,7 @@ static const std::map kDevAttribNameMap = { {kDevDevProdName, kDevDevProdNameFName}, {kDevDevProdNum, kDevDevProdNumFName}, {kDevDevID, kDevDevIDFName}, + {kDevDevRevID, kDevDevRevIDFName}, {kDevVendorID, kDevVendorIDFName}, {kDevSubSysDevID, kDevSubSysDevIDFName}, {kDevSubSysVendorID, kDevSubSysVendorIDFName}, @@ -374,8 +376,8 @@ static const std::map kDevFuncDependsMap = { // Functions with only mandatory dependencies {"rsmi_dev_vram_vendor_get", {{kDevVramVendorFName}, {}}}, {"rsmi_dev_id_get", {{kDevDevIDFName}, {}}}, + {"rsmi_dev_revision_get", {{kDevDevRevIDFName}, {}}}, {"rsmi_dev_vendor_id_get", {{kDevVendorIDFName}, {}}}, - {"rsmi_dev_name_get", {{kDevVendorIDFName, kDevDevIDFName}, {}}}, {"rsmi_dev_sku_get", {{kDevDevProdNumFName}, {}}}, @@ -889,6 +891,7 @@ int Device::readDevInfo(DevInfoTypes type, uint64_t *val) { switch (type) { case kDevDevID: + case kDevDevRevID: case kDevSubSysDevID: case kDevSubSysVendorID: case kDevVendorID: @@ -1026,6 +1029,7 @@ int Device::readDevInfo(DevInfoTypes type, std::string *val) { case kDevDevProdName: case kDevDevProdNum: case kDevDevID: + case kDevDevRevID: case kDevSubSysDevID: case kDevSubSysVendorID: case kDevVendorID: diff 
--git a/src/rocm_smi_main.cc b/src/rocm_smi_main.cc index 3a5565dbe9..92ffe5af4f 100755 --- a/src/rocm_smi_main.cc +++ b/src/rocm_smi_main.cc @@ -84,6 +84,7 @@ amd::smi::RocmSMI::devInfoTypesStrings = { {amd::smi::kDevOverDriveLevel, amdSMI + "kDevOverDriveLevel"}, {amd::smi::kDevMemOverDriveLevel, amdSMI + "kDevMemOverDriveLevel"}, {amd::smi::kDevDevID, amdSMI + "kDevDevID"}, + {amd::smi::kDevDevRevID, amdSMI + "kDevDevRevID"}, {amd::smi::kDevDevProdName, amdSMI + "kDevDevProdName"}, {amd::smi::kDevDevProdNum, amdSMI + "kDevDevProdNum"}, {amd::smi::kDevVendorID, amdSMI + "kDevVendorID"}, diff --git a/tests/rocm_smi_test/functional/id_info_read.cc b/tests/rocm_smi_test/functional/id_info_read.cc index 11828feb85..1988d951a1 100755 --- a/tests/rocm_smi_test/functional/id_info_read.cc +++ b/tests/rocm_smi_test/functional/id_info_read.cc @@ -121,6 +121,24 @@ void TestIdInfoRead::Run(void) { err = rsmi_dev_id_get(i, nullptr); ASSERT_EQ(err, RSMI_STATUS_INVALID_ARGS); } + // Get device Revision + err = rsmi_dev_revision_get(i, &id); + if (err == RSMI_STATUS_NOT_SUPPORTED) { + rsmi_status_t ret; + // Verify api support checking functionality is working + ret = rsmi_dev_revision_get(i, nullptr); + ASSERT_EQ(ret, RSMI_STATUS_NOT_SUPPORTED); + } else { + CHK_ERR_ASRT(err) + + IF_VERB(STANDARD) { + std::cout << "\t**Dev.Rev.ID: 0x" << std::hex << id << std::endl; + } + // Verify api support checking functionality is working + err = rsmi_dev_revision_get(i, nullptr); + ASSERT_EQ(err, RSMI_STATUS_INVALID_ARGS); + } + err = rsmi_dev_name_get(i, buffer, kBufferLen); if (err == RSMI_STATUS_NOT_SUPPORTED) { std::cout << "\t**Device Marketing name not found on this system." 
<< diff --git a/tests/rocm_smi_test/test_base.cc b/tests/rocm_smi_test/test_base.cc index 6984736e75..a406868c63 100755 --- a/tests/rocm_smi_test/test_base.cc +++ b/tests/rocm_smi_test/test_base.cc @@ -132,6 +132,12 @@ void TestBase::PrintDeviceHeader(uint32_t dv_ind) { IF_VERB(STANDARD) { std::cout << "\t**Device ID: 0x" << std::hex << val_ui16 << std::endl; } + err = rsmi_dev_revision_get(dv_ind, &val_ui16); + CHK_ERR_ASRT(err) + IF_VERB(STANDARD) { + std::cout << "\t**Dev.Rev.ID: 0x" << std::hex << val_ui16 << std::endl; + } + char name[128]; err = rsmi_dev_name_get(dv_ind, name, 128); CHK_ERR_ASRT(err) diff --git a/tests/rocm_smi_test/test_common.cc b/tests/rocm_smi_test/test_common.cc index eabc6125b5..d7a8a34d86 100755 --- a/tests/rocm_smi_test/test_common.cc +++ b/tests/rocm_smi_test/test_common.cc @@ -278,6 +278,7 @@ void DumpMonitorInfo(const TestBase *test) { }; print_val_str(amd::smi::kDevDevID, "Device ID: "); + print_val_str(amd::smi::kDevDevRevID, "Dev.Rev.ID: "); print_val_str(amd::smi::kDevPerfLevel, "Performance Level: "); print_val_str(amd::smi::kDevOverDriveLevel, "OverDrive Level: "); print_vector(amd::smi::kDevGPUMClk,