From 93055a1698eef0efed60bd97046308396efe83a3 Mon Sep 17 00:00:00 2001 From: Chris Freehill Date: Thu, 28 Mar 2019 17:01:35 -0500 Subject: [PATCH] Added new clock types Also added missing error code strings and improved test output messages [ROCm/rocm_smi_lib commit: c77f3c0ebd94b72f0bbe66b6b0ac9493826124a5] --- .../rocm-smi-lib/include/rocm_smi/rocm_smi.h | 13 +++- .../include/rocm_smi/rocm_smi_common.h | 5 +- .../include/rocm_smi/rocm_smi_device.h | 5 +- projects/rocm-smi-lib/src/rocm_smi.cc | 54 ++++++++++++--- projects/rocm-smi-lib/src/rocm_smi_device.cc | 20 ++++-- .../functional/frequencies_read.cc | 38 ++++++----- .../functional/frequencies_read_write.cc | 67 ++++++++++++++++++- .../tests/rocm_smi_test/test_common.cc | 13 +++- .../tests/rocm_smi_test/test_common.h | 1 + 9 files changed, 180 insertions(+), 36 deletions(-) diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h index c6f0be7177..2a58a984eb 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h @@ -85,7 +85,7 @@ typedef enum { RSMI_STATUS_INVALID_ARGS, //!< Passed in arguments are not valid RSMI_STATUS_NOT_SUPPORTED, //!< The requested information or //!< action is not available for the - //!< given input + //!< given input, on the given system RSMI_STATUS_FILE_ERROR, //!< Problem accessing a file. This //!< may because the operation is not //!< supported by the Linux kernel @@ -147,9 +147,16 @@ typedef rsmi_dev_perf_level_t rsmi_dev_perf_level; typedef enum { RSMI_CLK_TYPE_SYS = 0x0, //!< System clock RSMI_CLK_TYPE_FIRST = RSMI_CLK_TYPE_SYS, - + RSMI_CLK_TYPE_DF, //!< Data Fabric clock (for ASICs + //!< running on a separate clock) + RSMI_CLK_TYPE_DCEF, //!< Display Controller Engine clock + RSMI_CLK_TYPE_SOC, //!< SOC clock RSMI_CLK_TYPE_MEM, //!< Memory clock - RSMI_CLK_TYPE_LAST = RSMI_CLK_TYPE_MEM + + // Add new clocks to the end (not in the middle) and update + // RSMI_CLK_TYPE_LAST + RSMI_CLK_TYPE_LAST = RSMI_CLK_TYPE_MEM, + RSMI_CLK_INVALID = 0xFFFFFFFF } rsmi_clk_type_t; /// \cond Ignore in docs. typedef rsmi_clk_type_t rsmi_clk_type; diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_common.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_common.h index d7e4e487bf..92b7116969 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_common.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_common.h @@ -50,10 +50,11 @@ std::cout << "*****" << __FUNCTION__ << std::endl; \ std::cout << "*****Opening file: " << (FN) << std::endl; \ if ((WR_STR) != nullptr) { \ - std::cout << "***** for writing. Writing: \"" << *(WR_STR) << "\""; \ + std::cout << "***** for writing. Writing: \"" << (WR_STR) << "\""; \ } else { std::cout << "***** for reading.";} \ std::cout << std::endl; \ - std::cout << " at " << __FILE__ << ":" << __LINE__ << std::endl;\ + std::cout << " at " << __FILE__ << ":" << std::dec << __LINE__ << \ + std::endl;\ } // Add different debug filters here, as powers of 2; e.g, 1, 2, 4, 8, ... diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h index 07292a6840..be4a74a3bb 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h @@ -65,6 +65,9 @@ enum DevInfoTypes { kDevSubSysVendorID, kDevGPUMClk, kDevGPUSClk, + kDevDCEFClk, + kDevFClk, + kDevSOCClk, kDevPCIEClk, kDevPowerProfileMode, kDevUsage, @@ -113,7 +116,7 @@ class Device { uint32_t index_; const RocmSMI_env_vars *env_; template int openSysfsFileStream(DevInfoTypes type, T *fs, - bool write = false); + const char *str = nullptr); int readDevInfoStr(DevInfoTypes type, std::string *retStr); int readDevInfoMultiLineStr(DevInfoTypes type, diff --git a/projects/rocm-smi-lib/src/rocm_smi.cc b/projects/rocm-smi-lib/src/rocm_smi.cc index 15bcc0c06f..2fbf95c1a8 100755 --- a/projects/rocm-smi-lib/src/rocm_smi.cc +++ b/projects/rocm-smi-lib/src/rocm_smi.cc @@ -879,16 +879,30 @@ rsmi_status_t rsmi_dev_gpu_clk_freq_get(uint32_t dv_ind, rsmi_clk_type_t clk_type, rsmi_frequencies_t *f) { TRY + amd::smi::DevInfoTypes dev_type; + switch (clk_type) { case RSMI_CLK_TYPE_SYS: - return get_frequencies(amd::smi::kDevGPUSClk, dv_ind, f); + dev_type = amd::smi::kDevGPUSClk; break; case RSMI_CLK_TYPE_MEM: - return get_frequencies(amd::smi::kDevGPUMClk, dv_ind, f); + dev_type = amd::smi::kDevGPUMClk; + break; + case RSMI_CLK_TYPE_DF: + dev_type = amd::smi::kDevFClk; + break; + case RSMI_CLK_TYPE_DCEF: + dev_type = amd::smi::kDevDCEFClk; + break; + case RSMI_CLK_TYPE_SOC: + dev_type = amd::smi::kDevSOCClk; break; default: return RSMI_STATUS_INVALID_ARGS; } + + return get_frequencies(dev_type, dv_ind, f); + CATCH } @@ -940,22 +954,30 @@ rsmi_dev_gpu_clk_freq_set(uint32_t dv_ind, } int ret_i; + amd::smi::DevInfoTypes dev_type; + switch (clk_type) { case RSMI_CLK_TYPE_SYS: - ret_i = dev->writeDevInfo(amd::smi::kDevGPUSClk, freq_enable_str); - return errno_to_rsmi_status(ret_i); + dev_type = amd::smi::kDevGPUSClk; break; - case RSMI_CLK_TYPE_MEM: - ret_i = dev->writeDevInfo(amd::smi::kDevGPUMClk, freq_enable_str); - return errno_to_rsmi_status(ret_i); + dev_type = amd::smi::kDevGPUMClk; + break; + case RSMI_CLK_TYPE_DF: + dev_type = amd::smi::kDevFClk; + break; + case RSMI_CLK_TYPE_SOC: + dev_type = amd::smi::kDevSOCClk; + break; + case RSMI_CLK_TYPE_DCEF: + dev_type = amd::smi::kDevDCEFClk; break; - default: return RSMI_STATUS_INVALID_ARGS; } - return RSMI_STATUS_SUCCESS; + ret_i = dev->writeDevInfo(dev_type, freq_enable_str); + return errno_to_rsmi_status(ret_i); CATCH } @@ -1691,6 +1713,20 @@ rsmi_status_string(rsmi_status_t status, const char **status_string) { "system for this device type"; break; + case RSMI_STATUS_NOT_FOUND: + *status_string = "An item required to complete the call was not found"; + break; + + case RSMI_STATUS_INSUFFICIENT_SIZE: + *status_string = "Not enough resources were available to fully execute" + " the call"; + break; + + case RSMI_STATUS_UNKNOWN_ERROR: + *status_string = "An unknown error prevented the call from completing" + " successfully"; + break; + default: *status_string = "An unknown error occurred"; return RSMI_STATUS_UNKNOWN_ERROR; diff --git a/projects/rocm-smi-lib/src/rocm_smi_device.cc b/projects/rocm-smi-lib/src/rocm_smi_device.cc index 4d6db9ff3b..748267b449 100755 --- a/projects/rocm-smi-lib/src/rocm_smi_device.cc +++ b/projects/rocm-smi-lib/src/rocm_smi_device.cc @@ -68,6 +68,9 @@ static const char *kDevSubSysVendorIDFName = "subsystem_vendor"; static const char *kDevOverDriveLevelFName = "pp_sclk_od"; static const char *kDevGPUSClkFName = "pp_dpm_sclk"; static const char *kDevGPUMClkFName = "pp_dpm_mclk"; +static const char *kDevDCEFClkFName = "pp_dpm_dcefclk"; +static const char *kDevFClkFName = "pp_dpm_fclk"; +static const char *kDevSOCClkFName = "pp_dpm_socclk"; static const char *kDevGPUPCIEClkFname = "pp_dpm_pcie"; static const char *kDevPowerProfileModeFName = "pp_power_profile_mode"; static const char *kDevPowerODVoltageFName = "pp_od_clk_voltage"; @@ -104,6 +107,9 @@ static const std::map kDevAttribNameMap = { {kDevSubSysVendorID, kDevSubSysVendorIDFName}, {kDevGPUMClk, kDevGPUMClkFName}, {kDevGPUSClk, kDevGPUSClkFName}, + {kDevDCEFClk, kDevDCEFClkFName}, + {kDevFClk, kDevFClkFName}, + {kDevSOCClk, kDevSOCClkFName}, {kDevPCIEClk, kDevGPUPCIEClkFname}, {kDevPowerProfileMode, kDevPowerProfileModeFName}, {kDevUsage, kDevUsageFName}, @@ -152,13 +158,13 @@ Device:: ~Device() { } template -int Device::openSysfsFileStream(DevInfoTypes type, T *fs, bool write) { +int Device::openSysfsFileStream(DevInfoTypes type, T *fs, const char *str) { auto sysfs_path = path_; if (env_->path_DRM_root_override && type == env_->enum_override) { sysfs_path = env_->path_DRM_root_override; - if (write) { + if (str) { sysfs_path += ".write"; } } @@ -166,7 +172,7 @@ int Device::openSysfsFileStream(DevInfoTypes type, T *fs, bool write) { sysfs_path += "/device/"; sysfs_path += kDevAttribNameMap.at(type); - DBG_FILE_ERROR(sysfs_path, (std::string *)nullptr); + DBG_FILE_ERROR(sysfs_path, str); if (!isRegularFile(sysfs_path)) { return EISDIR; } @@ -202,7 +208,7 @@ int Device::writeDevInfoStr(DevInfoTypes type, std::string valStr) { std::ofstream fs; int ret; - ret = openSysfsFileStream(type, &fs, true); + ret = openSysfsFileStream(type, &fs, valStr.c_str()); if (ret != 0) { return ret; } @@ -252,9 +258,12 @@ int Device::writeDevInfo(DevInfoTypes type, uint64_t val) { int Device::writeDevInfo(DevInfoTypes type, std::string val) { switch (type) { case kDevGPUMClk: + case kDevDCEFClk: + case kDevFClk: case kDevGPUSClk: case kDevPCIEClk: case kDevPowerODVoltage: + case kDevSOCClk: return writeDevInfoStr(type, val); default: @@ -347,7 +356,10 @@ int Device::readDevInfo(DevInfoTypes type, std::vector *val) { switch (type) { case kDevGPUMClk: case kDevGPUSClk: + case kDevDCEFClk: + case kDevFClk: case kDevPCIEClk: + case kDevSOCClk: case kDevPowerProfileMode: case kDevPowerODVoltage: case kDevErrCntSDMA: diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/frequencies_read.cc b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/frequencies_read.cc index 755cfb5901..cf604f3b3b 100755 --- a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/frequencies_read.cc +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/frequencies_read.cc @@ -109,18 +109,33 @@ void TestFrequenciesRead::Run(void) { TestBase::Run(); + for (uint32_t i = 0; i < num_monitor_devs(); ++i) { + auto freq_output = [&](rsmi_clk_type_t t, const char *name) { + err = rsmi_dev_gpu_clk_freq_get(i, t, &f); + if (err == RSMI_STATUS_NOT_SUPPORTED || err == RSMI_STATUS_FILE_ERROR) { + std::cout << "\t**Get " << name << ": Not supported on this machine" + << std::endl; + } else { + CHK_ERR_ASRT(err) + IF_VERB(STANDARD) { + std::cout << "\t**Supported " << name << " clock frequencies: "; + std::cout << f.num_supported << std::endl; + print_frequencies(&f); + } + } + }; + PrintDeviceHeader(i); - err = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_MEM, &f); - CHK_ERR_ASRT(err) - IF_VERB(STANDARD) { - std::cout << "\t**Supported GPU Memory clock frequencies: "; - std::cout << f.num_supported << std::endl; - print_frequencies(&f); - } + freq_output(RSMI_CLK_TYPE_MEM, "Supported GPU Memory"); + freq_output(RSMI_CLK_TYPE_SYS, "Supported GPU"); + freq_output(RSMI_CLK_TYPE_DF, "Data Fabric Clock"); + freq_output(RSMI_CLK_TYPE_DCEF, "Display Controller Engine Clock"); + freq_output(RSMI_CLK_TYPE_SOC, "SOC Clock"); + err = rsmi_dev_pci_bandwidth_get(i, &b); - if (err == RSMI_STATUS_NOT_YET_IMPLEMENTED) { + if (err == RSMI_STATUS_NOT_SUPPORTED || err == RSMI_STATUS_FILE_ERROR) { std::cout << "\t**Get PCIE Bandwidth: Not supported on this machine" << std::endl; } else { @@ -131,12 +146,5 @@ void TestFrequenciesRead::Run(void) { print_frequencies(&b.transfer_rate, b.lanes); } } - err = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_SYS, &f); - CHK_ERR_ASRT(err) - IF_VERB(STANDARD) { - std::cout << "\t**Supported GPU clock frequencies: "; - std::cout << f.num_supported << std::endl; - print_frequencies(&f); - } } } diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/frequencies_read_write.cc b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/frequencies_read_write.cc index 3b8f8999f1..0961c98850 100755 --- a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/frequencies_read_write.cc +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/frequencies_read_write.cc @@ -101,9 +101,73 @@ void TestFrequenciesReadWrite::Run(void) { PrintDeviceHeader(dv_ind); for (uint32_t clk = (uint32_t)RSMI_CLK_TYPE_FIRST; - clk <= RSMI_CLK_TYPE_LAST; ++clk) { + clk <= RSMI_CLK_TYPE_LAST; ++clk) { rsmi_clk = (rsmi_clk_type)clk; + auto freq_read = [&]() -> bool { + ret = rsmi_dev_gpu_clk_freq_get(dv_ind, rsmi_clk, &f); + if (ret == RSMI_STATUS_NOT_SUPPORTED || + ret == RSMI_STATUS_FILE_ERROR) { + std::cout << "\t**Set " << FreqEnumToStr(rsmi_clk) << + ": Not supported on this machine" << std::endl; + return false; + } else { + // CHK_ERR_ASRT(ret) + IF_VERB(STANDARD) { + std::cout << "Initial frequency for clock " << + FreqEnumToStr(rsmi_clk) << " is " << f.current << std::endl; + } + return true; + } + }; + + auto freq_write = [&]() { + // Set clocks to something other than the usual default of the lowest + // frequency. + freq_bitmask = 0b01100; // Try the 3rd and 4th clocks + + std::string freq_bm_str = + std::bitset(freq_bitmask).to_string(); + + freq_bm_str.erase(0, std::min(freq_bm_str.find_first_not_of('0'), + freq_bm_str.size()-1)); + + IF_VERB(STANDARD) { + std::cout << "Setting frequency mask for " << + FreqEnumToStr(rsmi_clk) << " to 0b" << freq_bm_str << " ..." << + std::endl; + } + ret = rsmi_dev_gpu_clk_freq_set(dv_ind, rsmi_clk, freq_bitmask); + CHK_ERR_ASRT(ret) + + ret = rsmi_dev_gpu_clk_freq_get(dv_ind, rsmi_clk, &f); + if (ret != RSMI_STATUS_SUCCESS) { + return; + } + + IF_VERB(STANDARD) { + std::cout << "Frequency is now index " << f.current << std::endl; + std::cout << "Resetting mask to all frequencies." << std::endl; + } + ret = rsmi_dev_gpu_clk_freq_set(dv_ind, rsmi_clk, 0xFFFFFFFF); + if (ret != RSMI_STATUS_SUCCESS) { + return; + } + + ret = rsmi_dev_perf_level_set(dv_ind, RSMI_DEV_PERF_LEVEL_AUTO); + if (ret != RSMI_STATUS_SUCCESS) { + return; + } + }; + + if (freq_read()) { + CHK_ERR_ASRT(ret) + } else { + continue; + } + freq_write(); + CHK_ERR_ASRT(ret) +#if 0 ret = rsmi_dev_gpu_clk_freq_get(dv_ind, rsmi_clk, &f); CHK_ERR_ASRT(ret) @@ -140,6 +204,7 @@ void TestFrequenciesReadWrite::Run(void) { ret = rsmi_dev_perf_level_set(dv_ind, RSMI_DEV_PERF_LEVEL_AUTO); CHK_ERR_ASRT(ret) +#endif } } } diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/test_common.cc b/projects/rocm-smi-lib/tests/rocm_smi_test/test_common.cc index 39a4ad7aa3..42c153de7f 100755 --- a/projects/rocm-smi-lib/tests/rocm_smi_test/test_common.cc +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/test_common.cc @@ -135,7 +135,18 @@ const char *GetBlockNameStr(rsmi_gpu_block_t id) { return kBlockNameMap.at(id); } - +const char *FreqEnumToStr(rsmi_clk_type rsmi_clk) { + static_assert(RSMI_CLK_TYPE_LAST == RSMI_CLK_TYPE_MEM, + "FreqEnumToStr() needs to be updated"); + switch (rsmi_clk) { + case RSMI_CLK_TYPE_SYS: return "System clock"; + case RSMI_CLK_TYPE_DF: return "Data Fabric clock"; + case RSMI_CLK_TYPE_DCEF: return "Display Controller Engine clock"; + case RSMI_CLK_TYPE_SOC: return "SOC clock"; + case RSMI_CLK_TYPE_MEM: return "Memory clock"; + default: return "Invalid Clock ID"; + } +} #if ENABLE_SMI void DumpMonitorInfo(const TestBase *test) { diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/test_common.h b/projects/rocm-smi-lib/tests/rocm_smi_test/test_common.h index 5228ff441c..df95fe2f27 100755 --- a/projects/rocm-smi-lib/tests/rocm_smi_test/test_common.h +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/test_common.h @@ -62,6 +62,7 @@ uint32_t ProcessCmdline(RSMITstGlobals* test, int arg_cnt, char** arg_list); void PrintTestHeader(uint32_t dv_ind); const char *GetBlockNameStr(rsmi_gpu_block_t id); +const char *FreqEnumToStr(rsmi_clk_type rsmi_clk); #if ENABLE_SMI void DumpMonitorInfo(const TestBase *test);