diff --git a/projects/amdsmi/include/rocm_smi/rocm_smi.h b/projects/amdsmi/include/rocm_smi/rocm_smi.h index b7b5696f02..601814c333 100755 --- a/projects/amdsmi/include/rocm_smi/rocm_smi.h +++ b/projects/amdsmi/include/rocm_smi/rocm_smi.h @@ -165,8 +165,9 @@ typedef enum { //!< memory clock RSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK, //!< Stable power state with minimum //!< system clock + RSMI_DEV_PERF_LEVEL_DETERMINISM, //!< Performance determinism state - RSMI_DEV_PERF_LEVEL_LAST = RSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK, + RSMI_DEV_PERF_LEVEL_LAST = RSMI_DEV_PERF_LEVEL_DETERMINISM, RSMI_DEV_PERF_LEVEL_UNKNOWN = 0x100 //!< Unknown performance level } rsmi_dev_perf_level_t; @@ -2062,6 +2063,37 @@ rsmi_dev_busy_percent_get(uint32_t dv_ind, uint32_t *busy_percent); rsmi_status_t rsmi_dev_perf_level_get(uint32_t dv_ind, rsmi_dev_perf_level_t *perf); +/** + * @brief Enter performance determinism mode with provided device index. + * + * @details Given a device index @p dv_ind and @p freq_bitmask, this function + * will enable performance determinism mode, which enforces a GFXCLK frequency + * SoftMax limit per GPU set by the user. This prevents the GFXCLK FLL from + * stretching when running the same workload on different GPUS, making + * performance variation minimal. This call will result in the performance + * level ::rsmi_dev_perf_level_t of the device being + * ::RSMI_DEV_PERF_LEVEL_DETERMINISM. If a bit in @p freq_bitmask has a value + * of 1, then the frequency (as ordered in an ::rsmi_frequencies_t returned + * by rsmi_dev_gpu_clk_freq_get()) corresponding to that bit index will be + * allowed. + * ::rsmi_dev_perf_level_set() should be called with ::RSMI_DEV_PERF_LEVEL_AUTO + * to restore the performance level to the default value. + * + * @param[in] dv_ind a device index + * + * @param[in] freq_bitmask A bitmask indicating the indices of the + * frequencies that are to be enabled (1) and disabled (0). 
def resetPerfDeterminism(deviceList):
    """ Disable performance determinism on a list of devices

    Each device is switched back to performance level 0 through
    rsmi_dev_perf_level_set() and the outcome is logged per device.

    @param deviceList: Disable Performance Determinism for these devices
    """
    printLogSpacer('Disable Performance Determinism')
    for device in deviceList:
        # NOTE(review): level 0 is presumably RSMI_DEV_PERF_LEVEL_AUTO, the
        # level the C API documentation names for leaving determinism mode -
        # confirm against rsmiBindings.
        ret = rocmsmi.rsmi_dev_perf_level_set(device, rsmi_dev_perf_level_t(0))
        if rsmi_ret_ok(ret, device, 'disable performance determinism'):
            printLog(device, 'Successfully disabled performance determinism', None)
        else:
            logging.error('GPU[%s]\t\t: Unable to disable performance determinism', device)
    printLogSpacer()
def setPerfDeterminism(deviceList, value):
    """ Enable performance determinism on a list of devices

    Sets RETCODE to 1 when the value is not an integer or when the library
    call fails for any device.

    @param deviceList: List of DRM devices (can be a single-item list)
    @param value: Integer (or integer string) handed unchanged to
    rsmi_perf_determinism_mode_set(); the C header names that argument
    freq_bitmask
    """
    global RETCODE
    try:
        setting = int(value)
    except (TypeError, ValueError):
        # No single device is being processed yet, so report the bad value
        # against every requested device.
        for device in deviceList:
            printErrLog(device, 'Unable to set Performance Determinism')
        logging.error('%s is not an integer', value)
        RETCODE = 1
        return
    for device in deviceList:
        ret = rocmsmi.rsmi_perf_determinism_mode_set(device, setting)
        if rsmi_ret_ok(ret, device):
            printLog(device, 'Successfully set clock frequency', str(value))
        else:
            # Same two-argument printErrLog form as the call above.
            printErrLog(device, 'Unable to set clock frequency %s' % value)
            RETCODE = 1
(requires manual Perf level)') + groupAction.add_argument('--setperfdeterminism', help='Set clock frequency limit to get minimal performance variation', + type=int, metavar='LEVEL', nargs='+') groupAction.add_argument('--rasenable', help='Enable RAS for specified block and error type', type=str, nargs=2, metavar=('BLOCK', 'ERRTYPE')) groupAction.add_argument('--rasdisable', help='Disable RAS for specified block and error type', type=str, nargs=2, @@ -2560,7 +2602,7 @@ if __name__ == '__main__': if args.setsclk or args.setmclk or args.setpcie or args.resetfans or args.setfan or args.setperflevel or \ args.load or args.resetclocks or args.setprofile or args.resetprofile or args.setoverdrive or \ args.setmemoverdrive or args.setpoweroverdrive or args.resetpoweroverdrive or \ - args.rasenable or args.rasdisable or args.rasinject or args.gpureset or \ + args.rasenable or args.rasdisable or args.rasinject or args.gpureset or args.setperfdeterminism or\ args.setslevel or args.setmlevel or args.setvc or args.setsrange or args.setmrange or args.setclock: relaunchAsSudo() @@ -2773,10 +2815,14 @@ if __name__ == '__main__': setClockRange(deviceList, 'sclk', args.setsrange[0], args.setsrange[1], args.autorespond) if args.setmrange: setClockRange(deviceList, 'mclk', args.setmrange[0], args.setmrange[1], args.autorespond) + if args.setperfdeterminism: + setPerfDeterminism(deviceList, args.setperfdeterminism[0]) if args.resetprofile: resetProfile(deviceList) if args.resetxgmierr: resetXgmiErr(deviceList) + if args.resetperfdeterminism: + resetPerfDeterminism(deviceList) if args.rasenable: setRas(deviceList, 'enable', args.rasenable[0], args.rasenable[1]) if args.rasdisable: diff --git a/projects/amdsmi/python_smi_tools/rsmiBindings.py b/projects/amdsmi/python_smi_tools/rsmiBindings.py index 91b6751634..e86180be31 100644 --- a/projects/amdsmi/python_smi_tools/rsmiBindings.py +++ b/projects/amdsmi/python_smi_tools/rsmiBindings.py @@ -71,7 +71,8 @@ class rsmi_dev_perf_level_t(c_int): 
RSMI_DEV_PERF_LEVEL_STABLE_PEAK = 5 RSMI_DEV_PERF_LEVEL_STABLE_MIN_MCLK = 6 RSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK = 7 - RSMI_DEV_PERF_LEVEL_LAST = RSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK + RSMI_DEV_PERF_LEVEL_DETERMINISM = 8 + RSMI_DEV_PERF_LEVEL_LAST = RSMI_DEV_PERF_LEVEL_DETERMINISM RSMI_DEV_PERF_LEVEL_UNKNOWN = 0x100 diff --git a/projects/amdsmi/src/rocm_smi.cc b/projects/amdsmi/src/rocm_smi.cc index e87ce75bf8..bab5fc1e9b 100755 --- a/projects/amdsmi/src/rocm_smi.cc +++ b/projects/amdsmi/src/rocm_smi.cc @@ -768,6 +768,23 @@ rsmi_dev_perf_level_get(uint32_t dv_ind, rsmi_dev_perf_level_t *perf) { CATCH } +rsmi_status_t +rsmi_perf_determinism_mode_set(uint32_t dv_ind, uint64_t freq_bitmask) { + TRY + DEVICE_MUTEX + + rsmi_status_t ret = rsmi_dev_perf_level_set_v1(dv_ind, + RSMI_DEV_PERF_LEVEL_DETERMINISM); + if (ret != RSMI_STATUS_SUCCESS) { + return ret; + } + ret = rsmi_dev_gpu_clk_freq_set(dv_ind, RSMI_CLK_TYPE_SYS, freq_bitmask); + + return ret; + CATCH +} + + rsmi_status_t rsmi_dev_overdrive_level_get(uint32_t dv_ind, uint32_t *od) { TRY diff --git a/projects/amdsmi/src/rocm_smi_device.cc b/projects/amdsmi/src/rocm_smi_device.cc index ce8fe3797f..f5f6fcdfd5 100755 --- a/projects/amdsmi/src/rocm_smi_device.cc +++ b/projects/amdsmi/src/rocm_smi_device.cc @@ -209,6 +209,7 @@ static const char *kDevPerfLevelStandardStr = "profile_standard"; static const char *kDevPerfLevelMinMClkStr = "profile_min_mclk"; static const char *kDevPerfLevelMinSClkStr = "profile_min_sclk"; static const char *kDevPerfLevelPeakStr = "profile_peak"; +static const char *kDevPerfLevelDeterminismStr = "performance_determinism"; static const char *kDevPerfLevelUnknownStr = "unknown"; static const std::map kDevAttribNameMap = { @@ -283,6 +284,7 @@ static const std::map kDevPerfLvlMap = { {RSMI_DEV_PERF_LEVEL_STABLE_MIN_MCLK, kDevPerfLevelMinMClkStr}, {RSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK, kDevPerfLevelMinSClkStr}, {RSMI_DEV_PERF_LEVEL_STABLE_PEAK, kDevPerfLevelPeakStr}, + 
{RSMI_DEV_PERF_LEVEL_DETERMINISM, kDevPerfLevelDeterminismStr}, {RSMI_DEV_PERF_LEVEL_UNKNOWN, kDevPerfLevelUnknownStr}, }; @@ -368,7 +370,10 @@ static const std::map kDevFuncDependsMap = { {"rsmi_dev_overdrive_level_get", {{kDevOverDriveLevelFName}, {}}}, {"rsmi_dev_power_profile_presets_get", {{kDevPowerProfileModeFName}, {}}}, {"rsmi_dev_perf_level_set", {{kDevPerfLevelFName}, {}}}, + {"rsmi_dev_perf_level_set_v1", {{kDevPerfLevelFName}, {}}}, {"rsmi_dev_perf_level_get", {{kDevPerfLevelFName}, {}}}, + {"rsmi_perf_determinism_mode_set", {{kDevPerfLevelFName, + kDevGPUSClkFName}, {}}}, {"rsmi_dev_overdrive_level_set", {{kDevOverDriveLevelFName}, {}}}, {"rsmi_dev_vbios_version_get", {{kDevVBiosVerFName}, {}}}, {"rsmi_dev_od_volt_info_get", {{kDevPowerODVoltageFName}, {}}}, diff --git a/projects/amdsmi/tests/rocm_smi_test/functional/perf_determinism.cc b/projects/amdsmi/tests/rocm_smi_test/functional/perf_determinism.cc new file mode 100644 index 0000000000..0194f21e62 --- /dev/null +++ b/projects/amdsmi/tests/rocm_smi_test/functional/perf_determinism.cc @@ -0,0 +1,163 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2020, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +#include +#include + +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "rocm_smi/rocm_smi.h" +#include "rocm_smi_test/functional/perf_determinism.h" +#include "rocm_smi_test/test_common.h" + +static const std::map + kDevPerfLvlNameMap = { + {RSMI_DEV_PERF_LEVEL_AUTO, "RSMI_DEV_PERF_LEVEL_AUTO"}, + {RSMI_DEV_PERF_LEVEL_LOW, "RSMI_DEV_PERF_LEVEL_LOW"}, + {RSMI_DEV_PERF_LEVEL_HIGH, "RSMI_DEV_PERF_LEVEL_HIGH"}, + {RSMI_DEV_PERF_LEVEL_MANUAL, "RSMI_DEV_PERF_LEVEL_MANUAL"}, + {RSMI_DEV_PERF_LEVEL_STABLE_STD, "RSMI_DEV_PERF_LEVEL_STABLE_STD"}, + {RSMI_DEV_PERF_LEVEL_STABLE_MIN_MCLK, + "RSMI_DEV_PERF_LEVEL_STABLE_MIN_MCLK"}, + {RSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK, + "RSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK"}, + {RSMI_DEV_PERF_LEVEL_STABLE_PEAK, "RSMI_DEV_PERF_LEVEL_STABLE_PEAK"}, + + {RSMI_DEV_PERF_LEVEL_UNKNOWN, "RSMI_DEV_PERF_LEVEL_UNKNOWN"}, +}; + +TestPerfDeterminism::TestPerfDeterminism() : TestBase() { + set_title("RSMI Performance Determinism Test"); + set_description("The Performance Determinism tests verifies Enabling/Disabling " + "performance determinism mode."); +} + +TestPerfDeterminism::~TestPerfDeterminism(void) { +} + +void TestPerfDeterminism::SetUp(void) { + TestBase::SetUp(); + + return; +} + +void TestPerfDeterminism::DisplayTestInfo(void) { + TestBase::DisplayTestInfo(); +} + +void TestPerfDeterminism::DisplayResults(void) const { + TestBase::DisplayResults(); + return; +} + +void TestPerfDeterminism::Close() { + // This will close handles opened within rsmitst utility calls and call + // rsmi_shut_down(), so it should be done after other hsa cleanup + TestBase::Close(); +} + + +void TestPerfDeterminism::Run(void) { + rsmi_status_t err; + rsmi_dev_perf_level_t pfl; + rsmi_frequencies_t f; + rsmi_status_t ret; + + TestBase::Run(); + if (setup_failed_) { + std::cout << "** SetUp Failed for this test. 
Skipping.**" << std::endl; + return; + } + // Set clocks to something other than the usual default of the lowest + // frequency. + uint64_t freq_bitmask = 0b01100; // Try the 3rd and 4th clocks + std::string freq_bm_str = + std::bitset(freq_bitmask).to_string(); + + freq_bm_str.erase(0, std::min(freq_bm_str.find_first_not_of('0'), + freq_bm_str.size()-1)); + + for (uint32_t i = 0; i < num_monitor_devs(); ++i) { + PrintDeviceHeader(i); + + err = rsmi_perf_determinism_mode_set(i, freq_bitmask); + + if (err == RSMI_STATUS_NOT_SUPPORTED) { + IF_VERB(STANDARD) { + std::cout << "\t** Not supported on this machine" << std::endl; + } + return; + } else { + ret = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_SYS, &f); + if (ret != RSMI_STATUS_SUCCESS) { + return; + } + + IF_VERB(STANDARD) { + std::cout << "\tFrequency is now index " << f.current << std::endl; + } + ret = rsmi_dev_perf_level_get(i, &pfl); + CHK_ERR_ASRT(ret) + IF_VERB(STANDARD) { + std::cout << "\t**New Perf Level:" << kDevPerfLvlNameMap.at(pfl) << + std::endl; + } + std::cout << "\tResetting performance determinism" << std::endl; + err = rsmi_dev_perf_level_set(i, RSMI_DEV_PERF_LEVEL_AUTO);; + CHK_ERR_ASRT(err) + ret = rsmi_dev_perf_level_get(i, &pfl); + CHK_ERR_ASRT(ret) + IF_VERB(STANDARD) { + std::cout << "\t**New Perf Level:" << kDevPerfLvlNameMap.at(pfl) << + std::endl; + } + return; + } + } +} diff --git a/projects/amdsmi/tests/rocm_smi_test/functional/perf_determinism.h b/projects/amdsmi/tests/rocm_smi_test/functional/perf_determinism.h new file mode 100644 index 0000000000..64d460aa19 --- /dev/null +++ b/projects/amdsmi/tests/rocm_smi_test/functional/perf_determinism.h @@ -0,0 +1,73 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2020, Advanced Micro 
Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ +#ifndef TESTS_ROCM_SMI_TEST_FUNCTIONAL_PERF_DETERMINISM_H_ +#define TESTS_ROCM_SMI_TEST_FUNCTIONAL_PERF_DETERMINISM_H_ + +#include "rocm_smi_test/test_base.h" + +class TestPerfDeterminism : public TestBase { + public: + TestPerfDeterminism(); + + // @Brief: Destructor for test case of TestVOltRead + virtual ~TestPerfDeterminism(); + + // @Brief: Setup the environment for measurement + virtual void SetUp(); + + // @Brief: Core measurement execution + virtual void Run(); + + // @Brief: Clean up and retrive the resource + virtual void Close(); + + // @Brief: Display results + virtual void DisplayResults() const; + + // @Brief: Display information about what this test does + virtual void DisplayTestInfo(void); +}; + +#endif // TESTS_ROCM_SMI_TEST_FUNCTIONAL_PERF_DETERMINISM_H_ diff --git a/projects/amdsmi/tests/rocm_smi_test/main.cc b/projects/amdsmi/tests/rocm_smi_test/main.cc index 3199b580fb..9077b862b0 100755 --- a/projects/amdsmi/tests/rocm_smi_test/main.cc +++ b/projects/amdsmi/tests/rocm_smi_test/main.cc @@ -83,6 +83,7 @@ #include "functional/init_shutdown_refcount.h" #include "rocm_smi_test/functional/hw_topology_read.h" #include "rocm_smi_test/functional/gpu_metrics_read.h" +#include "rocm_smi_test/functional/perf_determinism.h" static RSMITstGlobals *sRSMIGlvalues = nullptr; @@ -233,6 +234,10 @@ TEST(rsmitstReadOnly, TestGpuMetricsRead) { TestGpuMetricsRead tst; RunGenericTest(&tst); } +TEST(rsmitstReadOnly, TestPerfDeterminism) { + TestPerfDeterminism tst; + RunGenericTest(&tst); +} TEST(rsmitstReadWrite, TestXGMIReadWrite) { TestXGMIReadWrite tst; RunGenericTest(&tst);