From 01401b0caa86366ffa23212eeb00271bc625527e Mon Sep 17 00:00:00 2001 From: Chris Freehill Date: Tue, 7 Apr 2020 17:13:50 -0500 Subject: [PATCH] Add device mutual exclusion tests and related fixes * Added a new test to verify mutual exclusion of access to device resources * Added some missing acquiring of mutexes to some RSMI calls, as well as try-catch blocks. Change-Id: I87aac009878a0b2d1f975e1d5b794d887bb23ff9 [ROCm/rocm_smi_lib commit: f8b57c3b16b444c3249b36a28aae6da2e7530d3c] --- .../rocm-smi-lib/include/rocm_smi/rocm_smi.h | 10 +- .../include/rocm_smi/rocm_smi_utils.h | 22 +- projects/rocm-smi-lib/src/rocm_smi.cc | 34 ++- .../src/shared_mutex/shared_mutex.cc | 28 ++- .../src/shared_mutex/shared_mutex.h | 25 +- .../functional/mutual_exclusion.cc | 224 ++++++++++++++++++ .../functional/mutual_exclusion.h | 77 ++++++ .../rocm-smi-lib/tests/rocm_smi_test/main.cc | 26 +- .../tests/rocm_smi_test/test_base.cc | 14 +- .../tests/rocm_smi_test/test_base.h | 7 +- 10 files changed, 429 insertions(+), 38 deletions(-) create mode 100755 projects/rocm-smi-lib/tests/rocm_smi_test/functional/mutual_exclusion.cc create mode 100755 projects/rocm-smi-lib/tests/rocm_smi_test/functional/mutual_exclusion.h diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h index 6a564ea4c1..5bc4f0e5c3 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h @@ -118,9 +118,10 @@ typedef enum { //!< input RSMI_STATUS_UNEXPECTED_DATA, //!< The data read or provided to //!< function is not what was expected - RSMI_STATUS_RESOURCE_BUSY, //!< A function timed out trying to - //!< a resource. This could be a - //!< mutex time-out. 
+ RSMI_STATUS_BUSY, //!< A resource or mutex could not be + //!< acquired because it is already + //!< being used + RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF, //!< An unknown error occurred } rsmi_status_t; @@ -131,12 +132,13 @@ typedef enum { */ typedef enum { - RSMI_INIT_FLAG_ALL_GPUS = 0x1, //!< Attempt to add all GPUs found + RSMI_INIT_FLAG_ALL_GPUS = 0x1, //!< Attempt to add all GPUs found //!< (including non-AMD) to the list //!< of devices from which SMI //!< information can be retrieved. By //!< default, only AMD devices are //!< ennumerated by RSMI. + RSMI_INIT_FLAG_RESRV_TEST1 = 0x800000000000000, //!< Reserved for test } rsmi_init_flags_t; /** diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_utils.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_utils.h index 3bf131f3ca..be5b75c2c9 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_utils.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_utils.h @@ -75,23 +75,39 @@ struct pthread_wrap { public: explicit pthread_wrap(pthread_mutex_t &p_mut) : mutex_(p_mut) {} - void Acquire() { pthread_mutex_lock(&mutex_); } - void Release() { pthread_mutex_unlock(&mutex_); } + void Acquire() {pthread_mutex_lock(&mutex_);} + int AcquireNB() {return pthread_mutex_trylock(&mutex_);} + void Release() {pthread_mutex_unlock(&mutex_);} private: pthread_mutex_t& mutex_; }; struct ScopedPthread { - explicit ScopedPthread(pthread_wrap& mutex) : pthrd_ref_(mutex) { - pthrd_ref_.Acquire(); + explicit ScopedPthread(pthread_wrap& mutex, bool blocking = true) : //NOLINT + pthrd_ref_(mutex), mutex_not_acquired_(false) { + if (blocking) { + pthrd_ref_.Acquire(); + } else { + int ret = pthrd_ref_.AcquireNB(); + if (ret != 0) { + mutex_not_acquired_ = true; + } + } } ~ScopedPthread() { + // Only unlock a mutex that this object actually locked. + if (mutex_not_acquired_) { + return; + } pthrd_ref_.Release(); } + + bool mutex_not_acquired() {return mutex_not_acquired_;} + private: ScopedPthread(const ScopedPthread&); - pthread_wrap& pthrd_ref_; + bool mutex_not_acquired_; // True when AcquireNB() failed; never set by Acquire() 
}; } // namespace smi } // namespace amd diff --git a/projects/rocm-smi-lib/src/rocm_smi.cc b/projects/rocm-smi-lib/src/rocm_smi.cc index 75a1d3aa2e..5eca70e3c4 100755 --- a/projects/rocm-smi-lib/src/rocm_smi.cc +++ b/projects/rocm-smi-lib/src/rocm_smi.cc @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -75,7 +76,6 @@ static rsmi_status_t handleException() { try { throw; } catch (const std::bad_alloc& e) { - debug_print("RSMI exception: BadAlloc\n"); return RSMI_STATUS_OUT_OF_RESOURCES; } catch (const amd::smi::rsmi_exception& e) { debug_print("Exception caught: %s.\n", e.what()); @@ -123,7 +123,12 @@ static rsmi_status_t handleException() { #define DEVICE_MUTEX \ amd::smi::pthread_wrap _pw(*get_mutex(dv_ind)); \ - amd::smi::ScopedPthread _lock(_pw); + amd::smi::RocmSMI& smi_ = amd::smi::RocmSMI::getInstance(); \ + bool blocking_ = !(smi_.init_options() & RSMI_INIT_FLAG_RESRV_TEST1); \ + amd::smi::ScopedPthread _lock(_pw, blocking_); \ + if (!blocking_ && _lock.mutex_not_acquired()) { \ + return RSMI_STATUS_BUSY; \ + } /* This group of macros is used to facilitate checking of support for rsmi_dev* * "getter" functions. 
When the return buffer is set to nullptr, the macro will @@ -1638,10 +1643,13 @@ rsmi_dev_name_get(uint32_t dv_ind, char *name, size_t len) { rsmi_status_t rsmi_dev_brand_get(uint32_t dv_ind, char *brand, uint32_t len) { + TRY CHK_SUPPORT_NAME_ONLY(brand) if (len == 0) { return RSMI_STATUS_INVALID_ARGS; } + DEVICE_MUTEX + std::map brand_names = { {"D05121", "mi25"}, {"D05131", "mi25"}, @@ -1676,6 +1684,7 @@ rsmi_dev_brand_get(uint32_t dv_ind, char *brand, uint32_t len) { // If there is no SKU match, return marketing name instead rsmi_dev_name_get(dv_ind, brand, len); return RSMI_STATUS_SUCCESS; + CATCH } rsmi_status_t @@ -2501,6 +2510,7 @@ rsmi_status_t rsmi_dev_serial_number_get(uint32_t dv_ind, } TRY + DEVICE_MUTEX std::string val_str; rsmi_status_t ret = get_dev_value_str(amd::smi::kDevSerialNumber, @@ -3146,3 +3156,23 @@ rsmi_func_iter_next(rsmi_func_id_iter_handle_t handle) { CATCH } + +// UNDOCUMENTED FUNCTIONS +// These functions are not declared in rocm_smi.h. They are either not fully +// supported, or to be used for test purposes. + +// This function acquires a mutex and waits for a number of seconds +rsmi_status_t +rsmi_test_sleep(uint32_t dv_ind, uint32_t seconds) { +// DEVICE_MUTEX + amd::smi::pthread_wrap _pw(*get_mutex(dv_ind)); + amd::smi::RocmSMI& smi_ = amd::smi::RocmSMI::getInstance(); + bool blocking_ = !(smi_.init_options() & RSMI_INIT_FLAG_RESRV_TEST1); + amd::smi::ScopedPthread _lock(_pw, blocking_); + if (!blocking_ && _lock.mutex_not_acquired()) { + return RSMI_STATUS_BUSY; + } + + sleep(seconds); + return RSMI_STATUS_SUCCESS; +} diff --git a/projects/rocm-smi-lib/src/shared_mutex/shared_mutex.cc b/projects/rocm-smi-lib/src/shared_mutex/shared_mutex.cc index db17d1769d..6ccd546c72 100755 --- a/projects/rocm-smi-lib/src/shared_mutex/shared_mutex.cc +++ b/projects/rocm-smi-lib/src/shared_mutex/shared_mutex.cc @@ -1,4 +1,26 @@ -// NOLINT(legal/copyright) +/* +Modifications Copyright © 2019 – 2020 Advanced Micro Devices, Inc. 
All Rights +Reserved. +Copyright (c) 2018 Oleg Yamnikov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ #include "shared_mutex.h" // NOLINT(build/include) #include // errno, ENOENT #include // O_RDWR, O_CREATE @@ -59,7 +81,7 @@ shared_mutex_t shared_mutex_init(const char *name, mode_t mode) { pthread_mutex_t *mutex_ptr = reinterpret_cast(addr); // Make sure the mutex wasn't left in a locked state. If we can't - // acquire it in 3 sec., re-do everything. + // acquire it in 5 sec., re-do everything. 
struct timespec expireTime; clock_gettime(CLOCK_REALTIME, &expireTime); expireTime.tv_sec += 5; @@ -75,7 +97,7 @@ shared_mutex_t shared_mutex_init(const char *name, mode_t mode) { " /dev/shm."); free(mutex.name); - throw amd::smi::rsmi_exception(RSMI_STATUS_RESOURCE_BUSY, __FUNCTION__); + throw amd::smi::rsmi_exception(RSMI_STATUS_BUSY, __FUNCTION__); return mutex; } else { if (pthread_mutex_unlock(mutex_ptr)) { diff --git a/projects/rocm-smi-lib/src/shared_mutex/shared_mutex.h b/projects/rocm-smi-lib/src/shared_mutex/shared_mutex.h index 77aff5509d..7fc5ce845e 100755 --- a/projects/rocm-smi-lib/src/shared_mutex/shared_mutex.h +++ b/projects/rocm-smi-lib/src/shared_mutex/shared_mutex.h @@ -1,5 +1,26 @@ -// NOLINT(legal/copyright) -// See LICENSE file +/* +Modifications Copyright © 2019 – 2020 Advanced Micro Devices, Inc. All Rights +Reserved. +Copyright (c) 2018 Oleg Yamnikov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ #ifndef SRC_SHARED_MUTEX_SHARED_MUTEX_H_ #define SRC_SHARED_MUTEX_SHARED_MUTEX_H_ diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/mutual_exclusion.cc b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/mutual_exclusion.cc new file mode 100755 index 0000000000..e0ab317ca3 --- /dev/null +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/mutual_exclusion.cc @@ -0,0 +1,224 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2020, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +#include +#include +#include + +#include "gtest/gtest.h" +#include "rocm_smi/rocm_smi.h" +#include "rocm_smi_test/functional/mutual_exclusion.h" +#include "rocm_smi_test/test_common.h" + + +TestMutualExclusion::TestMutualExclusion() : TestBase() { + set_title("Mutual Exclusion Test"); + set_description("Verify that RSMI only allows 1 process at a time" + " to access RSMI resources (primarily sysfs files). This test has one " + "process that obtains the mutex that ensures only 1 process accesses a " + "device's sysfs files at a time, and another process that attempts " + "to access the device's sysfs files. The second process should fail " + "in these attempts."); +} + +TestMutualExclusion::~TestMutualExclusion(void) { +} + +extern rsmi_status_t rsmi_test_sleep(uint32_t dv_ind, uint32_t seconds); + +void TestMutualExclusion::SetUp(void) { + std::string label; + rsmi_status_t ret; + + // TestBase::SetUp(RSMI_INIT_FLAG_RESRV_TEST1); + MakeHeaderStr(kSetupLabel, &label); + printf("\n\t%s\n", label.c_str()); + + sleeper_process_ = false; + child_ = 0; + child_ = fork(); + + if (child_ != 0) { + sleeper_process_ = true; // sleeper_process is parent + + // RSMI_INIT_FLAG_RESRV_TEST1 tells rsmi to fail immediately + // if it can't get the mutex instead of waiting. 
+ ret = rsmi_init(RSMI_INIT_FLAG_RESRV_TEST1); + ASSERT_EQ(ret, RSMI_STATUS_SUCCESS); + + sleep(2); // Let both processes get through rsmi_init + } else { + sleep(1); // Let the sleeper process get through rsmi_init() before + // this one goes, so it doesn't fail. + ret = rsmi_init(RSMI_INIT_FLAG_RESRV_TEST1); + ASSERT_EQ(ret, RSMI_STATUS_SUCCESS); + + sleep(2); // Let both processes get through rsmi_init; + } + return; +} + +void TestMutualExclusion::DisplayTestInfo(void) { + TestBase::DisplayTestInfo(); +} + +void TestMutualExclusion::DisplayResults(void) const { + TestBase::DisplayResults(); + return; +} + +void TestMutualExclusion::Close() { + // This will close handles opened within rsmitst utility calls and call + // rsmi_shut_down(), so it should be done after other hsa cleanup + TestBase::Close(); +} + +extern rsmi_status_t +rsmi_test_sleep(uint32_t dv_ind, uint32_t seconds); + +void TestMutualExclusion::Run(void) { + rsmi_status_t ret; + + if (sleeper_process_) { + std::cout << "MUTEX_HOLDER process: started sleeping for 10 seconds..." << + std::endl; + ret = rsmi_test_sleep(0, 10); + ASSERT_EQ(ret, RSMI_STATUS_SUCCESS); + std::cout << "MUTEX_HOLDER process: Sleep process woke up." << std::endl; + pid_t cpid = wait(nullptr); + ASSERT_EQ(cpid, child_); + } else { + // Both processes should have completed rsmi_init(). + // let the other process get started on rsmi_test_sleep(). + sleep(2); + TestBase::Run(); + std::cout << "TESTER process: verifing that all rsmi_dev_* functions " + "return RSMI_STATUS_BUSY because MUTEX_HOLDER process " + "holds the mutex" << std::endl; + // Try all the device related rsmi calls. They should all fail with + // RSMI_STATUS_BUSY + // Set dummy values should to working, deterministic values. 
+ uint16_t dmy_ui16 = 0; + uint32_t dmy_ui32 = 1; + int32_t dmy_i32 = 0; + uint64_t dmy_ui64 = 0; + int64_t dmy_i64 = 0; + char dmy_str[10]; + rsmi_dev_perf_level_t dmy_perf_lvl; + rsmi_frequencies_t dmy_freqs; + rsmi_od_volt_freq_data_t dmy_od_volt; + rsmi_freq_volt_region_t dmy_vlt_reg; + rsmi_error_count_t dmy_err_cnt; + rsmi_ras_err_state_t dmy_ras_err_st; + + ret = rsmi_dev_id_get(0, &dmy_ui16); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_vendor_id_get(0, &dmy_ui16); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_name_get(0, dmy_str, 10); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_brand_get(0, dmy_str, 10); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_vendor_name_get(0, dmy_str, 10); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_vram_vendor_get(0, dmy_str, 10); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_serial_number_get(0, dmy_str, 10); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_subsystem_id_get(0, &dmy_ui16); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_subsystem_vendor_id_get(0, &dmy_ui16); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_unique_id_get(0, &dmy_ui64); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_pci_id_get(0, &dmy_ui64); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_pci_throughput_get(0, &dmy_ui64, &dmy_ui64, &dmy_ui64); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_pci_replay_counter_get(0, &dmy_ui64); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_pci_bandwidth_set(0, 0); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_fan_rpms_get(0, dmy_ui32, &dmy_i64); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_fan_speed_get(0, 0, &dmy_i64); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_fan_speed_max_get(0, 0, &dmy_ui64); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_temp_metric_get(0, dmy_ui32, RSMI_TEMP_CURRENT, &dmy_i64); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_fan_reset(0, 0); + ASSERT_EQ(ret, 
RSMI_STATUS_BUSY); + ret = rsmi_dev_fan_speed_set(0, dmy_ui32, 0); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_perf_level_get(0, &dmy_perf_lvl); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_overdrive_level_get(0, &dmy_ui32); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_gpu_clk_freq_get(0, RSMI_CLK_TYPE_SYS, &dmy_freqs); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_od_volt_info_get(0, &dmy_od_volt); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_od_volt_curve_regions_get(0, &dmy_ui32, &dmy_vlt_reg); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_overdrive_level_set(dmy_i32, 0); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_gpu_clk_freq_set(0, RSMI_CLK_TYPE_SYS, 0); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_ecc_count_get(0, RSMI_GPU_BLOCK_UMC, &dmy_err_cnt); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_ecc_enabled_get(0, &dmy_ui64); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + ret = rsmi_dev_ecc_status_get(0, RSMI_GPU_BLOCK_UMC, &dmy_ras_err_st); + ASSERT_EQ(ret, RSMI_STATUS_BUSY); + + std::cout << "TESTER process: Finished verifying that all " + "rsmi_dev_* functions returned RSMI_STATUS_BUSY" << std::endl; + exit(0); + } +} diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/mutual_exclusion.h b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/mutual_exclusion.h new file mode 100755 index 0000000000..d37355c71c --- /dev/null +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/mutual_exclusion.h @@ -0,0 +1,77 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2020, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ +#ifndef TESTS_ROCM_SMI_TEST_FUNCTIONAL_MUTUAL_EXCLUSION_H_ +#define TESTS_ROCM_SMI_TEST_FUNCTIONAL_MUTUAL_EXCLUSION_H_ + +#include "rocm_smi_test/test_base.h" + +class TestMutualExclusion : public TestBase { + public: + TestMutualExclusion(); + + // @Brief: Destructor for test case of TestMutualExclusion + virtual ~TestMutualExclusion(); + + // @Brief: Setup the environment for measurement + virtual void SetUp(); + + // @Brief: Core measurement execution + virtual void Run(); + + // @Brief: Clean up and retrieve the resource + virtual void Close(); + + // @Brief: Display results + virtual void DisplayResults() const; + + // @Brief: Display information about what this test does + virtual void DisplayTestInfo(void); + + private: + bool sleeper_process_; + int child_; +}; + +#endif // TESTS_ROCM_SMI_TEST_FUNCTIONAL_MUTUAL_EXCLUSION_H_ diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/main.cc b/projects/rocm-smi-lib/tests/rocm_smi_test/main.cc index 149a4ed738..ed1891da05 100755 --- a/projects/rocm-smi-lib/tests/rocm_smi_test/main.cc +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/main.cc @@ -77,6 +77,7 @@ #include "functional/xgmi_read_write.h" #include "functional/mem_page_info_read.h" #include "functional/api_support_read.h" +#include "functional/mutual_exclusion.h" static RSMITstGlobals *sRSMIGlvalues = nullptr; @@ -223,6 +224,14 @@ TEST(rsmitstReadOnly, TestAPISupportRead) { TestAPISupportRead tst; RunGenericTest(&tst); } +TEST(rsmitstReadOnly, TestMutualExclusion) { + TestMutualExclusion test; + + test.DisplayTestInfo(); + test.SetUp(); + test.Run(); + RunCustomTestEpilog(&test); +} int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); @@ -242,22 +251,5 @@ int main(int argc, char** argv) { int ret = 0; sRSMIGlvalues = &settings; - ret = RUN_ALL_TESTS(); - - if (ret) { - return ret; - } - - settings.init_options = RSMI_INIT_FLAG_ALL_GPUS; - - std::cout << "****************************************" << std::endl; - std::cout << 
"****************************************" << std::endl; - std::cout << "****************************************" << std::endl; - std::cout << "Re-running tests with init options: " << std::hex << - settings.init_options << std::endl; - std::cout << "****************************************" << std::endl; - std::cout << "****************************************" << std::endl; - std::cout << "****************************************" << std::endl; - settings.init_options = RSMI_INIT_FLAG_ALL_GPUS; return RUN_ALL_TESTS(); } diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/test_base.cc b/projects/rocm-smi-lib/tests/rocm_smi_test/test_base.cc index d117202435..fb6d9211eb 100755 --- a/projects/rocm-smi-lib/tests/rocm_smi_test/test_base.cc +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/test_base.cc @@ -54,18 +54,19 @@ static const int kOutputLineLength = 80; static const char kLabelDelimiter[] = "####"; static const char kDescriptionLabel[] = "TEST DESCRIPTION"; static const char kTitleLabel[] = "TEST NAME"; -static const char kSetupLabel[] = "TEST SETUP"; static const char kRunLabel[] = "TEST EXECUTION"; static const char kCloseLabel[] = "TEST CLEAN UP"; static const char kResultsLabel[] = "TEST RESULTS"; +// This one is used outside this file +const char kSetupLabel[] = "TEST SETUP"; TestBase::TestBase() : setup_failed_(false), description_("") { } TestBase::~TestBase() { } -static void MakeHeaderStr(const char *inStr, std::string *outStr) { +void MakeHeaderStr(const char *inStr, std::string *outStr) { assert(outStr != nullptr); assert(inStr != nullptr); @@ -77,14 +78,19 @@ static void MakeHeaderStr(const char *inStr, std::string *outStr) { *outStr += kLabelDelimiter; } -void TestBase::SetUp(void) { +void TestBase::SetUp(uint64_t init_flags) { std::string label; rsmi_status_t err; MakeHeaderStr(kSetupLabel, &label); printf("\n\t%s\n", label.c_str()); - err = rsmi_init(init_options()); + if (init_flags) { + err = rsmi_init(init_flags); + } else { + err = 
rsmi_init(init_options()); + } + if (err != RSMI_STATUS_SUCCESS) { setup_failed_ = true; } diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/test_base.h b/projects/rocm-smi-lib/tests/rocm_smi_test/test_base.h index 24377a1c7e..a1e324c114 100755 --- a/projects/rocm-smi-lib/tests/rocm_smi_test/test_base.h +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/test_base.h @@ -57,7 +57,9 @@ class TestBase { // @Brief: Before run the core measure codes, do something to set up // i.e. init runtime, prepare packet... - virtual void SetUp(void); + // The init_flags option will override any flags set for the whole test + // suite + virtual void SetUp(uint64_t init_flags = 0); // @Brief: Core measurement codes executing here virtual void Run(void); @@ -135,4 +137,7 @@ class TestBase { } \ } +void MakeHeaderStr(const char *inStr, std::string *outStr); +extern const char kSetupLabel[]; + #endif // TESTS_ROCM_SMI_TEST_TEST_BASE_H_