From 32ffaeb7d10f883fd8f7d8d4ec4e7fa9139c07e6 Mon Sep 17 00:00:00 2001 From: rohit pathania Date: Wed, 2 May 2018 15:04:25 +0530 Subject: [PATCH] Memory Concurrent tests for pool Memory allocate, Memory free and get pool info Change-Id: I6a1343348e400fe466e041d651adaa67be561a21 [ROCm/ROCR-Runtime commit: 47af1d673ed31f46acd49e37f1331955a39f9c52] --- .../rocrtst/common/concurrent_utils.cc | 317 +++++++++++ .../rocrtst/common/concurrent_utils.h | 213 +++++++ .../suites/stress/memory_concurrent_tests.cc | 537 ++++++++++++++++++ .../suites/stress/memory_concurrent_tests.h | 109 ++++ .../rocrtst/suites/test_common/CMakeLists.txt | 3 +- .../rocrtst/suites/test_common/main.cc | 21 + 6 files changed, 1199 insertions(+), 1 deletion(-) create mode 100755 projects/rocr-runtime/rocrtst/common/concurrent_utils.cc create mode 100755 projects/rocr-runtime/rocrtst/common/concurrent_utils.h create mode 100755 projects/rocr-runtime/rocrtst/suites/stress/memory_concurrent_tests.cc create mode 100755 projects/rocr-runtime/rocrtst/suites/stress/memory_concurrent_tests.h diff --git a/projects/rocr-runtime/rocrtst/common/concurrent_utils.cc b/projects/rocr-runtime/rocrtst/common/concurrent_utils.cc new file mode 100755 index 0000000000..06ba15f2a7 --- /dev/null +++ b/projects/rocr-runtime/rocrtst/common/concurrent_utils.cc @@ -0,0 +1,317 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2018, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + + +#include +#include +#include +#include +#include "common/concurrent_utils.h" + +namespace rocrtst { + +/** + * @brief worker function is invoked by each thread to execute tests + * Initially, all threads are blocked to wait run_flag. After run_flag being + * set up, the worker function begin to execute test function and change + * the status of tests to TEST_RUNNING. 
After test function finish, the status + * of tests will be changed to TEST_FINISHED, and worker function will be + * blocked until run_flag being set up again. + * @param input Pointer to thread_aux data structure, which contains test + * function pointer and corresponding args for the test function, and other + * auxiliary information, including status of test, number of running tests, + * run_flag, exit_flag, etc. + */ + +static void *worker(void *input) { + func_ptr fun_prt; + thread_aux* thread = reinterpret_cast(input); + fun_prt = reinterpret_cast(thread->test->fun_prt); + int run_flag_l = 0; + + // While loop to repeatedly execute test function + while (1) { + pthread_mutex_lock(thread->test_mutex); + // Blocked to wait run_flag or exit_flag being changed + while (*thread->run_flag == run_flag_l && *thread->exit_flag == 0) { + pthread_cond_wait(thread->test_cond, thread->test_mutex); + } + pthread_mutex_unlock(thread->test_mutex); + + // Reset run_flag + run_flag_l = run_flag_l ^ 1; + + // If exit_flag is 0, run test function and set status of the test to + // TEST_RUNNING + if (*thread->exit_flag == 0) { + thread->test->status = TEST_RUNNING; + fun_prt(thread->test->data); + + // After test function finish, subtract the number of running tests via atomic operations + // and check the number of running tests, if the number equal to 1, + // it means all tests are finished, broadcast a signal to the wakeup master + // thread. 
+ pthread_mutex_lock(thread->test_mutex); + (*(thread->num_running_t))--; + + if ((*thread->num_running_t) == 0) { + pthread_cond_broadcast(thread->test_cond); + } + pthread_mutex_unlock(thread->test_mutex); + + // Set status of the test to TEST_STOP + thread->test->status = TEST_STOP; + } else { + // If exit_flag is no-zero, set status of the test to TEST_FINISHED + thread->test->status = TEST_FINISHED; + pthread_exit(NULL); + } + } + return NULL; +} + +/** + * @brief create a test_group data structure, initialize variables in + * the test_group structure, allocate a test_list of group_size and + * return a pointer to the test_group. + * @param group_size The size of test group, i.e., the size of test lists + * @return Pointer to the new test_group + */ +test_group *TestGroupCreate(size_t group_size) { + test_group *new_group = static_cast(malloc(sizeof(test_group))); + // initialize variables in the data structure + new_group->group_size = group_size; + new_group->n_threads = 0; + new_group->num_test = 0; + new_group->run_flag = 0; + new_group->exit_flag = 0; + new_group->num_running_t = 0; + // malloc test_list array with group_size + new_group->test_list = static_cast(malloc(sizeof(test_aux) * group_size)); + + return new_group; +} + +void TestGroupWait(test_group *t_group) { + pthread_mutex_lock(&t_group->test_mutex); + while (t_group->num_running_t != 0) { + pthread_cond_wait(&t_group->test_cond, &t_group->test_mutex); + } + pthread_mutex_unlock(&t_group->test_mutex); + + return; +} + +void TestGroupAdd(test_group *t_group, func_ptr fun_prt, void *data, size_t num_copy) { + if (t_group->group_size < (num_copy + t_group->num_test)) { + fprintf(stderr, "Error beyound group size: %lu, please resize the test_group\n", t_group->group_size); + return; + } + + int num_test = t_group->num_test; + test_aux *test_list = t_group->test_list; + unsigned int ii; + for (ii = 0; ii < num_copy; ii++) { + test_list[num_test + ii].fun_prt = reinterpret_cast(fun_prt); + 
test_list[num_test + ii].data = data; + test_list[num_test + ii].status = TEST_NOT_STARTED; + } + t_group->num_test = num_test + num_copy; + + return; +} + +void TestGroupResize(test_group *t_group, size_t new_group_size) { + if (new_group_size < t_group->group_size) { + fprintf(stderr, "Error new group_size is smaller than current group_size\n"); + } + + test_aux *new_test_list; + new_test_list = static_cast(realloc(t_group->test_list, new_group_size * sizeof(test_aux))); + t_group->group_size = new_group_size; + t_group->test_list = new_test_list; + + return; +} + +// Create threads for tests +void TestGroupThreadCreate(test_group *t_group) { + pthread_mutex_init(&(t_group->test_mutex), NULL); + pthread_cond_init(&(t_group->test_cond), NULL); + pthread_attr_init(&(t_group->attr)); + pthread_attr_setdetachstate(&(t_group->attr), PTHREAD_CREATE_JOINABLE); + + int n_threads; + int ii = 0; + + n_threads = t_group->n_threads = t_group->num_test; + thread_aux *thread_list = t_group->thread_list = + static_cast(malloc(sizeof(thread_aux) * n_threads)); + t_group->tid = static_cast(malloc(sizeof(pthread_t) * n_threads)); + + for (ii = 0; ii < n_threads; ++ii) { + // CPU_ZERO(&thread_list[ii].cpuset); + thread_list[ii].tid = ii; + thread_list[ii].test = t_group->test_list + ii; + thread_list[ii].run_flag = &(t_group->run_flag); + thread_list[ii].exit_flag = &(t_group->exit_flag); + thread_list[ii].test_mutex = &(t_group->test_mutex); + thread_list[ii].test_cond = &(t_group->test_cond); + thread_list[ii].num_running_t = &(t_group->num_running_t); + int status = pthread_create(t_group->tid + ii, &(t_group->attr), worker, thread_list + ii); + if (status < 0) { + perror("pthread_create failed"); + } + } + + return; +} + +// Return number of test +int TestGroupNumTests(test_group *t_group) { + return t_group->num_test; +} + +// Set affinity of the specific test +void TestGroupThreadAffinity(test_group *t_group, int test_id, int cpu_id) { +/* Setting CPU affinity isn't currently 
supported. + * CPU_SET(cpu_id, &t_group->thread_list[test_id].cpuset); + * int status; + * status = pthread_setaffinity_np(t_group->tid[test_id], + * sizeof(cpu_set_t), &t_group->thread_list[test_id].cpuset); + * if (status != 0) { + * perror("pthread_setaffinity_np error"); + * } + */ + return; +} + +// Set run_flag to 1 +void TestGroupStart(test_group *t_group) { + if (t_group->num_running_t != 0) { + fprintf(stderr, "Error: %d tests are not finished\n", t_group->num_running_t); + return; + } + + pthread_mutex_lock(&t_group->test_mutex); + t_group->run_flag = t_group->run_flag ^ 1; + t_group->num_running_t = t_group->num_test; + pthread_cond_broadcast(&t_group->test_cond); + pthread_mutex_unlock(&t_group->test_mutex); + + return; +} + +// Set exit_flag to 1, wait all threads finish and cleanup +void TestGroupExit(test_group *t_group) { + int ii = 0; + int status; + + pthread_mutex_lock(&t_group->test_mutex); + t_group->exit_flag = 1; + pthread_cond_broadcast(&t_group->test_cond); + pthread_mutex_unlock(&t_group->test_mutex); + + for (ii = 0; ii < t_group->n_threads; ++ii) { + status = pthread_join(t_group->tid[ii], 0); + if (status < 0) { + perror("pthread_join failed"); + t_group->test_list[ii].status = TEST_ERROR; + } + } + + pthread_attr_destroy(&(t_group->attr)); + pthread_mutex_destroy(&(t_group->test_mutex)); + pthread_cond_destroy(&(t_group->test_cond)); + + free(t_group->tid); + free(t_group->thread_list); + + return; +} + +void TestGroupKill(test_group *t_group) { + int ii = 0; + int status; + for (ii = 0; ii < t_group->n_threads; ++ii) { + status = pthread_cancel(t_group->tid[ii]); + if (status < 0) { + perror("pthread_cancel failed"); + t_group->test_list[ii].status = TEST_ERROR; + } + } + + pthread_attr_destroy(&(t_group->attr)); + pthread_mutex_destroy(&(t_group->test_mutex)); + pthread_cond_destroy(&(t_group->test_cond)); + + free(t_group->tid); + free(t_group->thread_list); + + return; +} + +void TestGroupDestroy(test_group *t_group) { + 
free(t_group->test_list); + free(t_group); + + return; +} + +int TestGroupTestStatus(test_group *t_group, int test_id) { + if (test_id >= t_group->n_threads) { + fprintf(stderr, "test_id: %d is larger than the number of test: %d\n", test_id, t_group->num_test); + } + + if (t_group->test_list[test_id].status == TEST_RUNNING) { + if (pthread_kill(t_group->tid[test_id], 0) == ESRCH) { + t_group->test_list[test_id].status = TEST_ERROR; + } + } + + return t_group->test_list[test_id].status; +} + +} // namespace rocrtst diff --git a/projects/rocr-runtime/rocrtst/common/concurrent_utils.h b/projects/rocr-runtime/rocrtst/common/concurrent_utils.h new file mode 100755 index 0000000000..117f7a5f9b --- /dev/null +++ b/projects/rocr-runtime/rocrtst/common/concurrent_utils.h @@ -0,0 +1,213 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2018, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#ifndef ROCRTST_COMMON_CONCURRENT_UTILS_H_ +#define ROCRTST_COMMON_CONCURRENT_UTILS_H_ + +#include +#include +#include + + +namespace rocrtst { +/** + * @enum TEST_STATUS + * @brief This enum lists status of test pthread + */ +enum TEST_STATUS {TEST_NOT_STARTED, TEST_RUNNING, + TEST_STOP, TEST_FINISHED, TEST_ERROR}; + + +typedef void (*func_ptr)(void *input); +/** + * @struct test_aux + * @brief This structure holds information for a test + */ +struct test_aux{ + // Pointer to the test function + void *fun_prt; + // Pointer to the data for the test function + void *data; + // status of the test listed in enum TEST_STATUS + uint16_t status; +}; + +/** + * @struct thread_aux + * @brief This structure holds the data for a test thread. 
+ */ +struct thread_aux { + // Thread Id + int tid; + // Pointer to a test item + test_aux *test; + // Pointer to the run_flag shared in the test group + volatile int *run_flag; + // Pointer to the exit_flag shared in the test group + volatile int *exit_flag; + // Pointer to the pthread mutex shared in the test group + pthread_mutex_t *test_mutex; + // Pointer to the pthread condition shared in the test group + pthread_cond_t *test_cond; + // Pointer to the number of running tests + volatile unsigned int *num_running_t; +}; + +/** + * @struct test_group + * @brief This structure holds data for a test group + */ +struct test_group { + // test group size, i.e., size of test_list array + size_t group_size; + // number of test + int num_test; + // number of threads - since one test per thread, equal to num_test + int n_threads; + // a flag for telling all threads to run - 0: stop, 1: run + volatile int run_flag; + // a flag for telling all threads to finish - 1: exit + volatile int exit_flag; + // pthread tid + pthread_t *tid; + // pthread attr + pthread_attr_t attr; + // pthread mutex shared in a group + pthread_mutex_t test_mutex; + // pthread condition signal shared in a group + pthread_cond_t test_cond; + // the list of test info + test_aux *test_list; + // the list of thread info + thread_aux *thread_list; + // number of running tests + volatile unsigned int num_running_t; +}; + +/** + * @brief create a test group, and preallocate + * test_list array with group_size + * @return initialized struct test_group + */ +test_group* TestGroupCreate(size_t group_size); + +/** + * @brief resize the array of test_list + * @return + */ +void TestGroupResize(test_group *t_group, size_t new_group_size); + +/** + * @brief add a new test into the specific test group + * @param t_group Pointer to a test group + * @param fun Pointer to the test function + * @param data Pointer to data for the test function + * @param num_copy Number of copies of the test + */ +void 
TestGroupAdd(test_group *t_group, func_ptr fun, + void *data, size_t num_copy); + +/** + * @brief create threads for tests in a test group + * @param t_group Pointer to a test group + */ +void TestGroupThreadCreate(test_group *t_group); + +/** + * @brief return the number of tests in a test group + * @param t_group Pointer to a test group + */ +int TestGroupNumTests(test_group *t_group); + +/** + * @brief run all threads/tests in a test group + * @param t_group Pointer to a test group + */ +void TestGroupStart(test_group *t_group); + +/** + * @brief wait all threads/tests in a test group finish + * The function is blocked until all threads are finished + * @param t_group Pointer to a test group + */ +void TestGroupWait(test_group *t_group); + +/** + * @brief terminate all threads/tests in a test group by sending a signal + * set exit_flag to 1, wait until all threads are finished + * @param t_group Pointer to a test group + */ +void TestGroupExit(test_group *t_group); + +/** + * @brief destroy a test group, release all resources + * @param t_group Pointer to a test group + */ +void TestGroupDestroy(test_group *t_group); + +/** + * @brief check the status of specific test in a test group + * @param t_group Pointer to a test group + * @param test_id Test No. + * @return the status of the test listed in enum TEST_STATUS + */ +int TestGroupTestStatus(test_group *t_group, int test_id); + +/** + * @brief set affinity of the specific test + * @param t_group Pointer to a test group + * @param test_id Test No. + * @param cpu_id CPU No. 
that the test is binded to + */ +void TestGroupThreadAffinity(test_group *t_group, + int test_id, int cpu_id); + +/** + * @brief force kill a test group + * @param t_group Pointer to a test group + */ +void TestGroupKill(test_group *t_group); +} // namespace rocrtst +#endif // ROCRTST_COMMON_CONCURRENT_UTILS_H_ diff --git a/projects/rocr-runtime/rocrtst/suites/stress/memory_concurrent_tests.cc b/projects/rocr-runtime/rocrtst/suites/stress/memory_concurrent_tests.cc new file mode 100755 index 0000000000..1167e2f4b5 --- /dev/null +++ b/projects/rocr-runtime/rocrtst/suites/stress/memory_concurrent_tests.cc @@ -0,0 +1,537 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2018, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. 
+ * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + + +#include +#include +#include +#include +#include +#include + +#include "suites/stress/memory_concurrent_tests.h" +#include "common/base_rocr_utils.h" +#include "common/common.h" +#include "common/helper_funcs.h" +#include "common/hsatimer.h" +#include "common/concurrent_utils.h" +#include "gtest/gtest.h" +#include "hsa/hsa.h" +#include "hsa/hsa_ext_finalize.h" + + +static const uint32_t kNumThreads = 1024; +static const uint32_t kMaxAllocSize = 1024 * 1024; + + + +#define RET_IF_HSA_ERR(err) { \ + if ((err) != HSA_STATUS_SUCCESS) { \ + const char* msg = 0; \ + hsa_status_string(err, &msg); \ + std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \ + __FILE__ << ". Call returned " << err << std::endl; \ + std::cout << msg << std::endl; \ + return (err); \ + } \ +} + + +typedef struct control_block { + hsa_amd_memory_pool_t* pool; + size_t alloc_size; + void* alloc_pointer; +} cb_t; + + +// Callback function which will call upon when need +// to allocate memory from the pool in the thread. 
+static void CallbackHSAMemoryAllocateFunc(void *data) { + hsa_status_t err; + cb_t *cb = static_cast(data); + + err = hsa_amd_memory_pool_allocate(*(cb->pool), + cb->alloc_size, 0, + reinterpret_cast(&(cb->alloc_pointer))); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + + return; +} + +// Callback function which will call upon when need +// to Free memory from the pool in the thread. +static void CallbackHSAMemoryFreeFunc(void *data) { + hsa_status_t err; + cb_t *cb = static_cast(data); + + err = hsa_memory_free(cb->alloc_pointer); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + + return; +} + +typedef struct thread_data_get_pool_info_s { + // The current pool + hsa_amd_memory_pool_t pool; + // The pool info retrieved from main thread + rocrtst::pool_info_t* info; + // Consistency check result + int consistency; +} thread_data_get_pool_info_t; + +// Callback function which will call upon when need +// to Fetch different info for the pool in the thread. +static void CallbackGetPoolInfo(void* data) { + hsa_status_t err; + + thread_data_get_pool_info_t* thread_data = + static_cast(data); + + rocrtst::pool_info_t info; + memset(&info, 0, sizeof(rocrtst::pool_info_t)); + err = rocrtst::AcquirePoolInfo(thread_data->pool, &info); + ASSERT_EQ(HSA_STATUS_SUCCESS, err); + + if (0 == memcmp(thread_data->info, &info, sizeof(rocrtst::pool_info_t))) { + // The pool info is consistent with the one got from the main thread + thread_data->consistency = 1; + } else { + thread_data->consistency = 0; + } +} + +MemoryConcurrentTest::MemoryConcurrentTest(bool launch_Concurrent_Allocate_, + bool launch_Concurrent_Free_ , + bool launch_Concurrent_PoolGetInfo_) :TestBase() { + set_num_iteration(10); // Number of iterations to execute of the main test; + // This is a default value which can be overridden + // on the command line. 
+ + std::string name; + std::string desc; + + name = "RocR Memory Concurrent"; + desc = "These series of tests are Stress tests which contains different subtests "; + + if (launch_Concurrent_Allocate_) { + name += " Allocate"; + desc += " This test Verify that memory can be concurrently allocated from pool" + " and thread safety while allocating memory from different threads" + " on ROCR agents"; + } else if (launch_Concurrent_Free_) { + name += " Free"; + desc += " This test thet memory Verify can be concurrently freed from pool" + " and thread safety while memory free from different threads" + " on ROCR agents"; + } else if (launch_Concurrent_PoolGetInfo_) { + name += " PoolGetInfo"; + desc += " This test Verify that memory pool info can be concurrently " + " get from different threads on ROCR agents"; + } + set_title(name); + set_description(desc); +} + +MemoryConcurrentTest::~MemoryConcurrentTest(void) { +} + +// Any 1-time setup involving member variables used in the rest of the test +// should be done here. 
+void MemoryConcurrentTest::SetUp(void) {
+  hsa_status_t err;
+
+  TestBase::SetUp();
+
+  err = rocrtst::SetDefaultAgents(this);
+  ASSERT_EQ(HSA_STATUS_SUCCESS, err);
+
+  err = rocrtst::SetPoolsTypical(this);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+  return;
+}
+
+void MemoryConcurrentTest::Run(void) {
+  // Compare required profile for this test case with what we're actually
+  // running on
+  if (!rocrtst::CheckProfile(this)) {
+    return;
+  }
+
+  TestBase::Run();
+}
+
+void MemoryConcurrentTest::DisplayTestInfo(void) {
+  TestBase::DisplayTestInfo();
+}
+
+void MemoryConcurrentTest::DisplayResults(void) const {
+  // Compare required profile for this test case with what we're actually
+  // running on
+  if (!rocrtst::CheckProfile(this)) {
+    return;
+  }
+
+  return;
+}
+
+void MemoryConcurrentTest::Close() {
+  // This will close handles opened within rocrtst utility calls and call
+  // hsa_shut_down(), so it should be done after other hsa cleanup
+  TestBase::Close();
+}
+
+
+
+
+static const char kSubTestSeparator[] = " **************************";
+
+// Prints the banner that introduces a subtest
+static void PrintMemorySubtestHeader(const char *header) {
+  std::cout << " *** Memory Stress Subtest: " << header << " ***" << std::endl;
+}
+
+// Prints the name of the agent followed by its device type in parentheses
+static void PrintAgentNameAndType(hsa_agent_t agent) {
+  hsa_status_t err;
+
+  char ag_name[64] = {0};
+  hsa_device_type_t ag_type;
+
+  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, ag_name);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &ag_type);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  std::cout << " Agent: " << ag_name << " (";
+  switch (ag_type) {
+    case HSA_DEVICE_TYPE_CPU:
+      std::cout << "CPU)";
+      break;
+    case HSA_DEVICE_TYPE_GPU:
+      std::cout << "GPU)";
+      break;
+    case HSA_DEVICE_TYPE_DSP:
+      std::cout << "DSP)";
+      break;
+    default:
+      // Close the parenthesis for device types that are not listed above
+      std::cout << "Unknown)";
+      break;
+  }
+  std::cout << std::endl;
+  return;
+}
+
+// This test verifies that memory can be
+// concurrently allocated from pool on ROCR agents.
+// kNumThreads threads allocate one buffer each from the pool; afterwards the
+// main thread checks that every allocation succeeded and that no two buffers
+// overlap. Pools that do not allow runtime allocation are skipped.
+void MemoryConcurrentTest::MemoryConcurrentAllocate(hsa_agent_t agent,
+                                                hsa_amd_memory_pool_t pool) {
+  hsa_status_t err;
+
+  rocrtst::pool_info_t pool_i;
+  err = rocrtst::AcquirePoolInfo(pool, &pool_i);
+  ASSERT_EQ(HSA_STATUS_SUCCESS, err);
+
+  if (verbosity() > 0) {
+    PrintAgentNameAndType(agent);
+  }
+
+  // Determine if allocation is allowed in this memory pool
+  bool alloc = false;
+  err = hsa_amd_memory_pool_get_info(pool,
+                   HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  if (alloc) {
+    size_t alloc_size = 0;
+    err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
+                                       &alloc_size);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    // Cap the size of each allocation at kMaxAllocSize
+    alloc_size = (alloc_size < kMaxAllocSize) ? alloc_size: kMaxAllocSize;
+    // Create a test group
+    rocrtst::test_group* tg_concurrent = rocrtst::TestGroupCreate(kNumThreads);
+    ASSERT_TRUE(tg_concurrent != nullptr);
+
+    // The control blocks are used to pass data to the threads
+    uint32_t kk;
+    cb_t cb[kNumThreads];
+    for (kk = 0; kk < kNumThreads; kk++) {
+      cb[kk].pool = &pool;
+      cb[kk].alloc_size = alloc_size;
+      // Defined value in case a thread never stores a result
+      cb[kk].alloc_pointer = nullptr;
+      rocrtst::TestGroupAdd(tg_concurrent, &CallbackHSAMemoryAllocateFunc, &cb[kk], 1);
+    }
+
+    // Create threads for each test
+    rocrtst::TestGroupThreadCreate(tg_concurrent);
+
+    // Start to run tests
+    rocrtst::TestGroupStart(tg_concurrent);
+
+    // Wait all tests finish
+    rocrtst::TestGroupWait(tg_concurrent);
+
+    // Exit all tests
+    rocrtst::TestGroupExit(tg_concurrent);
+
+    // Destroy thread group and cleanup resources
+    rocrtst::TestGroupDestroy(tg_concurrent);
+
+    // Inspect the results first and assert only after the buffers have been
+    // released, so a failing check does not leak the allocations.
+    bool null_pointer = false;
+    for (kk = 0; kk < kNumThreads; ++kk) {
+      if (cb[kk].alloc_pointer == nullptr) {
+        null_pointer = true;
+        break;
+      }
+    }
+
+    // Check for overlapping addresses. Two ranges [begin, end) overlap when
+    // each one starts before the other one ends; this also catches two
+    // threads that were handed the very same address.
+    bool overlap = false;
+    for (kk = 0; kk < kNumThreads && !null_pointer && !overlap; ++kk) {
+      const uintptr_t begin_k =
+                          reinterpret_cast<uintptr_t>(cb[kk].alloc_pointer);
+      const uintptr_t end_k = begin_k + alloc_size;
+      for (uint32_t ll = kk+1; ll < kNumThreads; ++ll) {
+        const uintptr_t begin_l =
+                          reinterpret_cast<uintptr_t>(cb[ll].alloc_pointer);
+        const uintptr_t end_l = begin_l + alloc_size;
+        if (begin_k < end_l && begin_l < end_k) {
+          overlap = true;
+          break;
+        }
+      }
+    }
+
+    for (uint32_t ii = 0; ii < kNumThreads; ii++) {
+      if (cb[ii].alloc_pointer == nullptr) {
+        continue;
+      }
+      err = hsa_memory_free(cb[ii].alloc_pointer);
+      EXPECT_EQ(err, HSA_STATUS_SUCCESS);
+    }
+
+    ASSERT_FALSE(null_pointer);
+    ASSERT_FALSE(overlap);
+  }
+  return;
+}
+
+
+
+
+// This test verifies that memory can be
+// concurrently freed from pool on ROCR agents.
+// The main thread allocates kNumThreads buffers, then every thread frees one
+// of them. Pools that do not allow runtime allocation are skipped.
+void MemoryConcurrentTest::MemoryConcurrentFree(hsa_agent_t agent,
+                                                hsa_amd_memory_pool_t pool) {
+  hsa_status_t err;
+
+  rocrtst::pool_info_t pool_i;
+  err = rocrtst::AcquirePoolInfo(pool, &pool_i);
+  ASSERT_EQ(HSA_STATUS_SUCCESS, err);
+
+  if (verbosity() > 0) {
+    PrintAgentNameAndType(agent);
+  }
+
+  // Determine if allocation is allowed in this pool
+  bool alloc = false;
+  err = hsa_amd_memory_pool_get_info(pool,
+                   HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  if (alloc) {
+    // Get the size of the pool
+    size_t alloc_size = 0;
+    err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
+                                       &alloc_size);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    // Cap the size of each allocation at kMaxAllocSize
+    alloc_size = (alloc_size < kMaxAllocSize) ? alloc_size: kMaxAllocSize;
+
+    // Create a test group
+    rocrtst::test_group* tg_concurrent = rocrtst::TestGroupCreate(kNumThreads);
+    ASSERT_TRUE(tg_concurrent != nullptr);
+
+    // The control blocks are used to pass data to the threads
+    uint32_t kk;
+    cb_t cb[kNumThreads];
+    for (kk = 0; kk < kNumThreads; kk++) {
+      cb[kk].pool = &pool;
+      cb[kk].alloc_size = alloc_size;
+      cb[kk].alloc_pointer = nullptr;
+      err = hsa_amd_memory_pool_allocate(*(cb[kk].pool), cb[kk].alloc_size, 0, &(cb[kk].alloc_pointer));
+      ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+      rocrtst::TestGroupAdd(tg_concurrent, &CallbackHSAMemoryFreeFunc, &cb[kk], 1);
+    }
+
+    // Create threads for each test
+    rocrtst::TestGroupThreadCreate(tg_concurrent);
+
+    // Start to run tests
+    rocrtst::TestGroupStart(tg_concurrent);
+
+    // Wait all tests finish
+    rocrtst::TestGroupWait(tg_concurrent);
+
+    // Exit all tests
+    rocrtst::TestGroupExit(tg_concurrent);
+
+    // Destroy thread group and cleanup resources
+    rocrtst::TestGroupDestroy(tg_concurrent);
+  }
+  return;
+}
+
+
+// This test verifies that each agent pool's attribute information
+// is consistent across multiple threads.
+// Launches kNumThreads workers that each run CallbackGetPoolInfo on `pool`
+// against one shared reference snapshot, then requires every worker's
+// `consistency` flag to be 1. `agent` is used only for verbose output.
+// NOTE(review): CallbackGetPoolInfo is defined outside this chunk; it is
+// presumably what sets `consistency` to 1 -- confirm.
+void MemoryConcurrentTest::MemoryConcurrentPoolGetInfo(hsa_agent_t agent,
+                                         hsa_amd_memory_pool_t pool) {
+  // Reference pool attributes, captured before any worker is started.
+  rocrtst::pool_info_t pool_i;
+  const hsa_status_t err = rocrtst::AcquirePoolInfo(pool, &pool_i);
+  ASSERT_EQ(HSA_STATUS_SUCCESS, err);
+
+  if (verbosity() > 0) {
+    PrintAgentNameAndType(agent);
+  }
+
+  thread_data_get_pool_info_t thread_data[kNumThreads];
+  rocrtst::test_group* tg_concurrent = rocrtst::TestGroupCreate(kNumThreads);
+
+  // One work item per thread (the trailing 1 is presumably a copy count);
+  // `consistency` starts at 0 so an unreported result fails the final check.
+  for (uint32_t kk = 0; kk < kNumThreads; kk++) {
+    thread_data[kk].pool = pool;
+    thread_data[kk].info = &pool_i;
+    thread_data[kk].consistency = 0;
+    rocrtst::TestGroupAdd(tg_concurrent, &CallbackGetPoolInfo,
+                          &thread_data[kk], 1);
+  }
+
+  // Group lifecycle: create threads, start, wait for all, exit, destroy.
+  rocrtst::TestGroupThreadCreate(tg_concurrent);
+  rocrtst::TestGroupStart(tg_concurrent);
+  rocrtst::TestGroupWait(tg_concurrent);
+  rocrtst::TestGroupExit(tg_concurrent);
+  rocrtst::TestGroupDestroy(tg_concurrent);
+
+  // The group is already destroyed here, so a failing ASSERT leaks nothing.
+  for (uint32_t kk = 0; kk < kNumThreads; kk++) {
+    ASSERT_EQ(thread_data[kk].consistency, 1);
+  }
+}
+
+// Runs the per-pool concurrent allocation subtest on every memory pool of
+// every agent reported by rocrtst::GetAgentPools().
+void MemoryConcurrentTest::MemoryConcurrentAllocate(void) {
+  if (verbosity() > 0) {
+    PrintMemorySubtestHeader("MemoryConcurrentAllocate in Stress Test");
+  }
+
+  std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;
+  const hsa_status_t err = rocrtst::GetAgentPools(&agent_pools);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  auto pool_idx = 0;
+  for (const auto& a : agent_pools) {
+    for (auto p : a->pools) {
+      if (verbosity() > 0) {
+        std::cout << " Pool " << pool_idx++ << ":" << std::endl;
+      }
+      // A fatal ASSERT_* inside the helper only returns from the helper;
+      // stop here as well instead of going on to report "subtest Passed".
+      ASSERT_NO_FATAL_FAILURE(MemoryConcurrentAllocate(a->agent, p));
+    }
+  }
+
+  if (verbosity() > 0) {
+    std::cout << "subtest Passed" << std::endl;
+    std::cout << kSubTestSeparator << std::endl;
+  }
+}
+
+// Runs the per-pool concurrent free subtest on every memory pool of every
+// agent reported by rocrtst::GetAgentPools().
+void MemoryConcurrentTest::MemoryConcurrentFree(void) {
+  if (verbosity() > 0) {
+    PrintMemorySubtestHeader("MemoryConcurrentFree in Stress Test");
+  }
+
+  std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;
+  const hsa_status_t err = rocrtst::GetAgentPools(&agent_pools);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  auto pool_idx = 0;
+  for (const auto& a : agent_pools) {
+    for (auto p : a->pools) {
+      if (verbosity() > 0) {
+        std::cout << " Pool " << pool_idx++ << ":" << std::endl;
+      }
+      // A fatal ASSERT_* inside the helper only returns from the helper;
+      // stop here as well instead of going on to report "subtest Passed".
+      ASSERT_NO_FATAL_FAILURE(MemoryConcurrentFree(a->agent, p));
+    }
+  }
+
+  if (verbosity() > 0) {
+    std::cout << "subtest Passed" << std::endl;
+    std::cout << kSubTestSeparator << std::endl;
+  }
+}
+
+// Runs the per-pool concurrent pool-info subtest on every memory pool of
+// every agent reported by rocrtst::GetAgentPools().
+void MemoryConcurrentTest::MemoryConcurrentPoolGetInfo(void) {
+  if (verbosity() > 0) {
+    PrintMemorySubtestHeader("MemoryConcurrentPoolGetInfo in Stress Test");
+  }
+
+  std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;
+  const hsa_status_t err = rocrtst::GetAgentPools(&agent_pools);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  auto pool_idx = 0;
+  for (const auto& a : agent_pools) {
+    for (auto p : a->pools) {
+      if (verbosity() > 0) {
+        std::cout << " Pool " << pool_idx++ << ":" << std::endl;
+      }
+      // A fatal ASSERT_* inside the helper only returns from the helper;
+      // stop here as well instead of going on to report "subtest Passed".
+      ASSERT_NO_FATAL_FAILURE(MemoryConcurrentPoolGetInfo(a->agent, p));
+    }
+  }
+
+  if (verbosity() > 0) {
+    std::cout << "subtest Passed" << std::endl;
+    std::cout << kSubTestSeparator << std::endl;
+  }
+}
+
+#undef RET_IF_HSA_ERR
diff --git a/projects/rocr-runtime/rocrtst/suites/stress/memory_concurrent_tests.h b/projects/rocr-runtime/rocrtst/suites/stress/memory_concurrent_tests.h
new file mode 100755
index 0000000000..d9d2dd96fd
--- /dev/null
+++ b/projects/rocr-runtime/rocrtst/suites/stress/memory_concurrent_tests.h
@@ -0,0 +1,109 @@
+/*
+ * =============================================================================
+ * ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2018, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ *
+ */
+#ifndef ROCRTST_SUITES_STRESS_MEMORY_CONCURRENT_TESTS_H_
+#define ROCRTST_SUITES_STRESS_MEMORY_CONCURRENT_TESTS_H_
+
+#include "common/base_rocr.h"
+#include "hsa/hsa.h"
+#include "suites/test_common/test_base.h"
+
+// Stress test case covering concurrent use of ROCR memory pools from threads.
+class MemoryConcurrentTest : public TestBase {
+ public:
+  // @Brief: Constructor taking one launch flag per concurrent subtest
+  MemoryConcurrentTest(bool launch_Concurrent_Allocate_,
+                       bool launch_Concurrent_Free_ ,
+                       bool launch_Concurrent_PoolGetInfo_);
+
+  // @Brief: Destructor for the MemoryConcurrentTest test case
+  virtual ~MemoryConcurrentTest();
+
+  // @Brief: Set up the environment for measurement
+  virtual void SetUp();
+
+  // @Brief: Core measurement execution
+  virtual void Run();
+
+  // @Brief: Clean up and reclaim the resources
+  virtual void Close();
+
+  // @Brief: Display results
+  virtual void DisplayResults() const;
+
+  // @Brief: Display information about what this test does
+  virtual void DisplayTestInfo(void);
+
+  // @Brief: This test verifies that memory can be
+  // concurrently allocated from the pools on ROCR agents
+  void MemoryConcurrentAllocate(void);
+
+  // @Brief: This test verifies that memory can be
+  // concurrently freed from the pools on ROCR agents
+  void MemoryConcurrentFree(void);
+
+  // @Brief: This test verifies that each agent pool's attribute information
+  // is consistent across multiple threads.
+  void MemoryConcurrentPoolGetInfo(void);
+
+ private:
+  // Per-pool variants: each runs its subtest on one agent/pool pair
+  void MemoryConcurrentAllocate(hsa_agent_t agent,
+                                hsa_amd_memory_pool_t pool);
+  void MemoryConcurrentFree(hsa_agent_t agent,
+                            hsa_amd_memory_pool_t pool);
+  void MemoryConcurrentPoolGetInfo(hsa_agent_t agent,
+                                   hsa_amd_memory_pool_t pool);
+
+  // @Brief: Indicates whether to launch the concurrent allocate test
+  bool launch_Concurrent_Allocate_;
+
+  // @Brief: Indicates whether to launch the concurrent free test
+  bool launch_Concurrent_Free_;
+
+  // @Brief: Indicates whether to launch the concurrent pool get info test
+  bool launch_Concurrent_PoolGetInfo_;
+};
+
+#endif  // ROCRTST_SUITES_STRESS_MEMORY_CONCURRENT_TESTS_H_
diff --git a/projects/rocr-runtime/rocrtst/suites/test_common/CMakeLists.txt b/projects/rocr-runtime/rocrtst/suites/test_common/CMakeLists.txt
index 6f527344f9..fcff6a2c00 100755
--- a/projects/rocr-runtime/rocrtst/suites/test_common/CMakeLists.txt
+++ b/projects/rocr-runtime/rocrtst/suites/test_common/CMakeLists.txt
@@ -241,6 +241,7 @@ set(ROCRTST "rocrtst${ONLY64STR}")
 aux_source_directory(${ROCRTST_ROOT}/suites/performance performanceSources)
 aux_source_directory(${ROCRTST_ROOT}/suites/functional functionalSources)
 aux_source_directory(${ROCRTST_ROOT}/suites/negative negativeSources)
+aux_source_directory(${ROCRTST_ROOT}/suites/stress stressSources)
 aux_source_directory(${ROCRTST_ROOT}/suites/test_common testCommonSources)
 
 # Header file include path
@@ -316,7 +317,7 @@ set(CL_FILE_LIST "${KERNELS_DIR}/gpuReadWrite_kernels.cl")
 build_sample_for_devices("gpuReadWrite")
 
 # Build rules
-add_executable(${ROCRTST} ${performanceSources} ${functionalSources} ${negativeSources}
+add_executable(${ROCRTST} ${performanceSources} ${functionalSources} ${negativeSources} ${stressSources}
     ${common_srcs} ${testCommonSources})
 
 target_link_libraries(${ROCRTST} ${ROCRTST_LIBS} c stdc++ dl pthread rt numa hwloc)
diff --git a/projects/rocr-runtime/rocrtst/suites/test_common/main.cc b/projects/rocr-runtime/rocrtst/suites/test_common/main.cc
index 
8ee79e585f..619799866a 100755
--- a/projects/rocr-runtime/rocrtst/suites/test_common/main.cc
+++ b/projects/rocr-runtime/rocrtst/suites/test_common/main.cc
@@ -56,6 +57,7 @@
 #include "suites/performance/memory_async_copy_numa.h"
 #include "suites/performance/enqueueLatency.h"
 #include "suites/negative/memory_allocate_negative_tests.h"
+#include "suites/stress/memory_concurrent_tests.h"
 #include "suites/test_common/test_case_template.h"
 #include "suites/test_common/main.h"
 #include "suites/test_common/test_common.h"
@@ -158,6 +159,26 @@ TEST(rocrtstNeg, Memory_Negative_Tests) {
   RunCustomTestEpilog(&mt);
 }
 
+TEST(rocrtstStress, Memory_Concurrent_Allocate_Test) {
+  MemoryConcurrentTest alloc_test(true, false, false);  // allocate flag only
+  RunCustomTestProlog(&alloc_test);
+  alloc_test.MemoryConcurrentAllocate();  // walks every agent pool
+  RunCustomTestEpilog(&alloc_test);
+}
+
+TEST(rocrtstStress, Memory_Concurrent_Free_Test) {
+  MemoryConcurrentTest free_test(false, true, false);  // free flag only
+  RunCustomTestProlog(&free_test);
+  free_test.MemoryConcurrentFree();  // walks every agent pool
+  RunCustomTestEpilog(&free_test);
+}
+
+TEST(rocrtstStress, Memory_Concurrent_Pool_Info_Test) {
+  MemoryConcurrentTest info_test(false, false, true);  // pool-info flag only
+  RunCustomTestProlog(&info_test);
+  info_test.MemoryConcurrentPoolGetInfo();  // walks every agent pool
+  RunCustomTestEpilog(&info_test);
+}
 TEST(rocrtstPerf, ENQUEUE_LATENCY) {
   EnqueueLatency singlePacketequeue(true);
   EnqueueLatency multiPacketequeue(false);