Add concurrent memory tests for pool memory allocation, memory free, and pool-info queries

Change-Id: I6a1343348e400fe466e041d651adaa67be561a21 [ROCm/ROCR-Runtime commit: 47af1d673e]
2018-05-02 15:04:25 +05:30
commit 32ffaeb7d1
@@ -0,0 +1,317 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2018, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+
+#include <errno.h>
+#include <stdio.h>
+#include <signal.h>
+#include <cstdlib>
+#include "common/concurrent_utils.h"
+
+namespace rocrtst {
+
+/**
+ * @brief Thread entry point that repeatedly runs one test of a test group.
+ *
+ * The thread sleeps on the group's condition variable until the shared
+ * run_flag differs from the value it last observed (TestGroupStart toggles
+ * it) or the shared exit_flag becomes non-zero (TestGroupExit sets it).
+ * On a start request the test status becomes TEST_RUNNING, the test function
+ * is called with its data pointer, the status becomes TEST_STOP and the
+ * group's running-test counter is decremented; the thread that brings the
+ * counter to zero broadcasts on the condition variable so that TestGroupWait
+ * can return. On an exit request the status becomes TEST_FINISHED and the
+ * thread terminates.
+ *
+ * @param input Pointer to the thread_aux record of this thread; it supplies
+ * the test (function pointer, data, status) and pointers to the group's
+ * shared run_flag, exit_flag, mutex, condition variable and running-test
+ * counter.
+ * @return Always NULL.
+ */
+
+static void *worker(void *input) {
+  thread_aux* thread = reinterpret_cast<thread_aux*>(input);
+  func_ptr fun_prt = reinterpret_cast<func_ptr>(thread->test->fun_prt);
+  // Last value of the shared run_flag this thread has reacted to
+  int run_flag_l = 0;
+
+  while (1) {
+    pthread_mutex_lock(thread->test_mutex);
+    // Blocked until run_flag is toggled or exit_flag is raised
+    while (*thread->run_flag == run_flag_l && *thread->exit_flag == 0) {
+      pthread_cond_wait(thread->test_cond, thread->test_mutex);
+    }
+    // Decide what woke us while the mutex is still held, so the decision is
+    // made on the same state the wait loop observed.
+    int exit_requested = (*thread->exit_flag != 0);
+    pthread_mutex_unlock(thread->test_mutex);
+
+    if (exit_requested) {
+      thread->test->status = TEST_FINISHED;
+      break;
+    }
+
+    // Remember the new run_flag value so the next wait blocks again
+    run_flag_l = run_flag_l ^ 1;
+
+    thread->test->status = TEST_RUNNING;
+    fun_prt(thread->test->data);
+
+    pthread_mutex_lock(thread->test_mutex);
+    // Publish TEST_STOP before the counter can reach zero, so a master
+    // released from TestGroupWait never sees a stale TEST_RUNNING.
+    thread->test->status = TEST_STOP;
+    (*(thread->num_running_t))--;
+
+    // The last test to finish wakes the master thread
+    if ((*thread->num_running_t) == 0) {
+      pthread_cond_broadcast(thread->test_cond);
+    }
+    pthread_mutex_unlock(thread->test_mutex);
+  }
+  return NULL;
+}
+
+/**
+ * @brief Allocate a test_group together with a test_list array that has room
+ * for group_size tests, and reset the group's counters, flags and thread
+ * bookkeeping pointers.
+ * @param group_size The size of test group, i.e., the size of test lists
+ * @return Pointer to the new test_group, or NULL if memory could not be
+ * allocated
+ */
+test_group *TestGroupCreate(size_t group_size) {
+  test_group *new_group = static_cast<test_group *>(malloc(sizeof(test_group)));
+  if (new_group == NULL) {
+    return NULL;
+  }
+
+  // malloc test_list array with group_size; malloc(0) may legitimately
+  // return NULL, so only a non-empty request counts as a failure
+  new_group->test_list = static_cast<test_aux *>(malloc(sizeof(test_aux) * group_size));
+  if (new_group->test_list == NULL && group_size != 0) {
+    free(new_group);
+    return NULL;
+  }
+
+  // initialize variables in the data structure
+  new_group->group_size = group_size;
+  new_group->n_threads = 0;
+  new_group->num_test = 0;
+  new_group->run_flag = 0;
+  new_group->exit_flag = 0;
+  new_group->num_running_t = 0;
+  // TestGroupExit/TestGroupKill free these; keep them well defined even if
+  // TestGroupThreadCreate is never called
+  new_group->tid = NULL;
+  new_group->thread_list = NULL;
+
+  return new_group;
+}
+
+// Block the calling thread until the group's running-test counter is zero.
+// The counter is examined with the group mutex held; while it is non-zero
+// the caller sleeps on the group's condition variable.
+void TestGroupWait(test_group *t_group) {
+  pthread_mutex_t *group_lock = &t_group->test_mutex;
+
+  pthread_mutex_lock(group_lock);
+  for (;;) {
+    if (t_group->num_running_t == 0) {
+      break;
+    }
+    pthread_cond_wait(&t_group->test_cond, group_lock);
+  }
+  pthread_mutex_unlock(group_lock);
+}
+
+// Append num_copy entries to the group's test list, each running fun_prt
+// with the same data pointer and starting in state TEST_NOT_STARTED.
+// Nothing is added (and an error is printed) when the entries would not fit
+// into the group_size slots of the list.
+void TestGroupAdd(test_group *t_group, func_ptr fun_prt, void *data, size_t num_copy) {
+  size_t num_test = static_cast<size_t>(t_group->num_test);
+
+  // Written as a subtraction so that num_copy + num_test cannot wrap around
+  if (num_test > t_group->group_size ||
+      num_copy > t_group->group_size - num_test) {
+    fprintf(stderr, "Error beyound group size: %zu, please resize the test_group\n", t_group->group_size);
+    return;
+  }
+
+  test_aux *slot = t_group->test_list + num_test;
+  for (size_t ii = 0; ii < num_copy; ii++) {
+    slot[ii].fun_prt = reinterpret_cast<void*>(fun_prt);
+    slot[ii].data = data;
+    slot[ii].status = TEST_NOT_STARTED;
+  }
+  t_group->num_test = static_cast<int>(num_test + num_copy);
+
+  return;
+}
+
+// Grow the test list of the group to new_group_size entries.
+// A request that is smaller than the current size, or a failed reallocation,
+// is reported on stderr and leaves the group unchanged.
+// The list may move in memory, and worker records created by
+// TestGroupThreadCreate point into it, so resize before creating threads.
+void TestGroupResize(test_group *t_group, size_t new_group_size) {
+  if (new_group_size < t_group->group_size) {
+    fprintf(stderr, "Error new group_size is smaller than current group_size\n");
+    return;
+  }
+
+  // Keep the old pointer until realloc is known to have succeeded
+  test_aux *new_test_list;
+  new_test_list = static_cast<test_aux *>(realloc(t_group->test_list, new_group_size * sizeof(test_aux)));
+  if (new_test_list == NULL && new_group_size != 0) {
+    fprintf(stderr, "Error failed to reallocate the test list\n");
+    return;
+  }
+  t_group->group_size = new_group_size;
+  t_group->test_list = new_test_list;
+
+  return;
+}
+
+// Create one joinable worker thread per test currently in the group and
+// initialise the mutex, condition variable and thread attributes they share.
+// If a thread cannot be created, that test and all later ones are marked
+// TEST_ERROR and n_threads is reduced to the number of threads that exist,
+// so that later joins only touch valid thread ids.
+void TestGroupThreadCreate(test_group *t_group) {
+  pthread_mutex_init(&(t_group->test_mutex), NULL);
+  pthread_cond_init(&(t_group->test_cond), NULL);
+  pthread_attr_init(&(t_group->attr));
+  pthread_attr_setdetachstate(&(t_group->attr), PTHREAD_CREATE_JOINABLE);
+
+  int n_threads = t_group->n_threads = t_group->num_test;
+  thread_aux *thread_list = t_group->thread_list =
+              static_cast<thread_aux *>(malloc(sizeof(thread_aux) * n_threads));
+  t_group->tid = static_cast<pthread_t*>(malloc(sizeof(pthread_t) * n_threads));
+
+  if (n_threads > 0 && (thread_list == NULL || t_group->tid == NULL)) {
+    perror("malloc failed");
+    free(t_group->thread_list);
+    free(t_group->tid);
+    t_group->thread_list = NULL;
+    t_group->tid = NULL;
+    t_group->n_threads = 0;
+    return;
+  }
+
+  for (int ii = 0; ii < n_threads; ++ii) {
+    // CPU_ZERO(&thread_list[ii].cpuset);
+    thread_list[ii].tid = ii;
+    thread_list[ii].test = t_group->test_list + ii;
+    thread_list[ii].run_flag = &(t_group->run_flag);
+    thread_list[ii].exit_flag = &(t_group->exit_flag);
+    thread_list[ii].test_mutex = &(t_group->test_mutex);
+    thread_list[ii].test_cond = &(t_group->test_cond);
+    thread_list[ii].num_running_t = &(t_group->num_running_t);
+    // pthread_create reports failure through a positive error number in its
+    // return value and does not set errno
+    int status = pthread_create(t_group->tid + ii, &(t_group->attr), worker, thread_list + ii);
+    if (status != 0) {
+      errno = status;
+      perror("pthread_create failed");
+      for (int jj = ii; jj < n_threads; ++jj) {
+        t_group->test_list[jj].status = TEST_ERROR;
+      }
+      t_group->n_threads = ii;
+      break;
+    }
+  }
+
+  return;
+}
+
+// Report how many tests have been added to the group so far
+int TestGroupNumTests(test_group *t_group) {
+  const int test_count = t_group->num_test;
+  return test_count;
+}
+
+// Placeholder for binding the thread of test test_id to CPU cpu_id.
+// The body is currently a no-op: all three arguments are ignored and no
+// thread affinity is changed.
+void TestGroupThreadAffinity(test_group *t_group, int test_id, int cpu_id) {
+/*  Setting CPU affinity isn't currently supported.
+ *  Disabled implementation, kept for reference:
+ *  CPU_SET(cpu_id, &t_group->thread_list[test_id].cpuset);
+ *  int status;
+ *  status = pthread_setaffinity_np(t_group->tid[test_id],
+ *          sizeof(cpu_set_t), &t_group->thread_list[test_id].cpuset);
+ *  if (status != 0) {
+ *      perror("pthread_setaffinity_np error");
+ *  }
+ */
+  return;
+}
+
+// Start one run of every test in the group: toggle run_flag, arm the
+// running-test counter and wake all worker threads.
+// If tests from a previous start are still running, an error is printed and
+// nothing is started.
+void TestGroupStart(test_group *t_group) {
+  pthread_mutex_lock(&t_group->test_mutex);
+
+  // Checked under the mutex because workers decrement the counter under it
+  if (t_group->num_running_t != 0) {
+    unsigned int still_running = t_group->num_running_t;
+    pthread_mutex_unlock(&t_group->test_mutex);
+    fprintf(stderr, "Error: %u tests are not finished\n", still_running);
+    return;
+  }
+
+  t_group->run_flag = t_group->run_flag ^ 1;
+  // Only existing worker threads decrement the counter, so arm it with the
+  // number of threads; it equals num_test when every thread was created
+  t_group->num_running_t = t_group->n_threads;
+  pthread_cond_broadcast(&t_group->test_cond);
+  pthread_mutex_unlock(&t_group->test_mutex);
+
+  return;
+}
+
+// Ask every worker thread to terminate and release the thread resources:
+// set exit_flag to 1, wake the workers, join each of them, destroy the
+// shared attr/mutex/condition variable and free the tid and thread_list
+// arrays. A test whose thread cannot be joined is marked TEST_ERROR.
+// The test list itself stays allocated until TestGroupDestroy.
+void TestGroupExit(test_group *t_group) {
+  int ii = 0;
+  int status;
+
+  pthread_mutex_lock(&t_group->test_mutex);
+  t_group->exit_flag = 1;
+  pthread_cond_broadcast(&t_group->test_cond);
+  pthread_mutex_unlock(&t_group->test_mutex);
+
+  for (ii = 0; ii < t_group->n_threads; ++ii) {
+    // pthread_join returns a positive error number on failure and does not
+    // set errno
+    status = pthread_join(t_group->tid[ii], 0);
+    if (status != 0) {
+      errno = status;
+      perror("pthread_join failed");
+      t_group->test_list[ii].status = TEST_ERROR;
+    }
+  }
+
+  pthread_attr_destroy(&(t_group->attr));
+  pthread_mutex_destroy(&(t_group->test_mutex));
+  pthread_cond_destroy(&(t_group->test_cond));
+
+  free(t_group->tid);
+  free(t_group->thread_list);
+  // Do not leave dangling pointers behind in the group
+  t_group->tid = NULL;
+  t_group->thread_list = NULL;
+
+  return;
+}
+
+// Forcefully stop a test group: request cancellation of every worker thread
+// with pthread_cancel, destroy the shared attr/mutex/condition variable and
+// free the tid and thread_list arrays. A test whose thread cannot be
+// cancelled is marked TEST_ERROR.
+// The threads are not joined here.
+// NOTE(review): cancellation is only acted upon at a cancellation point, so
+// a thread may still be running when this function returns - confirm that
+// callers do not rely on the threads being gone.
+void TestGroupKill(test_group *t_group) {
+  int ii = 0;
+  int status;
+  for (ii = 0; ii < t_group->n_threads; ++ii) {
+    // pthread_cancel returns a positive error number on failure and does not
+    // set errno
+    status = pthread_cancel(t_group->tid[ii]);
+    if (status != 0) {
+      errno = status;
+      perror("pthread_cancel failed");
+      t_group->test_list[ii].status = TEST_ERROR;
+    }
+  }
+
+  pthread_attr_destroy(&(t_group->attr));
+  pthread_mutex_destroy(&(t_group->test_mutex));
+  pthread_cond_destroy(&(t_group->test_cond));
+
+  free(t_group->tid);
+  free(t_group->thread_list);
+  // Do not leave dangling pointers behind in the group
+  t_group->tid = NULL;
+  t_group->thread_list = NULL;
+
+  return;
+}
+
+// Free the test list of the group and the group itself.
+// Like free(), a NULL group is accepted and ignored.
+// Thread resources are not touched here; TestGroupExit or TestGroupKill
+// release those.
+void TestGroupDestroy(test_group *t_group) {
+  if (t_group == NULL) {
+    return;
+  }
+
+  free(t_group->test_list);
+  free(t_group);
+
+  return;
+}
+
+// Return the TEST_STATUS value recorded for test test_id.
+// A test recorded as TEST_RUNNING whose thread no longer exists
+// (pthread_kill with signal 0 reports ESRCH) is changed to TEST_ERROR first.
+// An id outside [0, n_threads) is reported on stderr and yields TEST_ERROR
+// without touching the test or thread arrays.
+int TestGroupTestStatus(test_group *t_group, int test_id) {
+  if (test_id < 0 || test_id >= t_group->n_threads) {
+    fprintf(stderr, "test_id: %d is out of range, number of threads: %d\n", test_id, t_group->n_threads);
+    return TEST_ERROR;
+  }
+
+  if (t_group->test_list[test_id].status == TEST_RUNNING) {
+    if (pthread_kill(t_group->tid[test_id], 0) == ESRCH) {
+      t_group->test_list[test_id].status = TEST_ERROR;
+    }
+  }
+
+  return t_group->test_list[test_id].status;
+}
+
+}  // namespace rocrtst
@@ -0,0 +1,213 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2018, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#ifndef ROCRTST_COMMON_CONCURRENT_UTILS_H_
+#define ROCRTST_COMMON_CONCURRENT_UTILS_H_
+
+#include <pthread.h>
+#include <stdint.h>
+#include <iostream>
+
+
+namespace rocrtst {
+/**
+ * @enum TEST_STATUS
+ * @brief This enum lists status of test pthread
+ *
+ * TEST_NOT_STARTED  the test has been added to a group but not run yet
+ * TEST_RUNNING      the worker thread is about to call, or is inside, the
+ *                   test function
+ * TEST_STOP         the test function has returned; the worker thread is
+ *                   still alive and can run the test again
+ * TEST_FINISHED     the worker thread has left because the group's
+ *                   exit_flag was set
+ * TEST_ERROR        a thread operation on the test's thread was reported as
+ *                   failed, or the thread of a running test no longer exists
+ */
+enum TEST_STATUS {TEST_NOT_STARTED, TEST_RUNNING,
+                  TEST_STOP, TEST_FINISHED, TEST_ERROR};
+
+
+// Signature of a test function; input is the data pointer that was passed
+// to TestGroupAdd together with the function
+typedef void (*func_ptr)(void *input);
+/**
+ * @struct test_aux
+ * @brief This structure holds information for a test: the function to run,
+ * the argument handed to it, and the current state of the test
+ */
+struct test_aux{
+    // Pointer to the test function, stored as void*; it is cast back to
+    // func_ptr by the worker thread before being called
+    void *fun_prt;
+    // Pointer to the data for the test function
+    void *data;
+    // status of the test listed in enum TEST_STATUS
+    uint16_t status;
+};
+
+/**
+ * @struct thread_aux
+ * @brief This structure holds the data for a test thread. Apart from the
+ * index and the test pointer, every member points at state owned by the
+ * test_group the thread belongs to.
+ */
+struct thread_aux {
+    // Thread Id: index of the thread (and of its test) within the group,
+    // not a pthread_t
+    int tid;
+    // Pointer to a test item
+    test_aux *test;
+    // Pointer to the run_flag shared in the test group
+    volatile int *run_flag;
+    // Pointer to the exit_flag shared in the test group
+    volatile int *exit_flag;
+    // Pointer to the pthread mutex shared in the test group
+    pthread_mutex_t *test_mutex;
+    // Pointer to the pthread condition shared in the test group
+    pthread_cond_t *test_cond;
+    // Pointer to the number of running tests
+    volatile unsigned int *num_running_t;
+};
+
+/**
+ * @struct test_group
+ * @brief This structure holds data for a test group: the list of tests, the
+ * threads that run them and the synchronization state shared by the master
+ * thread and the worker threads
+ */
+struct test_group {
+    // test group size, i.e., size of test_list array
+    size_t group_size;
+    // number of test
+    int num_test;
+    // number of threads - since one test per thread, equal to num_test
+    int n_threads;
+    // a flag for telling all threads to run; TestGroupStart toggles it
+    // between 0 and 1 and a worker runs its test whenever the value differs
+    // from the one it saw last
+    volatile int run_flag;
+    // a flag for telling all threads to finish - 1: exit
+    volatile int exit_flag;
+    // pthread tid, one entry per thread
+    pthread_t *tid;
+    // pthread attr
+    pthread_attr_t attr;
+    // pthread mutex shared in a group
+    pthread_mutex_t test_mutex;
+    // pthread condition signal shared in a group
+    pthread_cond_t test_cond;
+    // the list of test info
+    test_aux *test_list;
+    // the list of thread info
+    thread_aux *thread_list;
+    // number of running tests; set by TestGroupStart and decremented by
+    // each worker when its test function returns
+    volatile unsigned int num_running_t;
+};
+
+/**
+ * @brief create a test group, and preallocate
+ * test_list array with group_size
+ * @param group_size Number of tests the test_list array can hold
+ * @return initialized struct test_group
+ */
+test_group* TestGroupCreate(size_t group_size);
+
+/**
+ * @brief resize the array of test_list
+ * An error is printed when new_group_size is smaller than the current
+ * group size.
+ * @param t_group Pointer to a test group
+ * @param new_group_size New number of entries of the test_list array
+ */
+void TestGroupResize(test_group *t_group, size_t new_group_size);
+
+/**
+ * @brief add a new test into the specific test group
+ * An error is printed and nothing is added when the copies do not fit into
+ * the remaining space of the test_list array.
+ * @param t_group Pointer to a test group
+ * @param fun Pointer to the test function
+ * @param data Pointer to data for the test function
+ * @param num_copy Number of copies of the test
+ */
+void TestGroupAdd(test_group *t_group, func_ptr fun,
+                    void *data, size_t num_copy);
+
+/**
+ * @brief create threads for tests in a test group
+ * One thread is created per test; the threads wait until TestGroupStart
+ * is called.
+ * @param t_group Pointer to a test group
+ */
+void TestGroupThreadCreate(test_group *t_group);
+
+/**
+ * @brief return the number of tests in a test group
+ * @param t_group Pointer to a test group
+ * @return number of tests added to the group
+ */
+int TestGroupNumTests(test_group *t_group);
+
+/**
+ * @brief run all threads/tests in a test group
+ * If tests of a previous start are still running, an error is printed and
+ * nothing is started.
+ * @param t_group Pointer to a test group
+ */
+void TestGroupStart(test_group *t_group);
+
+/**
+ * @brief wait all threads/tests in a test group finish
+ * The function is blocked until all threads are finished
+ * @param t_group Pointer to a test group
+ */
+void TestGroupWait(test_group *t_group);
+
+/**
+ * @brief terminate all threads/tests in a test group:
+ * set exit_flag to 1, wake the threads up and wait until all of them are
+ * joined, then release the thread resources of the group. The test list
+ * stays valid until TestGroupDestroy.
+ * @param t_group Pointer to a test group
+ */
+void TestGroupExit(test_group *t_group);
+
+/**
+ * @brief destroy a test group: free the test list and the group itself
+ * @param t_group Pointer to a test group
+ */
+void TestGroupDestroy(test_group *t_group);
+
+/**
+ * @brief check the status of specific test in a test group
+ * A test recorded as TEST_RUNNING whose thread no longer exists is changed
+ * to TEST_ERROR.
+ * @param t_group Pointer to a test group
+ * @param test_id Test No.
+ * @return the status of the test listed in enum TEST_STATUS
+ */
+int TestGroupTestStatus(test_group *t_group, int test_id);
+
+/**
+ * @brief set affinity of the specific test
+ * Setting the affinity is currently not supported; the call has no effect.
+ * @param t_group Pointer to a test group
+ * @param test_id Test No.
+ * @param cpu_id CPU No. that the test is bound to
+ */
+void TestGroupThreadAffinity(test_group *t_group,
+                                int test_id, int cpu_id);
+
+/**
+ * @brief force kill a test group: cancel all threads with pthread_cancel
+ * and release the thread resources of the group
+ * @param t_group Pointer to a test group
+ */
+void TestGroupKill(test_group *t_group);
+}  // namespace rocrtst
+#endif  // ROCRTST_COMMON_CONCURRENT_UTILS_H_
@@ -0,0 +1,537 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2018, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+
+#include <fcntl.h>
+#include <string.h>
+#include <algorithm>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "suites/stress/memory_concurrent_tests.h"
+#include "common/base_rocr_utils.h"
+#include "common/common.h"
+#include "common/helper_funcs.h"
+#include "common/hsatimer.h"
+#include "common/concurrent_utils.h"
+#include "gtest/gtest.h"
+#include "hsa/hsa.h"
+#include "hsa/hsa_ext_finalize.h"
+
+
+// Number of tests (one thread each) that every subtest adds to its group
+static const uint32_t kNumThreads = 1024;
+// Upper bound, in bytes, of the size of each allocation made by the
+// allocate and free subtests
+static const uint32_t kMaxAllocSize = 1024 * 1024;
+
+
+
+#define RET_IF_HSA_ERR(err) { \
+  if ((err) != HSA_STATUS_SUCCESS) { \
+    const char* msg = 0; \
+    hsa_status_string(err, &msg); \
+    std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
+                          __FILE__ << ". Call returned " << err << std::endl; \
+    std::cout << msg << std::endl; \
+    return (err); \
+  } \
+}
+
+
+// Per-thread argument block of the allocate and free subtests
+typedef struct control_block {
+    // Pool the allocation is made from
+    hsa_amd_memory_pool_t* pool;
+    // Size in bytes of the allocation
+    size_t alloc_size;
+    // Address of the allocation: written by the allocate callback, read by
+    // the free callback
+    void* alloc_pointer;
+} cb_t;
+
+
+// Test function run by each thread of the allocate subtest.
+// data points to a cb_t; alloc_size bytes are allocated from its pool and
+// the resulting address is stored in its alloc_pointer member.
+static void CallbackHSAMemoryAllocateFunc(void *data) {
+  cb_t *block = static_cast<cb_t*>(data);
+  void **result_slot = reinterpret_cast<void**>(&(block->alloc_pointer));
+
+  hsa_status_t err = hsa_amd_memory_pool_allocate(*(block->pool),
+                                                  block->alloc_size, 0,
+                                                  result_slot);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+}
+
+// Test function run by each thread of the free subtest.
+// data points to a cb_t whose alloc_pointer member is released with
+// hsa_memory_free.
+static void CallbackHSAMemoryFreeFunc(void *data) {
+  cb_t *block = static_cast<cb_t*>(data);
+
+  hsa_status_t err = hsa_memory_free(block->alloc_pointer);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+}
+
+// Per-thread argument block of the pool-info subtest
+typedef struct thread_data_get_pool_info_s {
+    // The current pool
+    hsa_amd_memory_pool_t pool;
+    // The pool info retrieved from main thread
+    rocrtst::pool_info_t* info;
+    // Consistency check result: 1 when the info acquired by the thread is
+    // byte-wise equal to *info, 0 otherwise
+    int consistency;
+} thread_data_get_pool_info_t;
+
+// Test function run by each thread of the pool-info subtest.
+// data points to a thread_data_get_pool_info_t. The thread acquires the
+// info of the pool into a zero-filled local structure and records in the
+// consistency member whether it is byte-wise equal to the reference info.
+static void CallbackGetPoolInfo(void* data) {
+  thread_data_get_pool_info_t* td =
+              static_cast<thread_data_get_pool_info_t*>(data);
+
+  rocrtst::pool_info_t local_info;
+  memset(&local_info, 0, sizeof(rocrtst::pool_info_t));
+
+  hsa_status_t err = rocrtst::AcquirePoolInfo(td->pool, &local_info);
+  ASSERT_EQ(HSA_STATUS_SUCCESS, err);
+
+  // 1: identical to the info obtained by the main thread, 0: different
+  const bool same =
+      (memcmp(td->info, &local_info, sizeof(rocrtst::pool_info_t)) == 0);
+  td->consistency = same ? 1 : 0;
+}
+
+// Build the test object and derive its title and description from the
+// first launch flag that is set (allocate, then free, then pool-info).
+MemoryConcurrentTest::MemoryConcurrentTest(bool launch_Concurrent_Allocate_,
+                      bool launch_Concurrent_Free_ ,
+                      bool launch_Concurrent_PoolGetInfo_) :TestBase() {
+  // Default number of iterations of the main test; it can be overridden on
+  // the command line.
+  set_num_iteration(10);
+
+  std::string name("RocR Memory Concurrent");
+  std::string desc("These series of tests are Stress tests which contains different subtests ");
+
+  // Text appended for the selected subtest; stays empty if no flag is set
+  const char *name_suffix = "";
+  const char *desc_suffix = "";
+
+  if (launch_Concurrent_Allocate_) {
+    name_suffix = " Allocate";
+    desc_suffix = " This test Verify that memory can be concurrently allocated from pool"
+                  " and thread safety while allocating memory from different threads"
+                  " on ROCR agents";
+  } else if (launch_Concurrent_Free_) {
+    name_suffix = " Free";
+    desc_suffix = " This test thet memory Verify can be concurrently freed from pool"
+                  " and thread safety while memory free from different threads"
+                  " on ROCR agents";
+  } else if (launch_Concurrent_PoolGetInfo_) {
+    name_suffix = " PoolGetInfo";
+    desc_suffix = " This test Verify that memory pool info can be concurrently "
+                  " get from different threads on ROCR agents";
+  }
+
+  name += name_suffix;
+  desc += desc_suffix;
+  set_title(name);
+  set_description(desc);
+}
+
+// Nothing to release here; the body is intentionally empty
+MemoryConcurrentTest::~MemoryConcurrentTest(void) {
+}
+
+// One-time setup of the member variables used by the rest of the test:
+// run the base-class setup, then select the default agents and the typical
+// memory pools. Each step must report HSA_STATUS_SUCCESS.
+void MemoryConcurrentTest::SetUp(void) {
+  TestBase::SetUp();
+
+  hsa_status_t err = rocrtst::SetDefaultAgents(this);
+  ASSERT_EQ(HSA_STATUS_SUCCESS, err);
+
+  err = rocrtst::SetPoolsTypical(this);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+}
+
+// Run the base-class test body, but only when the profile required by this
+// test case matches the one we are actually running on.
+void MemoryConcurrentTest::Run(void) {
+  if (rocrtst::CheckProfile(this)) {
+    TestBase::Run();
+  }
+}
+
+// Display the test information; this simply forwards to the base class
+void MemoryConcurrentTest::DisplayTestInfo(void) {
+  TestBase::DisplayTestInfo();
+}
+
+// This test has no results of its own to display. The required profile is
+// still compared with the one we are running on, as in Run(), but the
+// function returns without further output in either case.
+void MemoryConcurrentTest::DisplayResults(void) const {
+  (void)rocrtst::CheckProfile(this);
+}
+
+// Tear down the test by delegating to the base class
+void MemoryConcurrentTest::Close() {
+  // This will close handles opened within rocrtst utility calls and call
+  // hsa_shut_down(), so it should be done after other hsa cleanup
+  TestBase::Close();
+}
+
+
+
+
+// Line printed after each subtest, in verbose mode, to separate its output
+// from the next one
+static const char kSubTestSeparator[] = "  **************************";
+
+// Print a one-line banner that names the subtest about to run
+static void PrintMemorySubtestHeader(const char *header) {
+  std::cout << "  *** Memory Stress Subtest: ";
+  std::cout << header;
+  std::cout << " ***" << std::endl;
+}
+
+// Print the name of the agent followed by its device type in parentheses,
+// e.g. "  Agent: <name> (GPU)". A device type other than CPU, GPU or DSP
+// is printed as "Unknown" so that the parenthesis is always closed.
+static void PrintAgentNameAndType(hsa_agent_t agent) {
+  hsa_status_t err;
+
+  // Zero-filled so the buffer is always a terminated string
+  char ag_name[64] = {0};
+  hsa_device_type_t ag_type;
+
+  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, ag_name);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &ag_type);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  std::cout << "  Agent: " << ag_name << " (";
+  switch (ag_type) {
+    case HSA_DEVICE_TYPE_CPU:
+      std::cout << "CPU)";
+      break;
+    case HSA_DEVICE_TYPE_GPU:
+      std::cout << "GPU)";
+      break;
+    case HSA_DEVICE_TYPE_DSP:
+      std::cout << "DSP)";
+      break;
+    default:
+      std::cout << "Unknown)";
+      break;
+  }
+  std::cout << std::endl;
+  return;
+}
+
+// Verify that memory can be allocated concurrently from one pool:
+// kNumThreads threads each allocate one block, then the main thread checks
+// that every allocation succeeded and that no two blocks overlap, and frees
+// the blocks again. Pools that do not allow runtime allocation are skipped.
+void MemoryConcurrentTest::MemoryConcurrentAllocate(hsa_agent_t agent,
+                                               hsa_amd_memory_pool_t pool) {
+  hsa_status_t err;
+
+  rocrtst::pool_info_t pool_i;
+  err = rocrtst::AcquirePoolInfo(pool, &pool_i);
+  ASSERT_EQ(HSA_STATUS_SUCCESS, err);
+
+  if (verbosity() > 0) {
+    PrintAgentNameAndType(agent);
+  }
+
+  // Determine if allocation is allowed in this memory pool
+  bool alloc = false;
+  err = hsa_amd_memory_pool_get_info(pool,
+                   HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  if (!alloc) {
+    return;
+  }
+
+  size_t alloc_size;
+  err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
+                                    &alloc_size);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  // Allocate at most kMaxAllocSize bytes per thread
+  alloc_size = (alloc_size < kMaxAllocSize) ? alloc_size: kMaxAllocSize;
+
+  // Create a test group
+  rocrtst::test_group* tg_concurrent = rocrtst::TestGroupCreate(kNumThreads);
+
+  // The control blocks are used to pass data to the threads
+  uint32_t kk;
+  cb_t cb[kNumThreads];
+  for (kk = 0; kk < kNumThreads; kk++) {
+    cb[kk].pool = &pool;
+    cb[kk].alloc_size = alloc_size;
+    // Stays null when the thread's allocation fails, so the checks below
+    // never read an indeterminate pointer
+    cb[kk].alloc_pointer = nullptr;
+    rocrtst::TestGroupAdd(tg_concurrent, &CallbackHSAMemoryAllocateFunc, &cb[kk], 1);
+  }
+
+  // Create threads for each test
+  rocrtst::TestGroupThreadCreate(tg_concurrent);
+
+  // Start to run tests
+  rocrtst::TestGroupStart(tg_concurrent);
+
+  // Wait all tests finish
+  rocrtst::TestGroupWait(tg_concurrent);
+
+  // Exit all tests
+  rocrtst::TestGroupExit(tg_concurrent);
+
+  // Destroy thread group and cleanup resources
+  rocrtst::TestGroupDestroy(tg_concurrent);
+
+  // Check that every thread got a block and that the blocks
+  // [addr, addr + alloc_size) are pairwise disjoint. Two blocks at the very
+  // same address count as overlapping.
+  bool all_allocated = true;
+  bool disjoint = true;
+  for (kk = 0; kk < kNumThreads && disjoint; ++kk) {
+    char *begin1 = reinterpret_cast<char *>(cb[kk].alloc_pointer);
+    if (begin1 == nullptr) {
+      all_allocated = false;
+      continue;
+    }
+    char *end1 = begin1 + alloc_size;
+    for (uint32_t ll = kk+1; ll < kNumThreads; ++ll) {
+      char *begin2 = reinterpret_cast<char *>(cb[ll].alloc_pointer);
+      if (begin2 == nullptr) {
+        continue;
+      }
+      char *end2 = begin2 + alloc_size;
+      if (begin1 < end2 && begin2 < end1) {
+        disjoint = false;
+        break;
+      }
+    }
+  }
+
+  // Release the blocks before the fatal assertions below, so that a failed
+  // check does not leak the memory of all threads
+  for (uint32_t ii = 0; ii < kNumThreads; ii++) {
+    if (cb[ii].alloc_pointer != nullptr) {
+      err = hsa_memory_free(cb[ii].alloc_pointer);
+      EXPECT_EQ(err, HSA_STATUS_SUCCESS);
+    }
+  }
+
+  ASSERT_TRUE(all_allocated);
+  ASSERT_TRUE(disjoint);
+  return;
+}
+
+
+
+
+// Exercise concurrent freeing on one pool: the main thread allocates
+// kNumThreads blocks from the pool, then kNumThreads threads each free one
+// of them. Pools that do not allow runtime allocation are skipped.
+void MemoryConcurrentTest::MemoryConcurrentFree(hsa_agent_t agent,
+                                                hsa_amd_memory_pool_t pool) {
+  hsa_status_t err;
+
+  rocrtst::pool_info_t pool_i;
+  err = rocrtst::AcquirePoolInfo(pool, &pool_i);
+  ASSERT_EQ(HSA_STATUS_SUCCESS, err);
+
+  if (verbosity() > 0) {
+    PrintAgentNameAndType(agent);
+  }
+
+  // Is runtime allocation permitted in this pool?
+  bool alloc = false;
+  err = hsa_amd_memory_pool_get_info(pool,
+                   HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  if (!alloc) {
+    return;
+  }
+
+  // Start from the size reported for the pool ...
+  size_t alloc_size;
+  err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
+                                    &alloc_size);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  // ... and cap it at kMaxAllocSize
+  if (alloc_size > kMaxAllocSize) {
+    alloc_size = kMaxAllocSize;
+  }
+
+  rocrtst::test_group* tg_concurrent = rocrtst::TestGroupCreate(kNumThreads);
+
+  // One control block per thread: allocate its block here, free it there
+  cb_t cb[kNumThreads];
+  for (uint32_t kk = 0; kk < kNumThreads; kk++) {
+    cb_t *block = cb + kk;
+    block->pool = &pool;
+    block->alloc_size = alloc_size;
+    err = hsa_amd_memory_pool_allocate(*(block->pool), block->alloc_size, 0, &(block->alloc_pointer));
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    rocrtst::TestGroupAdd(tg_concurrent, &CallbackHSAMemoryFreeFunc, block, 1);
+  }
+
+  // Thread life cycle: create, start, wait for completion, exit, destroy
+  rocrtst::TestGroupThreadCreate(tg_concurrent);
+  rocrtst::TestGroupStart(tg_concurrent);
+  rocrtst::TestGroupWait(tg_concurrent);
+  rocrtst::TestGroupExit(tg_concurrent);
+  rocrtst::TestGroupDestroy(tg_concurrent);
+}
+
+
+// Verify that the attribute information of a pool is consistent across
+// threads: the main thread acquires the pool info once, kNumThreads threads
+// acquire it again concurrently, and each thread's copy must be byte-wise
+// equal to the reference.
+void MemoryConcurrentTest::MemoryConcurrentPoolGetInfo(hsa_agent_t agent,
+                                                hsa_amd_memory_pool_t pool) {
+  hsa_status_t err;
+
+  rocrtst::pool_info_t pool_i;
+  // The threads compare with memcmp against a structure they zero-fill
+  // first, so the reference must start from the same zeroed bytes;
+  // otherwise bytes not written by AcquirePoolInfo could differ.
+  memset(&pool_i, 0, sizeof(rocrtst::pool_info_t));
+  err = rocrtst::AcquirePoolInfo(pool, &pool_i);
+  ASSERT_EQ(HSA_STATUS_SUCCESS, err);
+
+  if (verbosity() > 0) {
+    PrintAgentNameAndType(agent);
+  }
+
+
+  uint32_t kk;
+  thread_data_get_pool_info_t thread_data[kNumThreads];
+
+  // Create a test group
+  rocrtst::test_group* tg_concurrent = rocrtst::TestGroupCreate(kNumThreads);
+
+  for (kk = 0; kk < kNumThreads; kk++) {
+    thread_data[kk].pool = pool;
+    thread_data[kk].info = &pool_i;
+    thread_data[kk].consistency = 0;
+    rocrtst::TestGroupAdd(tg_concurrent, &CallbackGetPoolInfo, thread_data + kk, 1);
+  }
+
+  // Create threads for each test
+  rocrtst::TestGroupThreadCreate(tg_concurrent);
+
+  // Start to run tests
+  rocrtst::TestGroupStart(tg_concurrent);
+
+  // Wait all tests finish
+  rocrtst::TestGroupWait(tg_concurrent);
+
+  // Exit all tests
+  rocrtst::TestGroupExit(tg_concurrent);
+
+  // Destroy thread group and cleanup resources
+  rocrtst::TestGroupDestroy(tg_concurrent);
+
+  // Verify pool info is consistent among all threads
+  for (kk = 0; kk < kNumThreads; kk++) {
+    ASSERT_EQ(thread_data[kk].consistency, 1);
+  }
+  return;
+}
+
+
+
+// Driver of the allocate subtest: run the per-pool allocate test on every
+// pool of every agent. Progress is printed only in verbose mode.
+void MemoryConcurrentTest::MemoryConcurrentAllocate(void) {
+  const bool verbose = verbosity() > 0;
+  std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;
+
+  if (verbose) {
+    PrintMemorySubtestHeader("MemoryConcurrentAllocate in Stress Test");
+  }
+
+  hsa_status_t err = rocrtst::GetAgentPools(&agent_pools);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  // Running index over all pools of all agents, used for the output only
+  int pool_idx = 0;
+  for (const auto &agent_entry : agent_pools) {
+    for (const auto &pool : agent_entry->pools) {
+      if (verbose) {
+        std::cout << "  Pool " << pool_idx++ << ":" << std::endl;
+      }
+      MemoryConcurrentAllocate(agent_entry->agent, pool);
+    }
+  }
+
+  if (verbose) {
+    std::cout << "subtest Passed" << std::endl;
+    std::cout << kSubTestSeparator << std::endl;
+  }
+}
+
+// Driver of the free subtest: run the per-pool free test on every pool of
+// every agent. Progress is printed only in verbose mode.
+void MemoryConcurrentTest::MemoryConcurrentFree(void) {
+  const bool verbose = verbosity() > 0;
+  std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;
+
+  if (verbose) {
+    PrintMemorySubtestHeader("MemoryConcurrentFree in Stress Test");
+  }
+
+  hsa_status_t err = rocrtst::GetAgentPools(&agent_pools);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  // Running index over all pools of all agents, used for the output only
+  int pool_idx = 0;
+  for (const auto &agent_entry : agent_pools) {
+    for (const auto &pool : agent_entry->pools) {
+      if (verbose) {
+        std::cout << "  Pool " << pool_idx++ << ":" << std::endl;
+      }
+      MemoryConcurrentFree(agent_entry->agent, pool);
+    }
+  }
+
+  if (verbose) {
+    std::cout << "subtest Passed" << std::endl;
+    std::cout << kSubTestSeparator << std::endl;
+  }
+}
+
+// Driver of the pool-info subtest: run the per-pool consistency test on
+// every pool of every agent. Progress is printed only in verbose mode.
+void MemoryConcurrentTest::MemoryConcurrentPoolGetInfo(void) {
+  const bool verbose = verbosity() > 0;
+  std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;
+
+  if (verbose) {
+    PrintMemorySubtestHeader("MemoryConcurrentPoolGetInfo in Stress Test");
+  }
+
+  hsa_status_t err = rocrtst::GetAgentPools(&agent_pools);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  // Running index over all pools of all agents, used for the output only
+  int pool_idx = 0;
+  for (const auto &agent_entry : agent_pools) {
+    for (const auto &pool : agent_entry->pools) {
+      if (verbose) {
+        std::cout << "  Pool " << pool_idx++ << ":" << std::endl;
+      }
+      MemoryConcurrentPoolGetInfo(agent_entry->agent, pool);
+    }
+  }
+
+  if (verbose) {
+    std::cout << "subtest Passed" << std::endl;
+    std::cout << kSubTestSeparator << std::endl;
+  }
+}
+
+#undef RET_IF_HSA_ERR
@@ -0,0 +1,109 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2018, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+#ifndef ROCRTST_SUITES_STRESS_MEMORY_CONCURRENT_TESTS_H_
+#define ROCRTST_SUITES_STRESS_MEMORY_CONCURRENT_TESTS_H_
+
+
+#include "common/base_rocr.h"
+#include "hsa/hsa.h"
+#include "suites/test_common/test_base.h"
+
+
+class MemoryConcurrentTest : public TestBase {
+ public:
+  // @Brief: Constructor; the flags indicate which concurrent test to launch
+  MemoryConcurrentTest(bool launch_Concurrent_Allocate_,
+                       bool launch_Concurrent_Free_,
+                       bool launch_Concurrent_PoolGetInfo_);
+
+  // @Brief: Destructor for the MemoryConcurrentTest test case
+  virtual ~MemoryConcurrentTest();
+
+  // @Brief: Set up the environment for measurement
+  virtual void SetUp();
+
+  // @Brief: Core measurement execution
+  virtual void Run();
+
+  // @Brief: Clean up and retrieve the resources
+  virtual void Close();
+
+  // @Brief: Display results
+  virtual void DisplayResults() const;
+
+  // @Brief: Display information about what this test does
+  virtual void DisplayTestInfo();
+
+  // @Brief: Verifies that memory can be concurrently allocated
+  // from the pools on ROCR agents
+  void MemoryConcurrentAllocate();
+
+  // @Brief: Verifies that memory can be concurrently freed
+  // from the pools on ROCR agents
+  void MemoryConcurrentFree();
+
+  // @Brief: Verifies that each agent pool's attribute information
+  // is consistent across multiple threads
+  void MemoryConcurrentPoolGetInfo();
+
+ private:
+  // @Brief: Per-pool workers called by the public overloads of the same name
+  void MemoryConcurrentAllocate(hsa_agent_t agent,
+                                hsa_amd_memory_pool_t pool);
+  void MemoryConcurrentFree(hsa_agent_t agent, hsa_amd_memory_pool_t pool);
+  void MemoryConcurrentPoolGetInfo(hsa_agent_t agent,
+                                   hsa_amd_memory_pool_t pool);
+
+  // @Brief: Indicates whether to launch the concurrent allocate test
+  bool launch_Concurrent_Allocate_;
+
+  // @Brief: Indicates whether to launch the concurrent free test
+  bool launch_Concurrent_Free_;
+
+  // @Brief: Indicates whether to launch the concurrent pool get info test
+  bool launch_Concurrent_PoolGetInfo_;
+};
+
+#endif  // ROCRTST_SUITES_STRESS_MEMORY_CONCURRENT_TESTS_H_
@@ -241,6 +241,7 @@ set(ROCRTST "rocrtst${ONLY64STR}")
 aux_source_directory(${ROCRTST_ROOT}/suites/performance performanceSources)
 aux_source_directory(${ROCRTST_ROOT}/suites/functional functionalSources)
 aux_source_directory(${ROCRTST_ROOT}/suites/negative negativeSources)
+aux_source_directory(${ROCRTST_ROOT}/suites/stress stressSources)
 aux_source_directory(${ROCRTST_ROOT}/suites/test_common testCommonSources)

 # Header file include path
@@ -316,7 +317,7 @@ set(CL_FILE_LIST "${KERNELS_DIR}/gpuReadWrite_kernels.cl")
 build_sample_for_devices("gpuReadWrite")

 # Build rules
-add_executable(${ROCRTST} ${performanceSources} ${functionalSources} ${negativeSources}
+add_executable(${ROCRTST} ${performanceSources} ${functionalSources} ${negativeSources} ${stressSources}
                                           ${common_srcs} ${testCommonSources})

 target_link_libraries(${ROCRTST} ${ROCRTST_LIBS} c stdc++ dl pthread rt numa hwloc)
@@ -56,6 +56,7 @@
 #include "suites/performance/memory_async_copy_numa.h"
 #include "suites/performance/enqueueLatency.h"
 #include "suites/negative/memory_allocate_negative_tests.h"
+#include "suites/stress/memory_concurrent_tests.h"
 #include "suites/test_common/test_case_template.h"
 #include "suites/test_common/main.h"
 #include "suites/test_common/test_common.h"
@@ -158,6 +159,26 @@ TEST(rocrtstNeg, Memory_Negative_Tests) {
  RunCustomTestEpilog(&mt);
 }

+TEST(rocrtstStress, Memory_Concurrent_Allocate_Test) {
+  MemoryConcurrentTest alloc_test(true, false, false);  // allocate flag only
+  RunCustomTestProlog(&alloc_test);
+  alloc_test.MemoryConcurrentAllocate();
+  RunCustomTestEpilog(&alloc_test);
+}
+
+TEST(rocrtstStress, Memory_Concurrent_Free_Test) {
+  MemoryConcurrentTest free_test(false, true, false);  // free flag only
+  RunCustomTestProlog(&free_test);
+  free_test.MemoryConcurrentFree();
+  RunCustomTestEpilog(&free_test);
+}
+
+TEST(rocrtstStress, Memory_Concurrent_Pool_Info_Test) {
+  MemoryConcurrentTest info_test(false, false, true);  // pool-info flag only
+  RunCustomTestProlog(&info_test);
+  info_test.MemoryConcurrentPoolGetInfo();
+  RunCustomTestEpilog(&info_test);
+}
 TEST(rocrtstPerf, ENQUEUE_LATENCY) {
  EnqueueLatency singlePacketequeue(true);
  EnqueueLatency multiPacketequeue(false);