Refactored performance test code

Commented and flattened binary search sample. Change-Id: Ib783292207c956d16003195924a3bcfbbde5039f [ROCm/ROCR-Runtime commit: 8161ebb915]
2017-05-05 23:50:42 -05:00
parent 768644ba7a
commit 9f1065771a
117 changed files with 46627 additions and 0 deletions
@@ -0,0 +1,12 @@
+
+*.o
+*.bin
+*.tar
+*.hsaco
+*.orig
+*.obsol
+*.bk
+*.old
+*.cmake
+build/*
+
@@ -0,0 +1,74 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include "common/base_rocr.h"
+#include "common/base_rocr_utils.h"
+#include "common/os.h"
+
+namespace rocrtst {
+
+/// Construct a test object with every member in a known state.
+/// Handles and sizes that are normally filled in later (by the setters or
+/// by the utility functions operating on BaseRocR) start out zeroed so the
+/// getters never return indeterminate values.
+BaseRocR::BaseRocR(void) {
+  num_iteration_ = 100;
+  signal_.handle = 0;
+  cpu_device_.handle = -1;
+  gpu_device1_.handle = -1;
+  region_.handle = 0;
+  device_pool_.handle = 0;
+  cpu_pool_.handle = 0;
+  kern_arg_pool_.handle = 0;
+  main_queue_ = nullptr;
+  kernarg_buffer_ = nullptr;
+  kernel_object_ = 0;
+  memset(&aql_, 0, sizeof(aql_));
+
+  // These members were previously left uninitialized until a setter ran.
+  profile_ = HSA_PROFILE_BASE;
+  group_segment_size_ = 0;
+  private_segment_size_ = 0;
+  group_size_ = 0;
+  kernarg_size_ = 0;
+  kernarg_align_ = 0;
+
+  set_requires_profile(-1);
+  set_enable_interrupt(false);
+  // Remember the current value of HSA_ENABLE_INTERRUPT.
+  orig_hsa_enable_interrupt_ = GetEnv("HSA_ENABLE_INTERRUPT");
+  set_kernel_file_name("");
+  set_verbosity(0);
+}
+
+/// Nothing is released here; the destructor body is empty.
+BaseRocR::~BaseRocR() = default;
+
+}  // namespace rocrtst
@@ -0,0 +1,294 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+/// \file
+/// File containing base class declaration needed for all RocR tests and samples
+/// that allow derived classes to use utility functions.
+
+#ifndef ROCRTST_COMMON_BASE_ROCR_H_
+#define ROCRTST_COMMON_BASE_ROCR_H_
+#include <stdint.h>
+#include <stdio.h>
+#include <string>
+#include "common/common.h"
+#include "common/hsatimer.h"
+#include "hsa/hsa.h"
+#include "hsa/hsa_ext_amd.h"
+
+namespace rocrtst {
+
+/// Common interface for RocR tests and samples, required for several
+/// common functions
+class BaseRocR {
+ public:
+  BaseRocR(void);
+
+  virtual ~BaseRocR(void);
+
+  // Setters and Getters
+
+  void set_gpu_device1(hsa_agent_t in_dev) {
+    gpu_device1_.handle = in_dev.handle;
+  }
+  hsa_agent_t* gpu_device1(void) {
+    return &gpu_device1_;
+  }
+
+  void set_cpu_device(hsa_agent_t in_dev) {
+    cpu_device_.handle = in_dev.handle;
+  }
+  hsa_agent_t* cpu_device(void) {
+    return &cpu_device_;
+  }
+
+  void set_kernel_file_name(const char* in_file_name) {
+    kernel_file_name_ = in_file_name;
+  }
+  std::string const kernel_file_name(void) const {
+    return kernel_file_name_;
+  }
+  /// Store the kernel name in kernel_name_.
+  /// \param[in] in_kernel_name Name to store
+  void set_kernel_name(std::string in_kernel_name) {
+    kernel_name_ = in_kernel_name;
+  }
+  std::string const kernel_name(void) const {
+    return kernel_name_;
+  }
+
+  void set_kernel_object(uint64_t in_kernel_object) {
+    kernel_object_ = in_kernel_object;
+  }
+  uint64_t kernel_object(void) const {
+    return kernel_object_;
+  }
+
+  void set_signal(hsa_signal_t sig) {
+    signal_.handle = sig.handle;
+  }
+  const hsa_signal_t& signal(void) const {
+    return signal_;
+  }
+
+  void set_profile(hsa_profile_t in_prof) {
+    profile_ = in_prof;
+  }
+  hsa_profile_t profile(void) const {
+    return profile_;
+  }
+
+  uint32_t private_segment_size(void) const {
+    return private_segment_size_;
+  }
+  void set_private_segment_size(uint32_t sz) {
+    private_segment_size_ = sz;
+  }
+
+  void set_group_segment_size(uint32_t sz) {
+    group_segment_size_ = sz;
+  }
+  uint32_t group_segment_size(void) const {
+    return group_segment_size_;
+  }
+
+  void set_group_size(uint32_t sz) {
+    group_size_ = sz;
+  }
+  uint32_t group_size(void) const {
+    return group_size_;
+  }
+
+  void set_main_queue(hsa_queue_t* q) {
+    main_queue_ = q;
+  }
+  hsa_queue_t* main_queue(void) const {
+    return main_queue_;
+  }
+
+  hsa_kernel_dispatch_packet_t& aql(void) {
+    return aql_;
+  }
+
+  hsa_region_t& region(void) {
+    return region_;
+  }
+
+  void set_num_iteration(int num) {
+    num_iteration_ = num;
+  }
+  uint32_t num_iteration(void) const {
+    return num_iteration_;
+  }
+
+  hsa_amd_memory_pool_t& device_pool(void) {
+    return device_pool_;
+  }
+
+  hsa_amd_memory_pool_t& cpu_pool(void) {
+    return cpu_pool_;
+  }
+
+  hsa_amd_memory_pool_t& kern_arg_pool(void) {
+    return kern_arg_pool_;
+  }
+
+  void set_kernarg_size(uint32_t sz) {
+    kernarg_size_ = sz;
+  }
+  uint32_t kernarg_size(void) const {
+    return kernarg_size_;
+  }
+
+  void set_kernarg_align(uint32_t align) {
+    kernarg_align_ = align;
+  }
+  uint32_t kernarg_align(void) const {
+    return kernarg_align_;
+  }
+
+  void* kernarg_buffer(void) const {
+    return kernarg_buffer_;
+  }
+  void set_kernarg_buffer(void* buffer) {
+    kernarg_buffer_ = buffer;
+  }
+
+  int32_t requires_profile(void) const {
+    return requires_profile_;
+  }
+
+  char* orig_hsa_enable_interrupt() const {
+    return orig_hsa_enable_interrupt_;
+  }
+
+  bool enable_interrupt() const {
+    return enable_interrupt_;
+  }
+
+  void set_title(std::string name) {
+    title_ = name;
+  }
+  std::string title(void) const {
+    return title_;
+  }
+
+  PerfTimer* hsa_timer(void) {
+    return &hsa_timer_;
+  }
+
+  void set_verbosity(uint32_t v) {
+    verbosity_ = v;
+  }
+  uint32_t verbosity(void) const {
+    return verbosity_;
+  }
+
+ protected:
+  void set_requires_profile(int32_t reqd_prof) {
+    requires_profile_ = reqd_prof;
+  }
+
+  void set_enable_interrupt(bool doEnable) {
+    enable_interrupt_ = doEnable;
+  }
+
+ private:
+  uint64_t num_iteration_;   ///< Number of times to execute test
+
+  hsa_signal_t signal_;   ///< Completion signal used for kernel execution
+
+  hsa_queue_t* main_queue_;   ///< AQL queue used for packets
+
+  hsa_agent_t gpu_device1_;   ///< Handle to first GPU found
+
+  hsa_agent_t cpu_device_;   ///< Handle to CPU
+
+  hsa_region_t region_;   ///< TODO(cfreehil): delete this
+
+  hsa_amd_memory_pool_t device_pool_;   ///< Memory pool on gpu pool list
+
+  hsa_amd_memory_pool_t cpu_pool_;   ///< Memory pool on cpu pool list
+
+  hsa_amd_memory_pool_t kern_arg_pool_;   ///< Memory pool suitable for args
+
+  uint64_t kernel_object_;   ///< Handle to kernel code
+
+  std::string brig_file_;   // TODO(cfreehil): delete this
+
+  std::string kernel_file_name_;   ///< Code object file name
+
+  std::string kernel_name_;   ///< Kernel name
+
+  hsa_kernel_dispatch_packet_t aql_;   ///< Kernel dispatch packet
+
+  uint32_t group_segment_size_;   ///< Kernel group seg size
+
+  uint32_t kernarg_size_;   ///< Kernarg memory size
+
+  uint32_t kernarg_align_;   ///< Alignment for kern argument memory
+
+  void* kernarg_buffer_;    ///< Unaligned allocated kernel arg. buffer
+
+  hsa_profile_t profile_;   ///< Device profile.
+
+  uint32_t group_size_;   ///< Number of work items in one group
+
+  uint32_t private_segment_size_;   ///< Kernel private seg size
+
+  int32_t requires_profile_;   ///< Profile required by test (-1 if no req.)
+
+  char* orig_hsa_enable_interrupt_;   ///< Orig. value of HSA_ENABLE_INTERRUPT
+
+  bool enable_interrupt_;   ///< Whether to enable/disable interrupts for test
+
+  std::string title_;   ///< Displayed title of test
+
+  uint32_t verbosity_;   ///< How much additional output to produce
+
+  PerfTimer hsa_timer_;   ///< Timer to be used for timing parts of test
+};
+
+}  // namespace rocrtst
+#endif  // ROCRTST_COMMON_BASE_ROCR_H_
@@ -0,0 +1,476 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+/// \file
+/// Utility functions that act on BaseRocR objects.
+
+#include "common/base_rocr_utils.h"
+#include <assert.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string>
+#include "common/base_rocr.h"
+#include "common/helper_funcs.h"
+#include "common/os.h"
+#include "hsa/hsa.h"
+#include "hsa/hsa_ext_profiler.h"
+
+namespace rocrtst {
+
+
+#define RET_IF_HSA_UTILS_ERR(err) { \
+  if ((err) != HSA_STATUS_SUCCESS) { \
+    std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
+              __FILE__ << std::endl; \
+    return (err); \
+  } \
+}
+
+/// Release the resources held by \p test, shut the runtime down and restore
+/// HSA_ENABLE_INTERRUPT.
+/// Every step is attempted even if an earlier one fails, so one failed
+/// release neither leaks the remaining resources nor skips the environment
+/// restore. A resource's handle on the test is only cleared when its release
+/// succeeded.
+/// \param[in] test Test to clean up; must not be null
+/// \returns HSA_STATUS_SUCCESS if every step succeeded, otherwise the status
+///   of the first step that failed
+hsa_status_t CommonCleanUp(BaseRocR* test) {
+  assert(test != nullptr);
+
+  hsa_status_t first_err = HSA_STATUS_SUCCESS;
+
+  // Report a failed step, remember the earliest failure, and tell the
+  // caller whether the step succeeded.
+  auto succeeded = [&first_err](hsa_status_t status, int line) -> bool {
+    if (status == HSA_STATUS_SUCCESS) {
+      return true;
+    }
+    std::cout << "hsa api call failure at line " << line << ", file: " <<
+              __FILE__ << std::endl;
+    if (first_err == HSA_STATUS_SUCCESS) {
+      first_err = status;
+    }
+    return false;
+  };
+
+  if (nullptr != test->kernarg_buffer()) {
+    hsa_status_t err = hsa_amd_memory_pool_free(test->kernarg_buffer());
+    if (succeeded(err, __LINE__)) {
+      test->set_kernarg_buffer(nullptr);
+    }
+  }
+
+  if (nullptr != test->main_queue()) {
+    hsa_status_t err = hsa_queue_destroy(test->main_queue());
+    if (succeeded(err, __LINE__)) {
+      test->set_main_queue(nullptr);
+    }
+  }
+
+  if (0 != test->signal().handle) {
+    hsa_status_t err = hsa_signal_destroy(test->signal());
+    if (succeeded(err, __LINE__)) {
+      hsa_signal_t sig;
+      sig.handle = 0;
+      test->set_signal(sig);
+    }
+  }
+
+  (void)succeeded(hsa_shut_down(), __LINE__);
+
+  // Copy the saved value before calling SetEnv; an unset original is
+  // restored as the empty string.
+  std::string intr_val;
+  if (test->orig_hsa_enable_interrupt() != nullptr) {
+    intr_val = test->orig_hsa_enable_interrupt();
+  }
+  SetEnv("HSA_ENABLE_INTERRUPT", intr_val.c_str());
+
+  return first_err;
+}
+
+static const char* PROFILE_STR[] = {"HSA_PROFILE_BASE", "HSA_PROFILE_FULL", };
+
+/// Report the target profile and decide whether the test may run on it.
+/// The test's required profile (HSA_PROFILE_BASE or HSA_PROFILE_FULL, or -1
+/// for "any") is compared with the profile recorded on the test.
+/// \param[in] test Test that provides profile requirements.
+/// \returns bool
+///          - true Machine meets test requirements
+///          - false Machine does not meet test requirements
+static bool CheckProfileAndInform(BaseRocR* test) {
+  const bool verbose = test->verbosity() > 0;
+
+  if (verbose) {
+    std::cout << "Target HW Profile is "
+              << PROFILE_STR[test->profile()] << std::endl;
+  }
+
+  const int32_t required = test->requires_profile();
+
+  // No requirement: always runnable, only mentioned in verbose mode.
+  if (required == -1) {
+    if (verbose) {
+      std::cout << "Test can run on any profile. OK." << std::endl;
+    }
+    return true;
+  }
+
+  std::cout << "Test requires " << PROFILE_STR[required] << ". ";
+
+  const bool profile_ok = (required == test->profile());
+  std::cout << (profile_ok ? "OK." : "Not Running.") << std::endl;
+  return profile_ok;
+}
+
+/// Translate the status returned by an HSA iterate call (for example
+/// hsa_amd_agent_iterate_memory_pools) into a "found / not found" result.
+/// \param[in] err Status returned from the iterate call
+/// \returns HSA_STATUS_SUCCESS iff the iteration was stopped early with
+///  HSA_STATUS_INFO_BREAK; HSA_STATUS_ERROR if the iteration ran to
+///  completion; any other status is passed through unchanged
+static hsa_status_t ProcessIterateError(hsa_status_t err) {
+  switch (err) {
+    case HSA_STATUS_INFO_BREAK:
+      // The callback stopped the iteration: the item was found.
+      return HSA_STATUS_SUCCESS;
+    case HSA_STATUS_SUCCESS:
+      // The iteration visited everything without a match.
+      return HSA_STATUS_ERROR;
+    default:
+      return err;
+  }
+}
+
+/// Fill in the cpu, device and kernarg pool members of \p test by iterating
+/// over the memory pools of the test's CPU and GPU agents.
+/// \param[in] test Test whose agents are searched and whose pools are set
+/// \returns HSA_STATUS_SUCCESS if all three searches succeed, otherwise the
+///   status of the first failing search
+hsa_status_t SetPoolsTypical(BaseRocR* test) {
+  hsa_status_t status;
+
+  // Standard pool of the CPU agent.
+  status = rocrtst::ProcessIterateError(
+      hsa_amd_agent_iterate_memory_pools(*test->cpu_device(),
+                                 rocrtst::FindStandardPool, &test->cpu_pool()));
+  RET_IF_HSA_UTILS_ERR(status);
+
+  // Standard pool of the GPU agent.
+  status = rocrtst::ProcessIterateError(
+      hsa_amd_agent_iterate_memory_pools(*test->gpu_device1(),
+                              rocrtst::FindStandardPool, &test->device_pool());
+  RET_IF_HSA_UTILS_ERR(status);
+
+  // Kernel-argument pool, searched on the CPU agent.
+  status = rocrtst::ProcessIterateError(
+      hsa_amd_agent_iterate_memory_pools(*test->cpu_device(),
+                             rocrtst::FindKernArgPool, &test->kern_arg_pool());
+  RET_IF_HSA_UTILS_ERR(status);
+
+  return HSA_STATUS_SUCCESS;
+}
+
+hsa_status_t InitAndSetupHSA(BaseRocR* test) {
+  hsa_agent_t gpu_device1;
+  hsa_agent_t cpu_device;
+  hsa_status_t err;
+  hsa_signal_t sig;
+  // Set HSA_ENABLE_INTERRUPT before hsa_init() when the test asks for it.
+  if (test->enable_interrupt()) {
+    SetEnv("HSA_ENABLE_INTERRUPT", "1");
+  }
+
+  err = hsa_init();
+  RET_IF_HSA_UTILS_ERR(err);
+  // Find a GPU agent; ProcessIterateError maps "not found" to an error.
+  gpu_device1.handle = 0;
+  err = hsa_iterate_agents(FindGPUDevice, &gpu_device1);
+  RET_IF_HSA_UTILS_ERR(rocrtst::ProcessIterateError(err));
+  test->set_gpu_device1(gpu_device1);
+  // Find a CPU agent the same way.
+  cpu_device.handle = 0;
+  err = hsa_iterate_agents(FindCPUDevice, &cpu_device);
+  RET_IF_HSA_UTILS_ERR(rocrtst::ProcessIterateError(err));
+  test->set_cpu_device(cpu_device);
+  // A handle still at its initial 0 presumably means no agent was stored.
+  if (0 == gpu_device1.handle) {
+    std::cout << "GPU Device is not Created properly!" << std::endl;
+    RET_IF_HSA_UTILS_ERR(HSA_STATUS_ERROR);
+  }
+
+  if (0 == cpu_device.handle) {
+    std::cout << "CPU Device is not Created properly!" << std::endl;
+    RET_IF_HSA_UTILS_ERR(HSA_STATUS_ERROR);
+  }
+  // In verbose mode, print the GPU agent's name.
+  if (test->verbosity() > 0) {
+    char name[64] = {0};
+    err = hsa_agent_get_info(gpu_device1, HSA_AGENT_INFO_NAME, name);
+    RET_IF_HSA_UTILS_ERR(err);
+    std::cout << "The device name is " << name << std::endl;
+  }
+  // Record the GPU agent's profile, then check it against the test's need.
+  hsa_profile_t profile;
+  err = hsa_agent_get_info(gpu_device1, HSA_AGENT_INFO_PROFILE, &profile);
+  RET_IF_HSA_UTILS_ERR(err);
+  test->set_profile(profile);
+
+  if (!CheckProfileAndInform(test)) {
+    return HSA_STATUS_ERROR;
+  }
+  // Create the signal that is stored on the test (initial value 1).
+  err = hsa_signal_create(1, 0, NULL, &sig);
+  RET_IF_HSA_UTILS_ERR(err);
+  test->set_signal(sig);
+
+  return HSA_STATUS_SUCCESS;
+}
+
+bool CheckProfile(BaseRocR const* test) {
+  // A requirement of -1 accepts any profile; otherwise the required profile
+  // must equal the profile recorded on the test.
+  const int32_t required = test->requires_profile();
+  return (required == -1) || (required == test->profile());
+}
+/// Load the code object named by test->kernel_file_name() from the current
+/// directory, look up the kernel named test->kernel_name() for the test's
+/// GPU agent, and store the kernel object handle plus its private, group and
+/// kernarg segment sizes and kernarg alignment on \p test.
+/// NOTE(review): the code object reader and the executable created here are
+/// not destroyed by this function.
+/// \param[in] test Test for which the kernel will be loaded; must not be null
+/// \returns HSA_STATUS_SUCCESS if no errors, HSA_STATUS_ERROR if the file
+///   cannot be opened, otherwise the status of the failing HSA call
+hsa_status_t LoadKernelFromObjFile(BaseRocR* test) {
+  hsa_status_t err;
+  hsa_code_object_reader_t code_obj_rdr = {0};
+  hsa_executable_t executable = {0};
+
+  assert(test != nullptr);
+  hsa_agent_t* agent = test->gpu_device1();  // Assume GPU agent for now
+  std::string obj_file = "./" + test->kernel_file_name();
+  std::string kern_name = test->kernel_name();
+
+  hsa_file_t file_handle = open(obj_file.c_str(), O_RDONLY);
+
+  if (file_handle == -1) {
+    std::cout << "failed to open " << obj_file.c_str() << " at line "
+              << __LINE__ << ", file: " << __FILE__ << std::endl;
+
+    // errno values are not HSA status codes, so report a generic HSA error
+    // instead of casting errno.
+    return HSA_STATUS_ERROR;
+  }
+
+  err = hsa_code_object_reader_create_from_file(file_handle, &code_obj_rdr);
+  // Close the descriptor before checking the status so that it is not
+  // leaked when the reader could not be created.
+  close(file_handle);
+  RET_IF_HSA_UTILS_ERR(err);
+
+  err = hsa_executable_create_alt(HSA_PROFILE_FULL,
+                                  HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
+                                  NULL, &executable);
+  RET_IF_HSA_UTILS_ERR(err);
+  err = hsa_executable_load_agent_code_object(executable, *agent, code_obj_rdr,
+        NULL, NULL);
+  RET_IF_HSA_UTILS_ERR(err);
+  err = hsa_executable_freeze(executable, NULL);
+  RET_IF_HSA_UTILS_ERR(err);
+
+  hsa_executable_symbol_t kern_sym;
+  err = hsa_executable_get_symbol(executable, NULL, kern_name.c_str(), *agent,
+                                  0, &kern_sym);
+  RET_IF_HSA_UTILS_ERR(err);
+
+  // Query the kernel symbol and copy each attribute onto the test.
+  uint64_t codeHandle;
+  err = hsa_executable_symbol_get_info(kern_sym,
+                       HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &codeHandle);
+  RET_IF_HSA_UTILS_ERR(err);
+  test->set_kernel_object(codeHandle);
+
+  uint32_t val;
+  err = hsa_executable_symbol_get_info(kern_sym,
+                HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &val);
+  RET_IF_HSA_UTILS_ERR(err);
+  test->set_private_segment_size(val);
+
+  err = hsa_executable_symbol_get_info(kern_sym,
+                  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &val);
+  RET_IF_HSA_UTILS_ERR(err);
+  test->set_group_segment_size(val);
+
+  err = hsa_executable_symbol_get_info(kern_sym,
+                HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &val);
+  RET_IF_HSA_UTILS_ERR(err);
+  test->set_kernarg_size(val);
+
+  err = hsa_executable_symbol_get_info(kern_sym,
+           HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT, &val);
+  RET_IF_HSA_UTILS_ERR(err);
+  test->set_kernarg_align(val);
+
+  return HSA_STATUS_SUCCESS;
+}
+
+hsa_status_t CreateQueue(hsa_agent_t device, hsa_queue_t** queue,
+                         uint32_t num_pkts, bool do_profile) {
+  hsa_status_t status;
+
+  // A packet count of 0 means "use the agent's maximum queue size".
+  if (0 == num_pkts) {
+    status = hsa_agent_get_info(device, HSA_AGENT_INFO_QUEUE_MAX_SIZE,
+                                &num_pkts);
+    RET_IF_HSA_UTILS_ERR(status);
+  }
+
+  // Profiled queues are created single-producer, regular ones
+  // multi-producer.
+  status = do_profile
+      ? hsa_ext_tools_queue_create_profiled(device, num_pkts,
+                                            HSA_QUEUE_TYPE_SINGLE, NULL, NULL,
+                                            UINT32_MAX, UINT32_MAX, queue)
+      : hsa_queue_create(device, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, NULL,
+                         UINT32_MAX, UINT32_MAX, queue);
+  RET_IF_HSA_UTILS_ERR(status);
+
+  return HSA_STATUS_SUCCESS;
+}
+
+void InitializeAQLPacket(const BaseRocR* test,
+                         hsa_kernel_dispatch_packet_t* aql) {
+  assert(aql != nullptr);
+
+  // The assert disappears under NDEBUG, so still refuse a null packet.
+  if (nullptr == aql) {
+    return;
+  }
+
+  // header stays 0 here; set it right before the doorbell ring.
+  aql->header = 0;
+  aql->setup = 1;
+
+  // Default geometry: one 256 x 1 x 1 work-group covering a 256 x 1 x 1 grid.
+  aql->workgroup_size_x = 256;
+  aql->workgroup_size_y = 1;
+  aql->workgroup_size_z = 1;
+  aql->grid_size_x = 256;
+  aql->grid_size_y = 1;
+  aql->grid_size_z = 1;
+
+  // Kernel-specific values come from the test object.
+  aql->kernel_object = test->kernel_object();
+  aql->private_segment_size = test->private_segment_size();
+  aql->group_segment_size = test->group_segment_size();
+  aql->completion_signal.handle = test->signal().handle;
+
+  // No kernel argument buffer is attached by this function.
+  aql->kernarg_address = nullptr;
+}
+
+void WriteAQLToQueue(BaseRocR* test) {
+  assert(test);
+  assert(test->main_queue());
+  // Reserve one write index; size - 1 is used as the wrap-around mask.
+  void *queue_base = test->main_queue()->base_address;
+  const uint32_t queue_mask = test->main_queue()->size - 1;
+  uint64_t que_idx = hsa_queue_add_write_index_relaxed(test->main_queue(), 1);
+  // Source is the test's staging packet, destination the reserved slot.
+  hsa_kernel_dispatch_packet_t* staging_aql_packet = &test->aql();
+  hsa_kernel_dispatch_packet_t* queue_aql_packet;
+
+  queue_aql_packet =
+       &(reinterpret_cast<hsa_kernel_dispatch_packet_t*>(queue_base))
+                                                        [que_idx & queue_mask];
+  // Copy every field except header and setup.
+  queue_aql_packet->workgroup_size_x = staging_aql_packet->workgroup_size_x;
+  queue_aql_packet->workgroup_size_y = staging_aql_packet->workgroup_size_y;
+  queue_aql_packet->workgroup_size_z = staging_aql_packet->workgroup_size_z;
+  queue_aql_packet->grid_size_x = staging_aql_packet->grid_size_x;
+  queue_aql_packet->grid_size_y = staging_aql_packet->grid_size_y;
+  queue_aql_packet->grid_size_z = staging_aql_packet->grid_size_z;
+  queue_aql_packet->private_segment_size =
+                                     staging_aql_packet->private_segment_size;
+  queue_aql_packet->group_segment_size =
+                                       staging_aql_packet->group_segment_size;
+  queue_aql_packet->kernel_object = staging_aql_packet->kernel_object;
+  queue_aql_packet->kernarg_address = staging_aql_packet->kernarg_address;
+  queue_aql_packet->completion_signal = staging_aql_packet->completion_signal;
+}
+
+hsa_status_t AllocAndSetKernArgs(BaseRocR* test, void* args, size_t arg_size) {
+  void* kern_arg_buf = nullptr;
+  hsa_status_t err;
+  size_t buf_size;
+  size_t req_align;
+  assert(args != nullptr);
+  assert(test != nullptr);
+  // Alignment comes from the value stored on the test (kernarg_align()).
+  req_align = test->kernarg_align();
+  // Allocate enough extra space for alignment adjustments if necessary
+  buf_size = arg_size + (req_align << 1);
+
+  err = hsa_amd_memory_pool_allocate(test->kern_arg_pool(), buf_size, 0,
+                                     reinterpret_cast<void**>(&kern_arg_buf));
+  RET_IF_HSA_UTILS_ERR(err);
+  // Keep the raw (unaligned) pointer on the test.
+  test->set_kernarg_buffer(kern_arg_buf);
+  // The aligned address inside the buffer is what goes into the AQL packet.
+  void *adj_kern_arg_buf = rocrtst::AlignUp(kern_arg_buf, req_align);
+
+  assert(arg_size >= test->kernarg_size());
+  assert(((uintptr_t)adj_kern_arg_buf + arg_size) <
+                                        ((uintptr_t)kern_arg_buf + buf_size));
+
+  err = hsa_memory_copy_workaround_cpu(adj_kern_arg_buf, args, arg_size);
+  RET_IF_HSA_UTILS_ERR(err);
+  // Grant the test's GPU and CPU agents access to the whole allocation.
+  hsa_agent_t ag_list[2] = {*test->gpu_device1(), *test->cpu_device()};
+  err = hsa_amd_agents_allow_access(2, ag_list, NULL, kern_arg_buf);
+  RET_IF_HSA_UTILS_ERR(err);
+
+  test->aql().kernarg_address = adj_kern_arg_buf;
+
+  return HSA_STATUS_SUCCESS;
+}
+
+hsa_status_t AllocAndAllowAccess(BaseRocR* test, size_t len,
+                                  hsa_amd_memory_pool_t pool, void**buffer) {
+  // Allocate first; on success *buffer holds the new memory.
+  hsa_status_t status = hsa_amd_memory_pool_allocate(pool, len, 0, buffer);
+  RET_IF_HSA_UTILS_ERR(status);
+
+  // Then grant access to the test's GPU and CPU agents.
+  hsa_agent_t agents[2] = {*test->gpu_device1(), *test->cpu_device()};
+  status = hsa_amd_agents_allow_access(2, agents, NULL, *buffer);
+  RET_IF_HSA_UTILS_ERR(status);
+
+  return HSA_STATUS_SUCCESS;
+}
+
+/// Fill \p count bytes at \p ptr by filling a temporary buffer from the
+/// test's cpu pool with memset() and copying it to the destination.
+/// The temporary buffer is released on every path, including failures.
+/// \param[in] ptr Start of the memory to fill
+/// \param[in] value Fill value; memset() stores it converted to a byte
+/// \param[in] count Number of bytes to fill
+/// \param[in] dst_ag Destination agent passed on to the copy helper
+/// \param[in] src_ag Source agent passed on to the copy helper
+/// \param[in] test Test providing the cpu pool and the cpu/gpu agents
+/// \returns HSA_STATUS_SUCCESS if no errors, otherwise the status of the
+///   failing call
+hsa_status_t hsa_memory_fill_workaround_gen(void* ptr, uint32_t value,
+    size_t count, hsa_agent_t dst_ag, hsa_agent_t src_ag, BaseRocR* test) {
+  hsa_status_t err;
+  void *tmp_mem = nullptr;
+
+  err = hsa_amd_memory_pool_allocate(test->cpu_pool(), count, 0, &tmp_mem);
+  RET_IF_HSA_UTILS_ERR(err);
+
+  hsa_agent_t ag_list[2] = {*test->gpu_device1(), *test->cpu_device()};
+  err = hsa_amd_agents_allow_access(2, ag_list, NULL, tmp_mem);
+
+  if (err == HSA_STATUS_SUCCESS) {
+    (void)memset(tmp_mem, value, count);
+    err = hsa_memory_copy_workaround_gen(ptr, tmp_mem, count, dst_ag, src_ag);
+  }
+
+  // Free the staging buffer before reporting any failure so it is not
+  // leaked; as before, the result of the free itself is not reported.
+  (void)hsa_amd_memory_pool_free(tmp_mem);
+
+  RET_IF_HSA_UTILS_ERR(err);
+  return HSA_STATUS_SUCCESS;
+}
+
+#undef RET_IF_HSA_UTILS_ERR
+
+}  // namespace rocrtst
@@ -0,0 +1,172 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#ifndef ROCRTST_COMMON_BASE_ROCR_UTILS_H_
+#define ROCRTST_COMMON_BASE_ROCR_UTILS_H_ 1
+
+/// \file
+/// Prototypes of utility functions that act on BaseRocR objects.
+
+#include "common/base_rocr.h"
+#include "hsa/hsa.h"
+
+namespace rocrtst {
+
+/// Open binary kernel object file and set all member data related to the
+/// kernel. Assumes that input test already has the kernel file name and
+/// kernel function specified
+/// \param[in] test Test for which the kernel will be loaded.
+/// \returns HSA_STATUS_SUCCESS if no errors
+hsa_status_t LoadKernelFromObjFile(BaseRocR* test);
+/// Do initialization tasks for HSA test program. This includes calling
+/// hsa_init(), finding and setting the cpu and gpu agent member variables,
+/// creating the signal needed for queueing AQL packets and checking
+/// HW requirements.
+/// \param[in] test Test to initialize
+/// \returns HSA_STATUS_SUCCESS if no errors
+hsa_status_t InitAndSetupHSA(BaseRocR* test);
+
+/// For the provided device agent, create an AQL queue
+/// \param[in] device Device for which a queue is to be created
+/// \param[out] queue Address to which created queue pointer will be written
+/// \param[in] num_pkts Size of the queue to create
+/// \param[in] do_profile [Optional] Specify whether profiled queue should
+///  be created
+/// \returns  HSA_STATUS_SUCCESS if no errors encountered
+hsa_status_t CreateQueue(hsa_agent_t device, hsa_queue_t** queue,
+                         uint32_t num_pkts = 0, bool do_profile = false);
+
+/// This function sets some reasonable default values for an AQL packet.
+/// Override any field as necessary after calling this function.
+/// \param[in] test Test from which information to populate aql packet can
+/// be drawn.
+/// \param[inout] aql Caller provided pointer to aql packet that will be
+/// populated
+/// \returns void
+void InitializeAQLPacket(const BaseRocR* test,
+                         hsa_kernel_dispatch_packet_t* aql);
+
+/// This function writes all of the aql packet fields to the queue besides
+/// "setup" and "header". This assumes all the aql fields have been set
+/// appropriately.
+/// \param[in] test Test containing the queue and aql packet to be written.
+/// \returns void
+void WriteAQLToQueue(BaseRocR* test);
+
+/// This function writes the first 32 bits of an aql packet (header in the
+/// low 16 bits, setup in the high 16 bits) with a single atomic release
+/// store. This function is meant to be called immediately before
+/// ringing door_bell signal.
+/// \param[in] header Value to be written to header field
+/// \param[in] setup Value to be written to setup field
+/// \param[in] queue_packet Start address in queue memory of the aql packet
+/// to be written
+/// \returns void
+inline void AtomicSetPacketHeader(uint16_t header, uint16_t setup,
+                                hsa_kernel_dispatch_packet_t* queue_packet) {
+  // Widen to unsigned 32 bits before shifting: a uint16_t operand is
+  // promoted to (signed) int, and shifting a value >= 0x8000 left by 16
+  // would overflow it.
+  const uint32_t header_and_setup =
+      static_cast<uint32_t>(header) | (static_cast<uint32_t>(setup) << 16);
+  __atomic_store_n(reinterpret_cast<uint32_t*>(queue_packet),
+                   header_and_setup, __ATOMIC_RELEASE);
+}
+
+/// Perform common operations to clean up after executing a test. Specifically,
+/// hsa_shut_down() is called and environment variables that were changed are
+/// reset to their original values.
+/// \param[in] test Test for which clean up will be performed
+/// \returns HSA_STATUS_SUCCESS if everything cleaned up ok, or appropriate HSA
+///   error code otherwise.
+hsa_status_t CommonCleanUp(BaseRocR* test);
+
+///  Check to see if target machine has the necessary profile to run the
+///  provided test.
+///  \param[in] test The test that specifies the required profile.
+bool CheckProfile(BaseRocR const* test);
+
+/// Allocate memory from the kernel args pool and write the provided argument
+/// data to the kernel arg memory. Assumes kern_arg memory pool has been
+/// assigned. The amount of memory allocated will actually be \p arg_size
+/// plus the alignment required by the kernel arguments. The argument will
+/// be written with the proper alignment within the allocated buffer.
+/// \p test kernarg_buffer() will point to the allocated buffer, and it should
+/// be freed when the kernel is no longer being used.
+/// \param test Test from which to find kern_arg pool to write arguments
+/// \param args pointer to block of data containing kernel arguments to be
+///  written. Arguments are assumed to be of the correct placement, length,
+///  and with any padding that is expected by the OpenCL kernel
+/// \param arg_size Size of the kernel arg data (including padding) to be
+/// written
+/// \returns HSA_STATUS_SUCCESS if no errors
+hsa_status_t AllocAndSetKernArgs(BaseRocR* test, void* args,
+                                 size_t arg_size);
+
+/// This function will set the cpu and gpu memory pools to the type used in
+/// many applications.
+/// \param[in] test Test whose cpu, gpu and kernel-arg pool members are set.
+/// \returns HSA_STATUS_SUCCESS if all pools were found, or appropriate HSA
+///   error code otherwise.
+hsa_status_t SetPoolsTypical(BaseRocR* test);
+
+/// Allocate memory from a specified pool and grant both standard BaseRocR
+/// agents access
+/// \param[in] test Test having the agents to which access is granted
+/// \param[in] len Size of the memory buffer to allocate
+/// \param[in] pool Pool from which to allocate memory
+/// \param[out] buffer Address of pointer which will point to newly allocated
+///  memory upon return
+/// \returns HSA_STATUS_SUCCESS if no errors
+hsa_status_t AllocAndAllowAccess(BaseRocR* test, size_t len,
+                                  hsa_amd_memory_pool_t pool, void**buffer);
+
+/// Work-around for hsa_amd_memory_fill, which is currently broken.
+/// \param[in] ptr Pointer to start of memory location to be filled
+/// \param[in] value Value to write to each byte of input buffer
+/// \param[in] count Size of buffer to fill
+/// \param[in] dst_ag Agent owning the buffer to be filled
+/// \param[in] src_ag Agent wanting to do the fill
+/// \param[in] test Test that has handles to cpu and gpu agents that can own
+/// either source or destination of fill
+/// \returns HSA_STATUS_SUCCESS if no errors
+hsa_status_t hsa_memory_fill_workaround_gen(void* ptr, uint32_t value,
+      size_t count, hsa_agent_t dst_ag, hsa_agent_t src_ag, BaseRocR* test);
+}  // namespace rocrtst
+#endif  // ROCRTST_COMMON_BASE_ROCR_UTILS_H_
@@ -0,0 +1,403 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+/// \file
+/// Implementation of utility functions used by RocR applications
+#include "common/common.h"
+#include <assert.h>
+#include <sstream>
+#include <string>
+namespace rocrtst {
+
+
+/// Early-return helper for HSA API calls. If \p err is anything other than
+/// HSA_STATUS_SUCCESS, the failing line and file are printed to std::cout and
+/// \p err is returned from the enclosing function. \p err is expanded more
+/// than once, so pass a plain variable rather than a function call.
+#define RET_IF_HSA_COMMON_ERR(err) { \
+  if ((err) != HSA_STATUS_SUCCESS) { \
+    std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
+              __FILE__ << ". Call returned " << err << std::endl; \
+    return (err); \
+  } \
+}
+
+/// Common body of the agent-search callbacks: hands \p agent back through
+/// \p data when its device type equals \p dev_type.
+/// \param[in] agent Agent being examined
+/// \param[out] data Points to an hsa_agent_t that receives \p agent on a match
+/// \param[in] dev_type Device type being searched for
+/// \returns HSA_STATUS_INFO_BREAK on a match, HSA_STATUS_SUCCESS when the
+///  agent is of a different type, HSA_STATUS_ERROR_INVALID_ARGUMENT when
+///  \p data is null, or the error reported by hsa_agent_get_info
+static hsa_status_t FindAgent(hsa_agent_t agent, void* data,
+                              hsa_device_type_t dev_type) {
+  // Debug builds trap a null output pointer; other builds report it.
+  assert(data != nullptr);
+  if (!data) {
+    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  hsa_device_type_t found_type;
+  const hsa_status_t query_status =
+      hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &found_type);
+  RET_IF_HSA_COMMON_ERR(query_status);
+
+  if (found_type != dev_type) {
+    // Not the kind of device requested.
+    return HSA_STATUS_SUCCESS;
+  }
+
+  hsa_agent_t* out_agent = reinterpret_cast<hsa_agent_t*>(data);
+  *out_agent = agent;
+  return HSA_STATUS_INFO_BREAK;
+}
+
+/// Agent-search callback specialised for GPUs: forwards to FindAgent with
+/// HSA_DEVICE_TYPE_GPU and returns whatever FindAgent returns.
+hsa_status_t FindGPUDevice(hsa_agent_t agent, void* data) {
+  const hsa_device_type_t wanted_type = HSA_DEVICE_TYPE_GPU;
+  return FindAgent(agent, data, wanted_type);
+}
+
+/// Agent-search callback specialised for CPUs: forwards to FindAgent with
+/// HSA_DEVICE_TYPE_CPU and returns whatever FindAgent returns.
+hsa_status_t FindCPUDevice(hsa_agent_t agent, void* data) {
+  const hsa_device_type_t wanted_type = HSA_DEVICE_TYPE_CPU;
+  return FindAgent(agent, data, wanted_type);
+}
+
+/// Enumeration that indicates whether a pool property must be present or not.
+/// This is meant to be used by FindPool, which rejects a pool when a property
+/// requested as POOL_PROP_ON is missing or one requested as POOL_PROP_OFF is
+/// set.
+typedef enum {
+  POOL_PROP_OFF = 0,   ///< The property must not be present.
+  POOL_PROP_ON,        ///< The property must be present.
+  POOL_PROP_DONT_CARE  ///< We don't care if the property is present or not.
+} pool_prop_t;
+
+/// Common body of the pool-search callbacks: hands \p pool back through
+/// \p data when it lives in \p in_segment and satisfies every requested
+/// property constraint.
+/// \param[in] pool Pool being examined
+/// \param[out] data Points to an hsa_amd_memory_pool_t that receives \p pool
+///  on a match
+/// \param[in] in_segment Segment the pool must belong to
+/// \param[in] accessible_by_all Constraint on
+///  HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL
+/// \param[in] kern_arg Constraint on the KERNARG_INIT global flag
+/// \param[in] fine_grain Constraint on the FINE_GRAINED global flag
+/// \returns HSA_STATUS_INFO_BREAK on a match, HSA_STATUS_SUCCESS when the pool
+///  does not qualify, HSA_STATUS_ERROR_INVALID_ARGUMENT when \p data is null,
+///  or the error reported by hsa_amd_memory_pool_get_info
+static hsa_status_t
+FindPool(hsa_amd_memory_pool_t pool, void* data, hsa_amd_segment_t in_segment,
+         pool_prop_t accessible_by_all, pool_prop_t kern_arg,
+         pool_prop_t fine_grain) {
+  if (data == nullptr) {
+    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  hsa_amd_segment_t pool_segment;
+  hsa_status_t err = hsa_amd_memory_pool_get_info(
+      pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &pool_segment);
+  RET_IF_HSA_COMMON_ERR(err);
+
+  if (pool_segment != in_segment) {
+    return HSA_STATUS_SUCCESS;
+  }
+
+  // The global flags are only consulted when the global segment was requested.
+  if (in_segment == HSA_AMD_SEGMENT_GLOBAL) {
+    uint32_t global_flags;
+    err = hsa_amd_memory_pool_get_info(
+        pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_flags);
+    RET_IF_HSA_COMMON_ERR(err);
+
+    if (kern_arg != POOL_PROP_DONT_CARE) {
+      const bool is_kernarg =
+          (global_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT) != 0;
+      if ((kern_arg == POOL_PROP_ON && !is_kernarg) ||
+          (kern_arg == POOL_PROP_OFF && is_kernarg)) {
+        return HSA_STATUS_SUCCESS;
+      }
+    }
+
+    if (fine_grain != POOL_PROP_DONT_CARE) {
+      const bool is_fine_grained =
+          (global_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED) != 0;
+      if ((fine_grain == POOL_PROP_ON && !is_fine_grained) ||
+          (fine_grain == POOL_PROP_OFF && is_fine_grained)) {
+        return HSA_STATUS_SUCCESS;
+      }
+    }
+  }
+
+  if (accessible_by_all != POOL_PROP_DONT_CARE) {
+    bool all_can_access;
+    err = hsa_amd_memory_pool_get_info(
+        pool,
+        (hsa_amd_memory_pool_info_t)HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL,
+        &all_can_access);
+    RET_IF_HSA_COMMON_ERR(err);
+
+    if ((accessible_by_all == POOL_PROP_ON && !all_can_access) ||
+        (accessible_by_all == POOL_PROP_OFF && all_can_access)) {
+      return HSA_STATUS_SUCCESS;
+    }
+  }
+
+  // Every requested constraint held: report the pool and end the search.
+  hsa_amd_memory_pool_t* out_pool =
+      reinterpret_cast<hsa_amd_memory_pool_t*>(data);
+  *out_pool = pool;
+  return HSA_STATUS_INFO_BREAK;
+}
+
+/// Pool-search callback for a non-kernarg pool in the global segment.
+/// Accessibility and fine-grain properties are not constrained.
+hsa_status_t FindStandardPool(hsa_amd_memory_pool_t pool, void* data) {
+  const pool_prop_t any_access = POOL_PROP_DONT_CARE;
+  const pool_prop_t no_kernarg = POOL_PROP_OFF;
+  const pool_prop_t any_grain = POOL_PROP_DONT_CARE;
+
+  return FindPool(pool, data, HSA_AMD_SEGMENT_GLOBAL, any_access, no_kernarg,
+                  any_grain);
+}
+
+/// Pool-search callback for a kernarg pool in the global segment.
+/// Accessibility and fine-grain properties are not constrained.
+hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) {
+  const pool_prop_t any_access = POOL_PROP_DONT_CARE;
+  const pool_prop_t need_kernarg = POOL_PROP_ON;
+  const pool_prop_t any_grain = POOL_PROP_DONT_CARE;
+
+  return FindPool(pool, data, HSA_AMD_SEGMENT_GLOBAL, any_access, need_kernarg,
+                  any_grain);
+}
+/// Pool-search callback for a global-segment pool that is accessible by all
+/// agents and is not a kernarg pool. The fine-grain property is not
+/// constrained.
+hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data) {
+  const pool_prop_t need_all_access = POOL_PROP_ON;
+  const pool_prop_t no_kernarg = POOL_PROP_OFF;
+  const pool_prop_t any_grain = POOL_PROP_DONT_CARE;
+
+  return FindPool(pool, data, HSA_AMD_SEGMENT_GLOBAL, need_all_access,
+                  no_kernarg, any_grain);
+}
+
+/// Build a comma-separated list of the global flags set on \p pool.
+/// \param[in] pool Pool whose global flags are queried
+/// \param[out] out_str Receives the flag names joined by ", "; it is emptied
+///  before the query, so it is empty if the query fails or no flag is set
+/// \returns HSA_STATUS_SUCCESS, or the error reported by
+///  hsa_amd_memory_pool_get_info
+static hsa_status_t MakeGlobalFlagsString(const hsa_amd_memory_pool_t pool,
+                                          std::string* out_str) {
+  assert(out_str != nullptr);
+  out_str->clear();
+
+  uint32_t flag_bits = 0;
+  const hsa_status_t err = hsa_amd_memory_pool_get_info(
+      pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag_bits);
+  RET_IF_HSA_COMMON_ERR(err);
+
+  // Flag bit -> printable name, in the order the names are emitted.
+  const struct {
+    uint32_t bit;
+    const char* label;
+  } flag_labels[] = {
+    {HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT, "KERNARG"},
+    {HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED, "FINE GRAINED"},
+    {HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED, "COARSE GRAINED"},
+  };
+
+  for (const auto& entry : flag_labels) {
+    if ((flag_bits & entry.bit) == 0) {
+      continue;
+    }
+    if (!out_str->empty()) {
+      *out_str += ", ";
+    }
+    *out_str += entry.label;
+  }
+
+  return HSA_STATUS_SUCCESS;
+}
+/// Print the "Pool Segment:" row for \p pool to stdout.
+/// \param[in] pool Pool whose segment is reported
+/// \param[in] ind_lvl String printed in front of the row as indentation
+/// \returns HSA_STATUS_SUCCESS, or the error from a failed pool query. If the
+///  global-flags query fails, the row label has already been printed.
+static hsa_status_t DumpSegment(const hsa_amd_memory_pool_t pool,
+                                std::string const *ind_lvl) {
+  uint32_t seg_kind;
+  hsa_status_t err = hsa_amd_memory_pool_get_info(
+      pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &seg_kind);
+  RET_IF_HSA_COMMON_ERR(err);
+
+  // The row label goes out first, before the segment value is decoded.
+  fprintf(stdout, "%s%-25s", ind_lvl->c_str(), "Pool Segment:");
+
+  std::string description;
+
+  if (seg_kind == HSA_AMD_SEGMENT_GLOBAL) {
+    std::string flag_names;
+    err = MakeGlobalFlagsString(pool, &flag_names);
+    RET_IF_HSA_COMMON_ERR(err);
+
+    description = "GLOBAL; FLAGS: " + flag_names;
+  } else if (seg_kind == HSA_AMD_SEGMENT_READONLY) {
+    description = "READONLY";
+  } else if (seg_kind == HSA_AMD_SEGMENT_PRIVATE) {
+    description = "PRIVATE";
+  } else if (seg_kind == HSA_AMD_SEGMENT_GROUP) {
+    description = "GROUP";
+  } else {
+    // Unknown segment: a notice is printed and the value column stays empty.
+    std::cout << "Not Supported" << std::endl;
+  }
+
+  fprintf(stdout, "%-35s\n", description.c_str());
+
+  return HSA_STATUS_SUCCESS;
+}
+
+/// Print segment, size, allocation and accessibility details of \p pool to
+/// stdout, one row per property.
+/// \param[in] pool Pool to gather and dump information for
+/// \param[in] indent Number of spaces printed in front of every row
+/// \returns HSA_STATUS_SUCCESS if every query succeeded; otherwise the first
+///  error encountered (rows printed before the failure stay on stdout)
+hsa_status_t DumpMemoryPoolInfo(const hsa_amd_memory_pool_t pool,
+                                uint32_t indent) {
+  hsa_status_t err;
+  std::string ind_lvl(indent, ' ');
+
+  // A failure while dumping the segment is reported like any other query
+  // failure rather than being silently dropped.
+  err = DumpSegment(pool, &ind_lvl);
+  RET_IF_HSA_COMMON_ERR(err);
+
+  // Get the size of the POOL
+  size_t pool_size = 0;
+  err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
+                                     &pool_size);
+  RET_IF_HSA_COMMON_ERR(err);
+
+  std::string sz_str = std::to_string(pool_size / 1024) + "KB";
+  fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Size:",
+          sz_str.c_str());
+
+  bool alloc_allowed = false;
+  err = hsa_amd_memory_pool_get_info(pool,
+             HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc_allowed);
+  RET_IF_HSA_COMMON_ERR(err);
+
+  fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Allocatable:",
+          (alloc_allowed ? "TRUE" : "FALSE"));
+
+  size_t alloc_granule = 0;
+  err = hsa_amd_memory_pool_get_info(pool,
+             HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &alloc_granule);
+  RET_IF_HSA_COMMON_ERR(err);
+
+  // Sizes are reported in whole kilobytes (integer division by 1024).
+  std::string gr_str = std::to_string(alloc_granule / 1024) + "KB";
+  fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Alloc Granule:",
+          gr_str.c_str());
+
+  size_t pool_alloc_alignment = 0;
+  err = hsa_amd_memory_pool_get_info(pool,
+                           HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT,
+                                                       &pool_alloc_alignment);
+  RET_IF_HSA_COMMON_ERR(err);
+
+  std::string al_str = std::to_string(pool_alloc_alignment / 1024) + "KB";
+  fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Alloc Alignment:",
+          al_str.c_str());
+
+  bool pl_access = 0;
+  err = hsa_amd_memory_pool_get_info(pool,
+                      HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, &pl_access);
+  RET_IF_HSA_COMMON_ERR(err);
+
+  fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Acessible by all:",
+          (pl_access ? "TRUE" : "FALSE"));
+
+  return HSA_STATUS_SUCCESS;
+}
+
+/// Printable names for pointer types. DumpPointerInfo indexes this table with
+/// the `type` field of hsa_amd_pointer_info_t, so the entry order presumably
+/// mirrors the runtime's pointer-type enumeration (confirm against
+/// hsa_ext_amd.h when adding entries).
+static const char* Types[] = {"HSA_EXT_POINTER_TYPE_UNKNOWN",
+                              "HSA_EXT_POINTER_TYPE_HSA",
+                              "HSA_EXT_POINTER_TYPE_LOCKED",
+                              "HSA_EXT_POINTER_TYPE_GRAPHICS",
+                              "HSA_EXT_POINTER_TYPE_IPC"
+                             };
+
+/// Query the runtime about \p ptr and print the result to std::cout: host and
+/// agent base addresses, allocation size, pointer type, user data and the
+/// handles of the agents that can access the allocation.
+/// \param[in] ptr Pointer about which information is dumped
+/// \returns HSA_STATUS_SUCCESS, or the error reported by hsa_amd_pointer_info
+hsa_status_t DumpPointerInfo(void* ptr) {
+  hsa_amd_pointer_info_t info;
+  // Start from known values so the free() below is safe even if the runtime
+  // leaves the agent list untouched.
+  hsa_agent_t* agents = nullptr;
+  uint32_t count = 0;
+  hsa_status_t err;
+
+  // The runtime requires the caller to fill in the structure size before the
+  // query.
+  info.size = sizeof(info);
+
+  err = hsa_amd_pointer_info(ptr, &info, malloc, &count, &agents);
+  RET_IF_HSA_COMMON_ERR(err);
+
+  std::cout << "Info for ptr: " << ptr << std::endl;
+  std::cout << "CPU ptr: " << reinterpret_cast<void*>(info.hostBaseAddress) <<
+                                                                     std::endl;
+  std::cout << "GPU ptr: " << reinterpret_cast<void*>(info.agentBaseAddress)
+                                                                  << std::endl;
+  std::cout << "Size: " << info.sizeInBytes << std::endl;
+
+  // Guard the name lookup: a runtime newer than this table may report a type
+  // value that has no entry.
+  const size_t num_type_names = sizeof(Types) / sizeof(Types[0]);
+  const size_t type_index = static_cast<size_t>(info.type);
+  const char* type_name = (type_index < num_type_names) ?
+                              Types[type_index] : "UNRECOGNIZED POINTER TYPE";
+  std::cout << "Type: " << type_name << std::endl;
+
+  std::cout << "UsrPtr " << reinterpret_cast<void*>(info.userData) <<
+                                                                     std::endl;
+  std::cout << "Accessible by: ";
+
+  for (uint32_t i = 0; i < count; i++) {
+    std::cout << agents[i].handle << " ";
+  }
+
+  std::cout << " ;[EOM]" << std::endl;
+  // The list was allocated through the malloc callback passed above.
+  free(agents);
+  return HSA_STATUS_SUCCESS;
+}
+
+/// CPU-side stand-in for a memory fill: writes \p value to \p count bytes
+/// starting at \p ptr using memset, so only the low byte of \p value is used.
+/// \returns always HSA_STATUS_SUCCESS
+hsa_status_t hsa_memory_fill_workaround_cpu(void* ptr, uint32_t value,
+                                            size_t count) {
+  memset(ptr, value, count);
+  return HSA_STATUS_SUCCESS;
+}
+
+/// CPU-side stand-in for a memory copy: copies \p size bytes from \p src to
+/// \p dst using memcpy.
+/// \returns always HSA_STATUS_SUCCESS
+hsa_status_t hsa_memory_copy_workaround_cpu(void* dst, const void *src,
+                                            size_t size) {
+  memcpy(dst, src, size);
+  return HSA_STATUS_SUCCESS;
+}
+
+/// Copy \p size bytes from \p src to \p dst with hsa_amd_memory_async_copy and
+/// block until the copy's completion signal drops below 1.
+/// \param[in] dst Destination address
+/// \param[in] src Source address
+/// \param[in] size Number of bytes to copy
+/// \param[in] dst_ag Destination agent handle
+/// \param[in] src_ag Source agent handle
+/// \returns HSA_STATUS_SUCCESS on success, the failing call's status when
+///  signal creation, the copy submission or signal destruction fails, or
+///  HSA_STATUS_ERROR when the signal wait returns a non-zero value.
+///  The completion signal is destroyed on every path once it was created.
+hsa_status_t hsa_memory_copy_workaround_gen(void* dst, const void *src,
+                       size_t size, hsa_agent_t dst_ag, hsa_agent_t src_ag) {
+  hsa_signal_t s;
+  hsa_status_t err;
+
+  err = hsa_signal_create(1, 0, NULL, &s);
+  RET_IF_HSA_COMMON_ERR(err);
+
+  err = hsa_amd_memory_async_copy(dst, dst_ag, src, src_ag, size, 0, NULL, s);
+  if (err != HSA_STATUS_SUCCESS) {
+    // Release the signal before bailing out so a failed submit does not leak.
+    (void)hsa_signal_destroy(s);
+    RET_IF_HSA_COMMON_ERR(err);
+  }
+
+  if (hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1,
+                                   UINT64_MAX, HSA_WAIT_STATE_BLOCKED) != 0) {
+    err = HSA_STATUS_ERROR;
+    std::cout << "Async copy signal error" << std::endl;
+
+    // Same clean-up for an unexpected signal value.
+    (void)hsa_signal_destroy(s);
+    RET_IF_HSA_COMMON_ERR(err);
+  }
+
+  err = hsa_signal_destroy(s);
+
+  RET_IF_HSA_COMMON_ERR(err);
+
+  return err;
+}
+
+/*! \brief Writes to the buffer and increments the write pointer to the
+ *         buffer. Also, ensures that the argument is written to an
+ *         aligned memory as specified. Return the new write pointer.
+ *
+ * @param dst The write pointer to the buffer
+ * @param src The source pointer
+ * @param size The size in bytes to copy
+ * @param alignment The alignment to follow while writing to the buffer
+ */
+#if 0
+inline void *
+addArg(void * dst, const void* src, size_t size, uint32_t alignment) {
+    dst = rocrtst::AlignUp(dst, alignment);
+    ::memcpy(dst, src, size);
+    return dst + size;
+}
+#endif
+#undef RET_IF_HSA_COMMON_ERR
+
+}  // namespace rocrtst
@@ -0,0 +1,174 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+/// \file
+/// RocR related helper functions for sequences that come up frequently
+
+#ifndef ROCRTST_COMMON_COMMON_H_
+#define ROCRTST_COMMON_COMMON_H_
+
+#include <stdio.h>
+#include <string.h>
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+#include "hsa/hsa.h"
+#include "hsa/hsa_ext_amd.h"
+#include "hsa/hsa_ext_finalize.h"
+
+namespace rocrtst {
+
+#if defined(_MSC_VER)
+#define ALIGNED_(x) __declspec(align(x))
+#else
+#if defined(__GNUC__)
+#define ALIGNED_(x) __attribute__ ((aligned(x)))
+#endif  // __GNUC__
+#endif  // _MSC_VER
+
+#define MULTILINE(...) # __VA_ARGS__
+
+// define below should be deleted. Leaving in commented out until code that
+// refers to it has been corrected
+// #define HSA_ARGUMENT_ALIGN_BYTES 16
+
+/// If the provided agent is associated with a GPU, return that agent through
+/// output parameter. This function is meant to be the call-back function used
+/// with hsa_iterate_agents to find GPU agents.
+/// \param[in] agent Agent to evaluate if GPU
+/// \param[out] data If agent is associated with a GPU, this pointer will point
+///  to the agent upon return
+/// \returns HSA_STATUS_SUCCESS if no errors are encountered.
+hsa_status_t FindGPUDevice(hsa_agent_t agent, void* data);
+
+/// If the provided agent is associated with a CPU, return that agent through
+/// output parameter. This function is meant to be the call-back function used
+/// with hsa_iterate_agents to find CPU agents.
+/// \param[in] agent Agent to evaluate if CPU
+/// \param[out] data If agent is associated with a CPU, this pointer will point
+///  to the agent upon return
+/// \returns HSA_STATUS_SUCCESS if no errors are encountered.
+hsa_status_t FindCPUDevice(hsa_agent_t agent, void* data);
+
+// TODO(cfreehil): get rid of FindGlobalPool and replace with FindStandardPool
+hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data);
+
+/// Find a "standard" pool. By this, we mean not a kernel args pool.
+/// The pool found will have the following properties:
+///     HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL: Don't care
+///     HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT: Off
+///     HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED: Don't care
+/// This function is meant to be the call-back function used
+/// with hsa_amd_agent_iterate_memory_pools.
+/// \param[in] pool Pool to evaluate for required properties
+/// \param[out] data If pool meets criteria, this pointer will point
+///  to the pool upon return
+/// \returns hsa_status_t
+///      -HSA_STATUS_INFO_BREAK - we found a pool that meets criteria
+///      -HSA_STATUS_SUCCESS - we did not find a pool that meets the criteria
+///      -else return an appropriate error code for any error encountered
+hsa_status_t FindStandardPool(hsa_amd_memory_pool_t pool, void* data);
+
+/// Find a "kernel arg" pool.
+/// The pool found will have the following properties:
+///     HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL: Don't care
+///     HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT: On
+///     HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED: Don't care
+/// This function is meant to be the call-back function used
+/// with hsa_amd_agent_iterate_memory_pools.
+/// \param[in] pool Pool to evaluate for required properties
+/// \param[out] data If pool meets criteria, this pointer will point
+///  to the pool upon return
+/// \returns hsa_status_t
+///      -HSA_STATUS_INFO_BREAK - we found a pool that meets criteria
+///      -HSA_STATUS_SUCCESS - we did not find a pool that meets the criteria
+///      -else return an appropriate error code for any error encountered
+hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data);
+
+/// Dump information about provided memory pool to STDOUT
+/// \param[in] pool Pool to gather and dump information for
+/// \param[in] indent Number of spaces to indent output.
+/// \returns hsa_status_t HSA_STATUS_SUCCESS if no errors
+hsa_status_t DumpMemoryPoolInfo(const hsa_amd_memory_pool_t pool,
+                                                         uint32_t indent = 0);
+
+/// Dump information about a provided pointer to STDOUT.
+/// \param[in] ptr Pointer about which information is dumped.
+/// \returns HSA_STATUS_SUCCESS if there are no errors
+hsa_status_t DumpPointerInfo(void* ptr);
+
+/// This is a work-around for filling cpu-memory to be used until
+/// hsa_amd_memory_fill is fixed. Should only be used for cpu memory.
+/// \param[in] ptr Start address of memory to be filled.
+/// \param[in] value Value to fill buffer with
+/// \param[in] count Size of buffer to fill
+/// \returns HSA_STATUS_SUCCESS if there are no errors
+hsa_status_t hsa_memory_fill_workaround_cpu(void* ptr, uint32_t value,
+                                                            size_t count);
+
+/// This is a work-around for copying cpu-memory to be used until
+/// hsa_amd_memory_copy is fixed. Should only be used for cpu memory.
+/// \param[in] dst Destination address of memory to be copied
+/// \param[in] src Source address of memory to be copied
+/// \param[in] size Number of bytes to copy
+/// \returns HSA_STATUS_SUCCESS if there are no errors
+hsa_status_t hsa_memory_copy_workaround_cpu(void* dst, const void *src,
+                                                            size_t size);
+
+/// This is a work-around for copying memory to be used until
+/// hsa_amd_memory_copy is fixed. Should be used when gpu local memory is
+/// involved.
+/// \param[in] dst Destination address of memory to be copied
+/// \param[in] src Source address of memory to be copied
+/// \param[in] size Number of bytes to copy
+/// \param[in] dst_ag Destination agent handle
+/// \param[in] src_ag Source agent handle
+/// \returns HSA_STATUS_SUCCESS if there are no errors
+hsa_status_t hsa_memory_copy_workaround_gen(void* dst, const void *src,
+                       size_t size, hsa_agent_t dst_ag, hsa_agent_t src_ag);
+
+}  // namespace rocrtst
+#endif  // ROCRTST_COMMON_COMMON_H_
@@ -0,0 +1,262 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+
+#include "common/helper_funcs.h"
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+#include <algorithm>
+#include <cmath>
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace rocrtst {
+
+
+/// Write \p header followed by a \p height x \p width grid of values from
+/// \p data to std::cout. Elements are read in row-major order, each followed
+/// by a space, one row per line, with a blank line before the header and
+/// after the last row.
+template<typename T>
+void PrintArray(const std::string header, const T* data, const int width,
+                const int height) {
+  std::cout << std::endl << header << std::endl;
+
+  // Rows are stored back to back, so one running pointer visits every cell.
+  const T* cell = data;
+
+  for (int row = 0; row < height; ++row) {
+    for (int col = 0; col < width; ++col) {
+      std::cout << *cell << " ";
+      ++cell;
+    }
+
+    std::cout << std::endl;
+  }
+
+  std::cout << std::endl;
+}
+
+/// Fill a \p height x \p width array with pseudo-random values in the range
+/// [\p rangeMin, \p rangeMax].
+/// \param[out] arrayPtr Array to fill, in row-major order
+/// \param[in] width Number of columns
+/// \param[in] height Number of rows
+/// \param[in] rangeMin Smallest value to generate
+/// \param[in] rangeMax Largest value to generate
+/// \param[in] seed Seed for the generator; 0 means "use the current time"
+/// \returns 0 on success, 1 if \p arrayPtr is null
+template<typename T>
+int FillRandom(T* arrayPtr,
+               const int width,
+               const int height,
+               const T rangeMin,
+               const T rangeMax,
+               unsigned int seed) {
+  if (arrayPtr == nullptr) {
+    return 1;
+  }
+
+  if (seed == 0) {
+    seed = (unsigned int)time(NULL);
+  }
+
+  srand(seed);
+  const double span = static_cast<double>(rangeMax - rangeMin) + 1.0;
+
+  // Rows are contiguous, so a running output pointer covers the whole array.
+  T* out = arrayPtr;
+
+  for (int row = 0; row < height; ++row) {
+    for (int col = 0; col < width; ++col) {
+      // Scale the raw sample into [0, span) before offsetting by rangeMin.
+      const double scaled = span * rand_r(&seed) / (RAND_MAX + 1.0);
+      *out = rangeMin + T(scaled);
+      ++out;
+    }
+  }
+
+  return 0;
+}
+
+/// Round \p val up to the nearest power of two.
+/// A value that already is a power of two is returned unchanged. Because the
+/// arithmetic wraps, 0 and any value above 2^63 yield 0.
+/// \param[in] val Value to round
+/// \returns The smallest power of two that is >= \p val (0 on wrap-around)
+uint64_t RoundToPowerOf2(uint64_t val) {
+  val--;
+
+  // Smear the highest set bit into every lower position. The shift distance
+  // doubles each step and must stay below the 64-bit width: shifting a 64-bit
+  // value by 64 or more is undefined behaviour.
+  for (unsigned int shift = 1; shift < 64; shift <<= 1) {
+    val |= val >> shift;
+  }
+
+  val++;
+
+  return val;
+}
+
+/// Report whether \p val is a power of two, i.e. has exactly one bit set.
+/// \returns true for 1, 2, 4, ...; false for 0 and every other value
+bool IsPowerOf2(uint64_t val) {
+  if (val == 0) {
+    return false;
+  }
+
+  // Clearing the lowest set bit leaves zero only when it was the sole bit.
+  return (val & (val - 1)) == 0;
+}
+
+/// Compare \p data against \p refData using the relative L2 error
+/// sqrt(sum((ref - data)^2)) / sqrt(sum(ref^2)) over all \p length elements.
+/// \param[in] refData Reference values
+/// \param[in] data Values under test
+/// \param[in] length Number of elements in both arrays
+/// \param[in] epsilon Largest relative error still considered a match
+/// \returns true if the relative error is below \p epsilon; false otherwise,
+///  including when the reference's sum of squares is below 1e-7 (the relative
+///  error is not meaningful for an all-zero reference)
+bool
+Compare(const float* refData, const float* data,
+        const int length, const float epsilon) {
+  float error = 0.0f;
+  float ref = 0.0f;
+
+  // Start at index 0 so that the first element takes part in the comparison.
+  for (int i = 0; i < length; ++i) {
+    float diff = refData[i] - data[i];
+    error += diff * diff;
+    ref += refData[i] * refData[i];
+  }
+
+  if (::fabs(static_cast<float>(ref)) < 1e-7f) {
+    return false;
+  }
+
+  float normRef = ::sqrtf(static_cast<float>(ref));
+  float normError = ::sqrtf(static_cast<float>(error));
+  error = normError / normRef;
+
+  return error < epsilon;
+}
+
+/// Compare \p data against \p refData using the relative L2 error
+/// sqrt(sum((ref - data)^2)) / sqrt(sum(ref^2)) over all \p length elements.
+/// \param[in] refData Reference values
+/// \param[in] data Values under test
+/// \param[in] length Number of elements in both arrays
+/// \param[in] epsilon Largest relative error still considered a match
+/// \returns true if the relative error is below \p epsilon; false otherwise,
+///  including when the reference's sum of squares is below 1e-7 (the relative
+///  error is not meaningful for an all-zero reference)
+bool
+Compare(const double* refData, const double* data,
+        const int length, const double epsilon) {
+  double error = 0.0;
+  double ref = 0.0;
+
+  // Start at index 0 so that the first element takes part in the comparison.
+  for (int i = 0; i < length; ++i) {
+    double diff = refData[i] - data[i];
+    error += diff * diff;
+    ref += refData[i] * refData[i];
+  }
+
+  if (::fabs(static_cast<double>(ref)) < 1e-7) {
+    return false;
+  }
+
+  double normRef = ::sqrt(static_cast<double>(ref));
+  double normError = ::sqrt(static_cast<double>(error));
+  error = normError / normRef;
+
+  return error < epsilon;
+}
+
+/// Round \p value down to a multiple of \p alignment by clearing the low
+/// bits selected by (\p alignment - 1). The mask is only a clean "low bits"
+/// mask when \p alignment is a power of two.
+intptr_t
+AlignDown(intptr_t value, size_t alignment) {
+    const size_t low_bits = alignment - 1;
+    return static_cast<intptr_t>(value & ~low_bits);
+}
+
+/// Pointer flavour of AlignDown: returns the address of \p value with the low
+/// bits selected by (\p alignment - 1) cleared, i.e. rounded down to the
+/// previous \p alignment boundary when \p alignment is a power of two.
+void *
+AlignDown(void* value, size_t alignment) {
+    const uintptr_t address = reinterpret_cast<uintptr_t>(value);
+    const intptr_t aligned =
+        static_cast<intptr_t>(address & ~(alignment - 1));
+    return reinterpret_cast<void*>(aligned);
+}
+
+/// Round the address in \p value up to the next \p alignment boundary: the
+/// address is advanced by (\p alignment - 1) and the low bits selected by
+/// (\p alignment - 1) are then cleared. An address that is already aligned is
+/// returned unchanged when \p alignment is a power of two.
+void *
+AlignUp(void* value, size_t alignment) {
+    const uintptr_t bumped =
+        reinterpret_cast<uintptr_t>(value) + alignment - 1;
+    const intptr_t aligned =
+        static_cast<intptr_t>(bumped & ~(alignment - 1));
+    return reinterpret_cast<void*>(aligned);
+}
+
+/// Return the median of \p scores.
+/// The values are sorted on the local copy first, so the caller does not have
+/// to pass them in order. For an even count the two middle values are
+/// averaged.
+/// \param[in] scores Values to take the median of (any order)
+/// \returns The median, or 0.0 when \p scores is empty
+double CalcMedian(std::vector<double> scores) {
+  // Without this guard an empty input would index element size/2 - 1.
+  if (scores.empty()) {
+    return 0.0;
+  }
+
+  // `scores` is a by-value copy, so sorting it does not disturb the caller.
+  std::sort(scores.begin(), scores.end());
+
+  const size_t size = scores.size();
+  const size_t mid = size / 2;
+
+  if (size % 2 == 0) {
+    return (scores[mid - 1] + scores[mid]) / 2;
+  }
+
+  return scores[mid];
+}
+
+/// Return the arithmetic mean of \p scores: the sum of all elements divided
+/// by the element count. No special handling is done for an empty vector.
+double CalcMean(std::vector<double> scores) {
+  double total = 0;
+
+  for (const double score : scores) {
+    total += score;
+  }
+
+  return total / scores.size();
+}
+
+/// Return the mean of the element-wise differences v2[i] - v1[i].
+/// The element count is taken from \p v1, so \p v2 must hold at least as many
+/// elements as \p v1.
+double CalcMean(const std::vector<double>& v1, const std::vector<double>& v2) {
+  const size_t count = v1.size();
+  double total_diff = 0;
+
+  for (size_t idx = 0; idx != count; ++idx) {
+    total_diff += v2[idx] - v1[idx];
+  }
+
+  return total_diff / count;
+}
+
+/// Return the population standard deviation of \p scores around
+/// \p score_mean: the square root of the mean squared deviation.
+/// \param[in] scores Values to measure
+/// \param[in] score_mean Mean to measure the deviation from. The parameter is
+///  an int, so a fractional mean passed by the caller is truncated before use.
+/// \returns The standard deviation; no special handling for an empty vector
+double CalcStdDeviation(std::vector<double> scores, int score_mean) {
+  double sum_sq = 0.0;
+
+  for (const double score : scores) {
+    const double delta = score - score_mean;
+    sum_sq += delta * delta;
+  }
+
+  const double variance = sum_sq / scores.size();
+
+  return sqrt(variance);
+}
+
+/////////////////////////////////////////////////////////////////
+// Template Instantiations
+/////////////////////////////////////////////////////////////////
+
+template
+void PrintArray<uint32_t>(const std::string, const unsigned int*, int, int);
+
+template
+void PrintArray<float>(const std::string, const float*, int, int);
+
+template
+int FillRandom<uint32_t>(uint32_t* arrayPtr,
+                         const int width, const int height,
+                         uint32_t rangeMin, uint32_t rangeMax,
+                                                           unsigned int seed);
+
+template
+int FillRandom<float>(float* arrayPtr,
+                      const int width, const int height,
+                      float rangeMin, float rangeMax, unsigned int seed);
+
+}  // namespace rocrtst
@@ -0,0 +1,105 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#ifndef ROCRTST_COMMON_HELPER_FUNCS_H_
+#define ROCRTST_COMMON_HELPER_FUNCS_H_
+
+/// \file
+/// General-purpose helper functions
+
+#include <string>
+#include <vector>
+namespace rocrtst {
+
+
+bool Compare(const float* refData, const float* data,
+             const int length, const float epsilon = 1e-6f);
+bool Compare(const double* refData, const double* data,
+             const int length, const double epsilon = 1e-6);
+
+/// Calculate the mean number of the vector
+double CalcMean(std::vector<double> scores);
+
+/// Calculate the mean time of difference of the two vectors
+double CalcMean(const std::vector<double>& v1, const std::vector<double>& v2);
+
+/// Return the median value of a vector of doubles
+/// \param[in] scores Vector of doubles
+/// \returns double Median value of provided vector
+double CalcMedian(std::vector<double> scores);
+
+/// Calculate the standard deviation of the vector
+double CalcStdDeviation(std::vector<double> scores, int score_mean);
+
+/// Display an array to std::cout
+template<typename T>
+void PrintArray(
+  const std::string header,
+  const T* data,
+  const int width,
+  const int height);
+
+/// Fill an array with random values
+template<typename T>
+int FillRandom(
+  T* arrayPtr,
+  const int width,
+  const int height,
+  const T rangeMin,
+  const T rangeMax,
+  unsigned int seed = 123);
+
+intptr_t AlignDown(intptr_t value, size_t alignment);
+void* AlignDown(void* value, size_t alignment);
+void* AlignUp(void* value, size_t alignment);
+
+/// Rounds to a power of 2
+uint64_t RoundToPowerOf2(uint64_t val);
+
+///  Checks if a value is a power of 2
+bool IsPowerOf2(uint64_t val);
+
+}  // namespace rocrtst
+#endif  //  ROCRTST_COMMON_HELPER_FUNCS_H_
@@ -0,0 +1,225 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include "common/hsa_perf_cntrs.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <cassert>
+#include <iostream>
+#include <string>
+#include <vector>
+#include "hsa/hsa.h"
+#include "hsa/hsa_ext_profiler.h"
+#include "hsa/amd_hsa_tools_interfaces.h"
+
+namespace rocrtst {
+
+
+/// Pre-dispatch hook: begins a perf counter session for the dispatch.
+/// \param[in] dispParam Dispatch descriptor; its pre_dispatch flag must be set
+/// \param[in] usrArg Pointer to the hsa_ext_tools_pmu_t to start
+static void
+PreDispatchCallback(const hsa_dispatch_callback_t* dispParam, void* usrArg) {
+  assert((dispParam->pre_dispatch) &&
+         "Pre Dispatch Callback Param is Malformed");
+
+  hsa_ext_tools_pmu_t* pmu = reinterpret_cast<hsa_ext_tools_pmu_t*>(usrArg);
+
+  // Failures are only caught by the assert below
+  hsa_status_t rc = hsa_ext_tools_pmu_begin(
+                      *pmu, dispParam->queue,
+                      dispParam->aql_translation_handle, true);
+  assert((rc == HSA_STATUS_SUCCESS) &&
+         "Error in beginning Perf Cntr Session");
+}
+
+/// Post-dispatch hook: ends the perf counter session for the dispatch.
+/// \param[in] dispParam Dispatch descriptor; its pre_dispatch flag must be
+///            clear
+/// \param[in] usrArg Pointer to the hsa_ext_tools_pmu_t to stop
+static void
+PostDispatchCallback(const hsa_dispatch_callback_t* dispParam, void* usrArg) {
+  assert((!dispParam->pre_dispatch) &&
+         "Post Dispatch Callback Param is Malformed");
+
+  hsa_ext_tools_pmu_t* pmu = reinterpret_cast<hsa_ext_tools_pmu_t*>(usrArg);
+
+  // Failures are only caught by the assert below
+  hsa_status_t rc = hsa_ext_tools_pmu_end(
+                      *pmu, dispParam->queue,
+                      dispParam->aql_translation_handle);
+  assert((rc == HSA_STATUS_SUCCESS) &&
+         "Error in endning Perf Cntr Session");
+}
+
+/// Constructor of the class. Starts with an empty counter list and a
+/// null PMU handle; Init() creates both.
+RocrPerfCntrApp::RocrPerfCntrApp()
+  : perfMgr_(NULL) {}
+
+/// Destructor of the class. Frees the CntrInfo descriptors that Init()
+/// allocated with new. The PMU and its device-side counters are still
+/// not released here.
+RocrPerfCntrApp::~RocrPerfCntrApp() {
+  for (size_t idx = 0; idx < cntrList_.size(); idx++) {
+    delete cntrList_[idx];
+  }
+
+  // Drop the now-dangling pointers
+  cntrList_.clear();
+}
+
+/// Return how many perf counters are currently in the list
+uint32_t RocrPerfCntrApp::GetNumPerfCntrs() {
+  return static_cast<uint32_t>(cntrList_.size());
+}
+
+/// Return the counter descriptor stored at the given index.
+/// The index is not range-checked.
+CntrInfo* RocrPerfCntrApp::GetPerfCntr(uint32_t idx) {
+  CntrInfo* const entry = cntrList_[idx];
+  return entry;
+}
+
+/// Dump the id, name, block id, current value and validation type of
+/// every counter in the list to std::cout. Always returns true.
+bool RocrPerfCntrApp::PrintCntrs() {
+  const int count = uint32_t(cntrList_.size());
+
+  for (int pos = 0; pos < count; ++pos) {
+    const CntrInfo* entry = cntrList_[pos];
+
+    // One blank line before and after each counter record
+    std::cout << std::endl
+              << "Rocr Perf Cntr Id: " << entry->cntrId << std::endl
+              << "Rocr Perf Cntr Name: " << entry->cntrName << std::endl
+              << "Rocr Perf Cntr Blk Id: " << entry->blkId << std::endl
+              << "Rocr Perf Cntr Value: " << entry->cntrResult << std::endl
+              << "Rocr Perf Cntr Validation: " << entry->cnfType << std::endl
+              << std::endl;
+  }
+
+  return true;
+}
+
+// Build the list of perf counters, create the PMU for the agent and
+// program every listed counter on it.
+// block id of kHsaAiCounterBlockSQ = 14 == 0x0E
+// Returns the status of the last tools call; failures trip an assert.
+hsa_status_t RocrPerfCntrApp::Init(hsa_agent_t agent) {
+  cntrList_.reserve(23);
+
+  // SQ event counting the number of Waves
+  CntrInfo* entry = new CntrInfo(0x4, "SQ_SQ_PERF_SEL_WAVES", NULL,
+                                 0x0E, NULL, 0x00, 0xFFFFFFFF,
+                                 CntrValCnf_Exact);
+  cntrList_.push_back(entry);
+
+  // SQ event counting the number of Threads
+  entry = new CntrInfo(0xE, "SQ_SQ_PERF_SEL_ITEMS", NULL,
+                       0x0E, NULL, 0x00, 0xFFFFFFFF,
+                       CntrValCnf_Exact);
+  cntrList_.push_back(entry);
+
+  // Create the Perf Mgr instance the counters will belong to
+  hsa_status_t rc = hsa_ext_tools_create_pmu(agent, &perfMgr_);
+  assert((rc == HSA_STATUS_SUCCESS) && "Error in creating Perf Cntr Mgr");
+
+  // For each descriptor: resolve its block handle, create a counter in
+  // that block, select the event and enable the counter
+  const uint32_t count = GetNumPerfCntrs();
+
+  for (uint32_t pos = 0; pos < count; ++pos) {
+    entry = GetPerfCntr(pos);
+
+    // Look the block up only if the descriptor does not carry a handle yet
+    if (entry->blkHndl == NULL) {
+      rc = hsa_ext_tools_get_counter_block_by_id(perfMgr_, entry->blkId,
+                                                 &entry->blkHndl);
+      assert((rc == HSA_STATUS_SUCCESS) &&
+             "Error in getting Perf Cntr Blk Hndl");
+    }
+
+    rc = hsa_ext_tools_create_counter(entry->blkHndl, &entry->cntrHndl);
+    assert((rc == HSA_STATUS_SUCCESS) &&
+           "Error in creating Perf Cntr in Perf Blk");
+
+    // The event index parameter selects which event the counter counts
+    uint32_t eventIndexParam = HSA_EXT_TOOLS_COUNTER_PARAMETER_EVENT_INDEX;
+    rc = hsa_ext_tools_set_counter_parameter(
+           entry->cntrHndl, eventIndexParam, sizeof(uint32_t),
+           static_cast<void*>(&entry->cntrId));
+    assert((rc == HSA_STATUS_SUCCESS) &&
+           "Error in updating Perf Cntr Property Event Index");
+
+    rc = hsa_ext_tools_set_counter_enabled(entry->cntrHndl, true);
+    assert((rc == HSA_STATUS_SUCCESS) && "Error in enabing Perf Cntr");
+  }
+
+  return rc;
+}
+
+// Install the pre/post dispatch hooks on the queue and hand both of them
+// the address of this object's PMU handle as their user argument
+void RocrPerfCntrApp::RegisterCallbacks(hsa_queue_t* queue) {
+  hsa_status_t rc = hsa_ext_tools_set_callback_functions(
+                      queue, PreDispatchCallback, PostDispatchCallback);
+  assert((rc == HSA_STATUS_SUCCESS) &&
+         "Error in registering Pre & Post Dispatch Callbacks");
+
+  rc = hsa_ext_tools_set_callback_arguments(queue, &perfMgr_, &perfMgr_);
+  assert((rc == HSA_STATUS_SUCCESS) &&
+         "Error in registering Pre & Post Dispatch Callback Params");
+}
+
+// Block until perf counter collection completes, passing a timeout value
+// of 5000 to the PMU wait call, and return that call's status
+hsa_status_t RocrPerfCntrApp::Wait() {
+  const hsa_status_t rc =
+    hsa_ext_tools_pmu_wait_for_completion(perfMgr_, 5000);
+  assert((rc == HSA_STATUS_SUCCESS) &&
+         "Error in Waiting for Perf Cntr Completion");
+  return rc;
+}
+
+// Read back the result of every perf counter and print it.
+// Returns HSA_STATUS_SUCCESS when every read succeeded, otherwise the
+// status of the first read that failed (later counters are still read).
+// Values are not yet compared against expectedResult / cnfType.
+hsa_status_t RocrPerfCntrApp::Validate() {
+  hsa_status_t result = HSA_STATUS_SUCCESS;
+  const uint32_t size = GetNumPerfCntrs();
+
+  for (uint32_t idx = 0; idx < size; idx++) {
+    CntrInfo* info = GetPerfCntr(idx);
+    const hsa_status_t status =
+      hsa_ext_tools_get_counter_result(info->cntrHndl, &info->cntrResult);
+
+    if (status != HSA_STATUS_SUCCESS) {
+      // Keep the first failure so a later success cannot hide it, and
+      // do not print a value that was never read
+      if (result == HSA_STATUS_SUCCESS) {
+        result = status;
+      }
+
+      continue;
+    }
+
+    std::cout << "Value of Perf Cntr is: " << info->cntrResult << std::endl;
+  }
+
+  return result;
+}
+
+}  // namespace rocrtst
@@ -0,0 +1,159 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+/// \file
+/// Contains counter related functionality that can be used by samples and
+/// tests.
+#ifndef ROCRTST_COMMON_HSA_PERF_CNTRS_H_
+#define ROCRTST_COMMON_HSA_PERF_CNTRS_H_
+
+#include "hsa/hsa.h"
+#include "hsa/hsa_ext_profiler.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <iostream>
+#include <vector>
+#include <string>
+
+namespace rocrtst {
+
+
+typedef enum CntrValCnfType {
+  /// no counter value validation should be performed
+  CntrValCnf_None,
+
+  /// counter value should be an exact match to expectedResult
+  CntrValCnf_Exact,
+
+  /// counter value should be greater than expectedResult
+  CntrValCnf_GreaterThan,
+
+  /// counter value should be less than expectedResult
+  CntrValCnf_LessThan
+} CntrValCnfType;
+
+/// Struct used to encapsulate Counter Info
+typedef struct CntrInfo {
+  /// Id of counter in hardware block
+  uint32_t cntrId;
+
+  /// Name of counter; always NUL-terminated, at most 71 characters are kept
+  char cntrName[72];
+
+  /// Handle of perf counter
+  hsa_ext_tools_counter_t cntrHndl;
+
+  /// Id of hardware block containing the counter
+  uint32_t blkId;
+
+  /// Handle of counter block
+  hsa_ext_tools_counter_block_t blkHndl;
+
+  /// Expected value of perf counter
+  uint64_t  expectedResult;
+
+  /// Value of perf counter as read back after the dispatch
+  uint64_t cntrResult;
+
+  /// Type of validation upon completion of dispatch
+  CntrValCnfType cnfType;
+
+  /// Initialize every field from the given values.
+  /// \param[in] cntrName Counter name; a NULL pointer is stored as an empty
+  ///            name and a name longer than 71 characters is truncated
+  CntrInfo(uint32_t cntrId, const char* cntrName, void* cntrHndl,
+           uint32_t blkId, void* blkHndl,
+           uint64_t expResult, uint64_t result, CntrValCnfType cnfType) {
+    this->cntrId = cntrId;
+    this->cntrHndl = cntrHndl;
+    this->blkId = blkId;
+    this->blkHndl = blkHndl;
+    this->expectedResult = expResult;
+    this->cntrResult = result;
+    this->cnfType = cnfType;
+
+    // strncpy zero-fills the remainder of the buffer; the explicit
+    // terminator covers names that fill all 71 usable characters
+    const char* src = (cntrName != NULL) ? cntrName : "";
+    strncpy(this->cntrName, src, sizeof(this->cntrName) - 1);
+    this->cntrName[sizeof(this->cntrName) - 1] = '\0';
+  }
+} CntrInfo;
+
+class RocrPerfCntrApp {
+ public:
+  // Constructor of the class. Only clears the Perf Cntr Manager handle;
+  // the list of perf counters is built later by Init()
+  RocrPerfCntrApp();
+
+  //  Destructor of the class
+  ~RocrPerfCntrApp();
+
+  // Return the number of perf counters currently in the list
+  uint32_t GetNumPerfCntrs();
+
+  // Return the descriptor of the perf counter at the specified index (idx is not range-checked)
+  CntrInfo* GetPerfCntr(uint32_t idx);
+
+  // Print the fields of every perf counter to std::cout; always returns true
+  bool PrintCntrs();
+
+  // Build the list of perf counters, create the Perf Cntr Manager for agent and enable each counter
+  hsa_status_t Init(hsa_agent_t agent);
+
+  // Register Pre and Post dispatch callbacks on queue, with the Perf Cntr Manager handle as their argument
+  void RegisterCallbacks(hsa_queue_t* queue);
+
+  // Wait for perf counter collection to complete
+  hsa_status_t Wait();
+
+  // Read back and print each perf counter value; values are not compared with expectedResult
+  hsa_status_t Validate();
+
+ private:
+  //  List of perf counter descriptors, allocated with new in Init()
+  std::vector<CntrInfo*> cntrList_;
+
+  //  Handle of Perf Cntr Manager
+  hsa_ext_tools_pmu_t perfMgr_;
+};
+
+}  // namespace rocrtst
+
+#endif  // ROCRTST_COMMON_HSA_PERF_CNTRS_H_
@@ -0,0 +1,190 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include "common/hsatimer.h"
+#include <x86intrin.h>
+
+namespace rocrtst {
+
+static const uint64_t kNanosecondsPerSecond = 1000000000;
+
+// Construct an empty timer collection. The TSC rate is measured here on
+// every construction, which spins until 1e9 TSC ticks have elapsed.
+PerfTimer::PerfTimer(void)
+  : freq_in_100mhz(MeasureTSCFreqHz()) {}
+
+// Free every Timer created by CreateTimer(), newest first
+PerfTimer::~PerfTimer() {
+  for (size_t remaining = _timers.size(); remaining > 0; --remaining) {
+    delete _timers[remaining - 1];
+  }
+
+  _timers.clear();
+}
+
+// Allocate a zeroed timer whose readings are scaled by nanoseconds per
+// second, append it to the collection and return its index
+int PerfTimer::CreateTimer(void) {
+  Timer* fresh = new Timer;
+  fresh->_freq = kNanosecondsPerSecond;
+  fresh->_clocks = 0;
+  fresh->_start = 0;
+
+  // The new timer lands at the current end of the vector
+  const int handle = static_cast<int>(_timers.size());
+  _timers.push_back(fresh);
+  return handle;
+}
+
+// Record the current time as the start point of the timer at index.
+// Returns 0 on success, 1 if index is not a valid timer handle.
+int PerfTimer::StartTimer(int index) {
+  // Negative handles are rejected too; they would index out of bounds
+  if (index < 0 || index >= static_cast<int>(_timers.size())) {
+    Error("Cannot start timer. Invalid handle.");
+    return 1;
+  }
+
+// General Linux timing method
+#ifndef _AMD
+  struct timespec s;
+  clock_gettime(CLOCK_MONOTONIC, &s);
+  _timers[index]->_start =
+    static_cast<uint64_t>(s.tv_sec) * kNanosecondsPerSecond
+    + static_cast<uint64_t>(s.tv_nsec);
+#else
+
+  // AMD timing method: raw TSC ticks
+
+  unsigned int unused;
+  _timers[index]->_start = __rdtscp(&unused);
+
+#endif
+
+  return 0;
+}
+
+// Stop the timer at index and add the time elapsed since the matching
+// StartTimer() call to its accumulated total, kept in nanoseconds.
+// Returns 0 on success, 1 if index is not a valid timer handle.
+int PerfTimer::StopTimer(int index) {
+  uint64_t n = 0;
+
+  // Negative handles are rejected too; they would index out of bounds
+  if (index < 0 || index >= static_cast<int>(_timers.size())) {
+    Error("Cannot stop timer. Invalid handle.");
+    return 1;
+  }
+
+  // General Linux timing method
+#ifndef _AMD
+  struct timespec s;
+  clock_gettime(CLOCK_MONOTONIC, &s);
+  n = static_cast<uint64_t>(s.tv_sec) * kNanosecondsPerSecond
+      + static_cast<uint64_t>(s.tv_nsec);
+#else
+  // AMD Linux timing: raw TSC ticks
+
+  unsigned int unused;
+  n = __rdtscp(&unused);
+#endif
+
+  n -= _timers[index]->_start;
+  _timers[index]->_start = 0;
+
+#ifndef _AMD
+  _timers[index]->_clocks += n;
+#else
+  // Convert ticks to nanoseconds: the rate is held in units of 100 MHz,
+  // so ns = 10 * ticks / rate. ReadTimer() divides by nanoseconds per
+  // second, so the total must stay in nanoseconds, not milliseconds.
+  _timers[index]->_clocks +=
+    static_cast<uint64_t>(10.0 * n / freq_in_100mhz);
+  std::cout << "_AMD is enabled!!!" << std::endl;
+#endif
+
+  return 0;
+}
+
+void PerfTimer::Error(std::string str) {
+  std::cout << str << std::endl;
+}
+
+// Return the accumulated reading of the timer at index divided by the
+// timer's frequency. Returns 1 if index is not a valid timer handle.
+double PerfTimer::ReadTimer(int index) {
+  // Negative handles are rejected too; they would index out of bounds
+  if (index < 0 || index >= static_cast<int>(_timers.size())) {
+    Error("Cannot read timer. Invalid handle.");
+    return 1;
+  }
+
+  const Timer* timer = _timers[index];
+  return static_cast<double>(timer->_clocks) / timer->_freq;
+}
+
+// Clear the accumulated reading and start point of the timer at index.
+// An invalid index prints an error and terminates the process.
+void PerfTimer::ResetTimer(int index) {
+  // Negative handles are rejected too; they would index out of bounds
+  if (index < 0 || index >= static_cast<int>(_timers.size())) {
+    Error("Invalid index value\n");
+    exit(1);
+  }
+
+  _timers[index]->_clocks = 0;
+  _timers[index]->_start = 0;
+}
+
+// Return the CLOCK_MONOTONIC_RAW time in whole microseconds
+uint64_t PerfTimer::CoarseTimestampUs() {
+  struct timespec now;
+  clock_gettime(CLOCK_MONOTONIC_RAW, &now);
+
+  const uint64_t wholeSecondsUs = uint64_t(now.tv_sec) * 1000000;
+  return wholeSecondsUs + now.tv_nsec / 1000;
+}
+
+// Estimate the TSC rate by spinning until 1e9 TSC ticks have elapsed and
+// timing that interval with the coarse microsecond clock.
+// Despite the name, the result is in units of 100 MHz (ticks * 10 / ns),
+// rounded to the nearest unit.
+uint64_t PerfTimer::MeasureTSCFreqHz() {
+  unsigned int aux;
+  uint64_t elapsedTicks;
+
+  const uint64_t startUs = CoarseTimestampUs();
+  const uint64_t startTicks = __rdtscp(&aux);
+
+  // Busy-wait for one gigacycle of TSC ticks
+  do {
+    elapsedTicks = __rdtscp(&aux) - startTicks;
+  }
+  while (elapsedTicks < 1000000000);
+
+  const uint64_t endUs = CoarseTimestampUs();
+
+  // Adding half the divisor before dividing rounds to nearest
+  const uint64_t elapsedNs = (endUs - startUs) * 1000;
+  return (elapsedTicks * 10 + (elapsedNs / 2)) / elapsedNs;
+}
+
+}  // namespace rocrtst
@@ -0,0 +1,106 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#ifndef ROCRTST_COMMON_HSATIMER_H_
+#define ROCRTST_COMMON_HSATIMER_H_
+
+#include <stdint.h>
+#include <iostream>
+#include <vector>
+#include <string>
+/// \file
+/// Timer related class.
+
+namespace rocrtst {
+
+class PerfTimer {
+ private:
+  struct Timer {
+    std::string name; /* < name of the timer object (not set by CreateTimer) */
+    uint64_t _freq; /* < _freq divisor applied by ReadTimer; CreateTimer sets it to nanoseconds per second */
+    uint64_t _clocks; /* < _clocks elapsed count accumulated by StopTimer across Start/Stop pairs */
+    uint64_t _start; /* < _start reading taken by StartTimer; 0 while stopped */
+  };
+
+  std::vector<Timer*> _timers; /*< _timers vector of pointers to Timer objects, freed in the destructor */
+  double freq_in_100mhz;  // TSC rate in units of 100 MHz, measured once in the constructor
+
+ public:
+  PerfTimer(void);
+  ~PerfTimer(void);
+
+  /// Create a new timer.
+  /// \returns A new timer instance index
+  int CreateTimer(void);
+
+  /// Start the timer associated with the given index
+  /// \param[in] index Index of the timer to start
+  /// \returns int 0 for success, non-zero otherwise
+  int StartTimer(int index);
+
+  /// Stop the timer associated with the given index and add the elapsed time to its total
+  /// \param[in] index Index of the timer to stop
+  /// \returns int 0 for success, non-zero otherwise
+  int StopTimer(int index);
+
+  /// Reset the timer to 0; an invalid index terminates the process
+  /// \param[in] index Index of the timer to reset
+  /// \returns void
+  void ResetTimer(int index);
+
+  /// Read the accumulated time value of the timer associated with the provided index.
+  /// \param[in] index Index of the timer to read
+  /// \returns double Accumulated count divided by the timer's _freq; 1 if index is invalid
+  double ReadTimer(int index);
+
+ private:
+  void Error(std::string str);  // prints str to std::cout
+  uint64_t CoarseTimestampUs();  // CLOCK_MONOTONIC_RAW time in microseconds
+  uint64_t MeasureTSCFreqHz();  // NOTE: result is in units of 100 MHz, not Hz
+};
+
+}  // namespace rocrtst
+#endif  // ROCRTST_COMMON_HSATIMER_H_
+
@@ -0,0 +1,66 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include "common/os.h"
+#include <stdlib.h>
+
+namespace rocrtst {
+
+// Set env_var_name to env_var_value, replacing any existing value.
+// On failure a message is printed and the process exits with status 1.
+void SetEnv(const char* env_var_name, const char* env_var_value) {
+  if (setenv(env_var_name, env_var_value, 1) == 0) {
+    return;
+  }
+
+  printf("Set environment variable failed!\n");
+  exit(1);
+}
+
+// Look up env_var_name with getenv() and return its result unchanged
+char* GetEnv(const char* env_var_name) {
+  char* value = getenv(env_var_name);
+  return value;
+}
+
+}  // namespace rocrtst
@@ -0,0 +1,67 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+/// \file
+/// OS specific functionality
+
+#ifndef ROCRTST_COMMON_OS_H_
+#define ROCRTST_COMMON_OS_H_
+
+#include <stdio.h>
+namespace rocrtst {
+
+/// Set environment variable, overwriting any existing value. Prints a message and exits with status 1 on failure.
+/// \param[in] env_var_name Environment variable to set.
+/// \param[in] env_var_value Value to set environment variable to.
+/// \returns void
+void SetEnv(const char* env_var_name, const char* env_var_value);
+
+/// Get environment variable.
+/// \param[in] env_var_name Environment variable to get.
+/// \returns Pointer to string of characters that is the value of the
+///  environment variable, exactly as returned by getenv().
+char* GetEnv(const char* env_var_name);
+
+}  // namespace rocrtst
+#endif  // ROCRTST_COMMON_OS_H_
@@ -0,0 +1,31 @@
+#
+# Source files for Tests verifying rocrtst Utils library
+#
+set (rocrtstUtilsTestSrcs
+     utils_timer_gtest.cpp
+     utils_timer_test.cpp
+     utils_cpp11_gtest.cpp)
+
+#
+# Header search paths, in order: this directory, the utils sources
+# and the gtest headers
+#
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}
+                    ${PROJECT_SOURCE_DIR}/utils
+                    ${PROJECT_SOURCE_DIR}/gtest/include)
+
+#
+# The test executable is built from all of the sources listed above
+#
+add_executable(${ROCRTST_UTIL_TEST_NAME} ${rocrtstUtilsTestSrcs})
+
+#
+# Libraries that resolve the symbols used by the test executable
+#
+target_link_libraries(${ROCRTST_UTIL_TEST_NAME}
+                      ${ROCRTST_LIBS} elf c stdc++ dl pthread rt)
+
+#
+# Install build artifacts under the project binary directory
+#
+install(TARGETS ${ROCRTST_UTIL_TEST_NAME}
+        ARCHIVE DESTINATION ${PROJECT_BINARY_DIR}/lib
+        LIBRARY DESTINATION ${PROJECT_BINARY_DIR}/lib
+        RUNTIME DESTINATION ${PROJECT_BINARY_DIR}/bin)
@@ -0,0 +1,46 @@
+#include<iostream>
+#include<thread>
+#include"gtest/gtest.h"
+
+using std::cout;
+using std::endl;
+
+// @Brief: entry point run by thread #1; announces itself on stdout
+static void ThreadEntry1() {
+  std::cout << "The first thread is launched!" << std::endl;
+}
+// @Brief: entry point run by thread #2; announces itself on stdout
+static void ThreadEntry2() {
+  std::cout << "The second thread is launched!" << std::endl;
+}
+
+// @Brief: google test case for the basic C++11 thread feature.
+// Two default-constructed thread objects must start out non-joinable;
+// once each is assigned a running thread it must be joinable, and the
+// test then waits for both spawned threads to finish.
+TEST(rocrtstCpp11Feature, BasicThread) {
+  // Default-constructed threads do not represent a thread of execution
+  std::thread first;
+  std::thread second;
+
+  ASSERT_FALSE(first.joinable());
+  ASSERT_FALSE(second.joinable());
+
+  // Move-assign running threads into the two objects
+  first = std::thread(ThreadEntry1);
+  second = std::thread(ThreadEntry2);
+
+  ASSERT_TRUE(first.joinable());
+  ASSERT_TRUE(second.joinable());
+
+  // Block until both spawned threads have finished
+  first.join();
+  second.join();
+
+  // Reaching this point means the test succeeded
+  std::cout << "Done!" << std::endl;
+}
@@ -0,0 +1,32 @@
+
+
+#include <iostream>
+
+#include "gtest/gtest.h"
+
+#include "utils_timer_test.hpp"
+
+using namespace std;
+
+// Fixture for the timer tests; it carries no state of its own
+class rocrtstUtilsTimerGtest : public ::testing::Test {
+ protected:
+  // No argument constructor called from Google Test Framework
+  rocrtstUtilsTimerGtest() {}
+};
+
+// Runs the timer test driver for 108 iterations with a per-iteration
+// sleep value of 3 (see rocrtstUtilsTimerTest for the unit) and prints
+// the total time it measured.
+TEST_F(rocrtstUtilsTimerGtest, TestingTimer101) {
+  // Automatic storage: the driver is destroyed when the test returns,
+  // instead of being leaked by a new without a matching delete
+  rocrtstUtilsTimerTest timer(108, 3);
+
+  // Let the timer object collect data
+  timer.run();
+
+  // Print the statistics of timer object
+  timer.print();
+}
@@ -0,0 +1,45 @@
+
+#include <iostream>
+#include "hsatimer.h"
+#include <unistd.h>
+#include "utils_timer_test.hpp"
+
+using namespace std;
+
+
+
+// Destructor of the test driver; there is nothing to release explicitly
+rocrtstUtilsTimerTest::~rocrtstUtilsTimerTest() = default;
+
+// Constructor of the test driver. The accumulated time starts at zero.
+//
+// @param loopCnt number of times to call sleep Api
+//
+// @param sleepTime sleep time that run() uses for each sleep call
+rocrtstUtilsTimerTest::rocrtstUtilsTimerTest(uint32_t loopCnt,
+                                             uint32_t sleepTime)
+    : loopCnt_(loopCnt),
+      sleepTime_(sleepTime),
+      total_time_(0) {}
+
+// Execute user defined number of sleep calls and collect the
+// total time taken by such calls. Each iteration starts the timer,
+// sleeps for sleepTime_ milliseconds, stops the timer and adds the
+// value read back from it to total_time_.
+void rocrtstUtilsTimerTest::run() {
+  PerfTimer timer;
+  uint32_t index = timer.CreateTimer();
+
+  // usleep() takes microseconds, whereas sleepTime_ is in milliseconds.
+  // NOTE(review): POSIX allows usleep() to reject values of one second
+  // or more, so very large sleep times may need nanosleep() instead.
+  const useconds_t sleepUsec = static_cast<useconds_t>(sleepTime_) * 1000;
+
+  // The counter has to start at zero: reading an uninitialized counter
+  // is undefined behavior and makes the iteration count unpredictable
+  for (uint32_t idx = 0; idx < loopCnt_; idx++) {
+    timer.StartTimer(index);
+    usleep(sleepUsec);
+    timer.StopTimer(index);
+
+    // NOTE(review): this assumes ReadTimer() reports only the latest
+    // start/stop interval. If it reports the time accumulated since the
+    // timer was created, this sum over-counts - confirm in hsatimer.h
+    const double elapsed = timer.ReadTimer(index);
+    total_time_ += elapsed;
+  }
+}
+
+// Write the iteration count and the time accumulated by run() to
+// standard output as a single line
+void rocrtstUtilsTimerTest::print() {
+  std::cout << "Time taken by " << loopCnt_
+            << " iterations of sleep is: " << total_time_ << std::endl;
+}
@@ -0,0 +1,38 @@
+#ifndef ROCRTST_UTILS_TIMER_TEST_H_
+#define ROCRTST_UTILS_TIMER_TEST_H_
+
+// Provides uint32_t, so that this header can be included on its own
+#include <stdint.h>
+
+// Encapsulates Api's to access Timer service of rocrtst Utils library
+class rocrtstUtilsTimerTest {
+ public:
+  // Constructor method of test driver
+  //
+  // @param loopCnt number of times to call sleep Api
+  //
+  // @param sleepTime time to sleep per cycle, in milliseconds
+  rocrtstUtilsTimerTest(uint32_t loopCnt, uint32_t sleepTime);
+
+  // Destructor method of test driver
+  ~rocrtstUtilsTimerTest();
+
+  // Execute user defined number of sleep calls and collect the
+  // total time taken by such calls
+  void run();
+
+  // Print time reported by rocrtst Utils Timer service
+  void print();
+
+ private:
+  // Number of times to invoke sleep Api
+  uint32_t loopCnt_;
+
+  // Time to sleep per cycle, in milliseconds
+  uint32_t sleepTime_;
+
+  // Time taken by sleep Api, summed over all cycles
+  double total_time_;
+};
+
+#endif  // ROCRTST_UTILS_TIMER_TEST_H_
@@ -0,0 +1,31 @@
+#
+# Source files for Google Test Framework
+#
+set(gtFrwkSrcs
+    src/gtest.cpp
+    src/gtest-port.cpp
+    src/gtest-printers.cpp
+    src/gtest-filepath.cpp
+    src/gtest-test-part.cpp
+    src/gtest-typed-test.cpp
+    src/gtest-death-test.cpp
+    src/gtest_main.cpp)
+
+#
+# Header search paths: the include directory, its gtest
+# sub-directory, and this directory itself
+#
+include_directories(include
+                    include/gtest
+                    ${CMAKE_CURRENT_SOURCE_DIR})
+
+#
+# Build Google Test Framework as a Static Library object
+#
+add_library(${GOOGLE_TEST_FRWK_NAME} STATIC ${gtFrwkSrcs})
+
+#
+# Install build artifacts into one common location
+#
+install(TARGETS ${GOOGLE_TEST_FRWK_NAME}
+        ARCHIVE DESTINATION ${PROJECT_BINARY_DIR}/lib
+        LIBRARY DESTINATION ${PROJECT_BINARY_DIR}/lib
+        RUNTIME DESTINATION ${PROJECT_BINARY_DIR}/bin)
@@ -0,0 +1,294 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+//
+// The Google C++ Testing Framework (Google Test)
+//
+// This header file defines the public API for death tests.  It is
+// #included by gtest.h so a user doesn't need to include this
+// directly.
+
+#ifndef GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
+#define GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
+
+#include "gtest/internal/gtest-death-test-internal.h"
+
+namespace testing {
+
+// This flag controls the style of death tests.  Valid values are:
+//   "threadsafe" - the death test child process re-executes the test
+//                  binary from the start, running only a single death test;
+//   "fast"       - the child process executes the test logic immediately
+//                  after forking.
+GTEST_DECLARE_string_(death_test_style);
+
+#if GTEST_HAS_DEATH_TEST
+
+namespace internal {
+
+// Returns a Boolean value indicating whether the caller is currently
+// executing in the context of the death test child process.  Tools such as
+// Valgrind heap checkers may need this to modify their behavior in death
+// tests.  IMPORTANT: This is an internal utility.  Using it may break the
+// implementation of death tests.  User code MUST NOT use it.
+GTEST_API_ bool InDeathTestChild();  // declared only; no body in this header
+
+}  // namespace internal
+
+// The following macros are useful for writing death tests.
+
+// Here's what happens when an ASSERT_DEATH* or EXPECT_DEATH* is
+// executed:
+//
+//   1. It generates a warning if there is more than one active
+//   thread.  This is because it's safe to fork() or clone() only
+//   when there is a single thread.
+//
+//   2. The parent process clone()s a sub-process and runs the death
+//   test in it; the sub-process exits with code 0 at the end of the
+//   death test, if it hasn't exited already.
+//
+//   3. The parent process waits for the sub-process to terminate.
+//
+//   4. The parent process checks the exit code and error message of
+//   the sub-process.
+//
+// Examples:
+//
+//   ASSERT_DEATH(server.SendMessage(56, "Hello"), "Invalid port number");
+//   for (int i = 0; i < 5; i++) {
+//     EXPECT_DEATH(server.ProcessRequest(i),
+//                  "Invalid request .* in ProcessRequest()")
+//                  << "Failed to die on request " << i;
+//   }
+//
+//   ASSERT_EXIT(server.ExitNow(), ::testing::ExitedWithCode(0), "Exiting");
+//
+//   bool KilledBySIGHUP(int exit_code) {
+//     return WIFSIGNALED(exit_code) && WTERMSIG(exit_code) == SIGHUP;
+//   }
+//
+//   ASSERT_EXIT(client.HangUpServer(), KilledBySIGHUP, "Hanging up!");
+//
+// On the regular expressions used in death tests:
+//
+//   On POSIX-compliant systems (*nix), we use the <regex.h> library,
+//   which uses the POSIX extended regex syntax.
+//
+//   On other platforms (e.g. Windows), we only support a simple regex
+//   syntax implemented as part of Google Test.  This limited
+//   implementation should be enough most of the time when writing
+//   death tests; though it lacks many features you can find in PCRE
+//   or POSIX extended regex syntax.  For example, we don't support
+//   union ("x|y"), grouping ("(xy)"), brackets ("[xy]"), and
+//   repetition count ("x{5,7}"), among others.
+//
+//   Below is the syntax that we do support.  We chose it to be a
+//   subset of both PCRE and POSIX extended regex, so it's easy to
+//   learn wherever you come from.  In the following: 'A' denotes a
+//   literal character, period (.), or a single \\ escape sequence;
+//   'x' and 'y' denote regular expressions; 'm' and 'n' are for
+//   natural numbers.
+//
+//     c     matches any literal character c
+//     \\d   matches any decimal digit
+//     \\D   matches any character that's not a decimal digit
+//     \\f   matches \f
+//     \\n   matches \n
+//     \\r   matches \r
+//     \\s   matches any ASCII whitespace, including \n
+//     \\S   matches any character that's not a whitespace
+//     \\t   matches \t
+//     \\v   matches \v
+//     \\w   matches any letter, _, or decimal digit
+//     \\W   matches any character that \\w doesn't match
+//     \\c   matches any literal character c, which must be a punctuation
+//     .     matches any single character except \n
+//     A?    matches 0 or 1 occurrences of A
+//     A*    matches 0 or many occurrences of A
+//     A+    matches 1 or many occurrences of A
+//     ^     matches the beginning of a string (not that of each line)
+//     $     matches the end of a string (not that of each line)
+//     xy    matches x followed by y
+//
+//   If you accidentally use PCRE or POSIX extended regex features
+//   not implemented by us, you will get a run-time failure.  In that
+//   case, please try to rewrite your regular expression within the
+//   above syntax.
+//
+//   This implementation is *not* meant to be as highly tuned or robust
+//   as a compiled regex library, but should perform well enough for a
+//   death test, which already incurs significant overhead by launching
+//   a child process.
+//
+// Known caveats:
+//
+//   A "threadsafe" style death test obtains the path to the test
+//   program from argv[0] and re-executes it in the sub-process.  For
+//   simplicity, the current implementation doesn't search the PATH
+//   when launching the sub-process.  This means that the user must
+//   invoke the test program via a path that contains at least one
+//   path separator (e.g. path/to/foo_test and
+//   /absolute/path/to/bar_test are fine, but foo_test is not).  This
+//   is rarely a problem as people usually don't put the test binary
+//   directory in PATH.
+//
+// TODO(wan@google.com): make thread-safe death tests search the PATH.
+
+// Asserts that statement causes the program to exit with an integer exit
+// status that satisfies predicate, while emitting error output that
+// matches regex.  A violation is reported through GTEST_FATAL_FAILURE_.
+# define ASSERT_EXIT(statement, predicate, regex) \
+    GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_FATAL_FAILURE_)
+
+// Like ASSERT_EXIT, but a violation is reported as a non-fatal failure
+// (through GTEST_NONFATAL_FAILURE_):
+# define EXPECT_EXIT(statement, predicate, regex) \
+    GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_NONFATAL_FAILURE_)
+
+// Asserts that statement makes the program exit unsuccessfully (a nonzero
+// exit code or death by signal) with error output matching regex.  This is
+// ASSERT_EXIT with ::testing::internal::ExitedUnsuccessfully as predicate.
+# define ASSERT_DEATH(statement, regex) \
+    ASSERT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex)
+
+// Like ASSERT_DEATH, but built on EXPECT_EXIT, so a violation is
+// reported as a non-fatal failure:
+# define EXPECT_DEATH(statement, regex) \
+    EXPECT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex)
+
+// Two predicate classes that can be used in {ASSERT,EXPECT}_EXIT*:
+
+// Predicate testing that an exit status is a normal exit with a given code.
+class GTEST_API_ ExitedWithCode {
+ public:
+  explicit ExitedWithCode(int exit_code);
+  bool operator()(int exit_status) const;  // applies the predicate
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const ExitedWithCode& other);
+
+  const int exit_code_;  // const, so fixed at construction
+};
+
+# if !GTEST_OS_WINDOWS
+// Predicate (declared only when GTEST_OS_WINDOWS is not set) testing that
+// an exit status describes an exit due to termination by a given signal.
+class GTEST_API_ KilledBySignal {
+ public:
+  explicit KilledBySignal(int signum);
+  bool operator()(int exit_status) const;  // applies the predicate
+ private:
+  const int signum_;  // const, so fixed at construction
+};
+# endif  // !GTEST_OS_WINDOWS
+
+// EXPECT_DEBUG_DEATH asserts that the given statements die in debug mode.
+// The death testing framework causes this to have interesting semantics,
+// since the sideeffects of the call are only visible in opt mode, and not
+// in debug mode.
+//
+// In practice, this can be used to test functions that utilize the
+// LOG(DFATAL) macro using the following style:
+//
+// int DieInDebugOr12(int* sideeffect) {
+//   if (sideeffect) {
+//     *sideeffect = 12;
+//   }
+//   LOG(DFATAL) << "death";
+//   return 12;
+// }
+//
+// TEST(TestCase, TestDieOr12WorksInDgbAndOpt) {
+//   int sideeffect = 0;
+//   // Only asserts in dbg.
+//   EXPECT_DEBUG_DEATH(DieInDebugOr12(&sideeffect), "death");
+//
+// #ifdef NDEBUG
+//   // opt-mode has sideeffect visible.
+//   EXPECT_EQ(12, sideeffect);
+// #else
+//   // dbg-mode no visible sideeffect.
+//   EXPECT_EQ(0, sideeffect);
+// #endif
+// }
+//
+// This will assert that DieInDebugOr12() crashes in debug
+// mode, usually due to a DCHECK or LOG(DFATAL), but returns the
+// appropriate fallback value (12 in this case) in opt mode. If you
+// need to test that a function has appropriate side-effects in opt
+// mode, include assertions against the side-effects.  A general
+// pattern for this is:
+//
+// EXPECT_DEBUG_DEATH({
+//   // Side-effects here will have an effect after this statement in
+//   // opt mode, but none in debug mode.
+//   EXPECT_EQ(12, DieInDebugOr12(&sideeffect));
+// }, "death");
+//
+# ifdef NDEBUG
+// Opt mode: no death test is run; GTEST_EXECUTE_STATEMENT_ is used instead.
+#  define EXPECT_DEBUG_DEATH(statement, regex) \
+  GTEST_EXECUTE_STATEMENT_(statement, regex)
+
+#  define ASSERT_DEBUG_DEATH(statement, regex) \
+  GTEST_EXECUTE_STATEMENT_(statement, regex)
+
+# else
+// Debug mode: the macros are plain aliases for EXPECT_DEATH / ASSERT_DEATH.
+#  define EXPECT_DEBUG_DEATH(statement, regex) \
+  EXPECT_DEATH(statement, regex)
+
+#  define ASSERT_DEBUG_DEATH(statement, regex) \
+  ASSERT_DEATH(statement, regex)
+
+# endif  // NDEBUG for EXPECT_DEBUG_DEATH
+#endif  // GTEST_HAS_DEATH_TEST
+
+// EXPECT_DEATH_IF_SUPPORTED(statement, regex) and
+// ASSERT_DEATH_IF_SUPPORTED(statement, regex) expand to real death tests if
+// GTEST_HAS_DEATH_TEST is set; otherwise they expand to
+// GTEST_UNSUPPORTED_DEATH_TEST_ and just issue a warning.  This is useful
+// when you combine death test assertions with normal assertions in one test.
+#if GTEST_HAS_DEATH_TEST
+# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \
+    EXPECT_DEATH(statement, regex)
+# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \
+    ASSERT_DEATH(statement, regex)
+#else
+# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \
+    GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, )
+# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \
+    GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, return)
+#endif
+
+}  // namespace testing
+
+#endif  // GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
@@ -0,0 +1,253 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+//
+// The Google C++ Testing Framework (Google Test)
+//
+// This header file defines the Message class.
+//
+// IMPORTANT NOTE: Due to limitation of the C++ language, we have to
+// leave some internal implementation details in this header file.
+// They are clearly marked by comments like this:
+//
+//   // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+//
+// Such code is NOT meant to be used by a user directly, and is subject
+// to CHANGE WITHOUT NOTICE.  Therefore DO NOT DEPEND ON IT in a user
+// program!
+
+#ifndef GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
+#define GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
+
+#include <limits>
+
+#include "gtest/internal/gtest-port.h"
+
+// Ensures that there is at least one operator<< in the global namespace, so
+// that "using ::operator <<;" in Message::operator<< below always names one.
+void operator<<(const testing::internal::Secret&, int);
+
+namespace testing {
+
+// The Message class works like an ostream repeater.
+//
+// Typical usage:
+//
+//   1. You stream a bunch of values to a Message object.
+//      It will remember the text in a stringstream.
+//   2. Then you stream the Message object to an ostream.
+//      This causes the text in the Message to be streamed
+//      to the ostream.
+//
+// For example:
+//
+//   testing::Message foo;
+//   foo << 1 << " != " << 2;
+//   std::cout << foo;
+//
+// will print "1 != 2".
+//
+// Message is not intended to be inherited from.  In particular, its
+// destructor is not virtual.
+//
+// Note that stringstream behaves differently in gcc and in MSVC.  You
+// can stream a NULL char pointer to it in the former, but not in the
+// latter (it causes an access violation if you do).  The Message
+// class hides this difference by treating a NULL char pointer as
+// "(null)".
+class GTEST_API_ Message {
+ private:
+  // The type of basic IO manipulators (endl, ends, and flush) for
+  // narrow streams.
+  typedef std::ostream& (*BasicNarrowIoManip)(std::ostream&);
+
+ public:
+  // Constructs an empty Message.
+  Message();
+
+  // Copy constructor: copies msg's text into a stringstream of its own.
+  Message(const Message& msg) : ss_(new ::std::stringstream) {  // NOLINT
+    *ss_ << msg.GetString();
+  }
+
+  // Constructs a Message from a C-string.
+  explicit Message(const char* str) : ss_(new ::std::stringstream) {
+    *ss_ << str;
+  }
+
+#if GTEST_OS_SYMBIAN
+  // Streams a value (either a pointer or not) to this object.
+  template <typename T>
+  inline Message& operator <<(const T& value) {
+    StreamHelper(typename internal::is_pointer<T>::type(), value);
+    return *this;
+  }
+#else
+  // Streams a non-pointer value to this object.
+  template <typename T>
+  inline Message& operator <<(const T& val) {
+    // Some libraries overload << for STL containers.  These
+    // overloads are defined in the global namespace instead of ::std.
+    //
+    // C++'s symbol lookup rule (i.e. Koenig lookup) says that these
+    // overloads are visible in either the std namespace or the global
+    // namespace, but not other namespaces, including the testing
+    // namespace which Google Test's Message class is in.
+    //
+    // To allow STL containers (and other types that have a << operator
+    // defined in the global namespace) to be used in Google Test
+    // assertions, testing::Message must access the custom << operator
+    // from the global namespace.  With this using declaration,
+    // overloads of << defined in the global namespace and those
+    // visible via Koenig lookup are both exposed in this function.
+    using ::operator <<;
+    *ss_ << val;
+    return *this;
+  }
+
+  // Streams a pointer value to this object.
+  //
+  // This function is an overload of the previous one.  When you
+  // stream a pointer to a Message, this definition will be used as it
+  // is more specialized.  (The C++ Standard, section
+  // [temp.func.order].)  If you stream a non-pointer, then the
+  // previous definition will be used.
+  //
+  // The reason for this overload is that streaming a NULL pointer to
+  // ostream is undefined behavior.  Depending on the compiler, you
+  // may get "0", "(nil)", "(null)", or an access violation.  To
+  // ensure consistent result across compilers, we always treat NULL
+  // as "(null)".
+  template <typename T>
+  inline Message& operator <<(T* const& pointer) {  // NOLINT
+    if (pointer == NULL) {
+      *ss_ << "(null)";
+    }
+    else {
+      *ss_ << pointer;
+    }
+
+    return *this;
+  }
+#endif  // GTEST_OS_SYMBIAN
+
+  // Since the basic IO manipulators are overloaded for both narrow
+  // and wide streams, we have to provide this specialized definition
+  // of operator <<, even though its body is the same as the
+  // templatized version above.  Without this definition, streaming
+  // endl or other basic IO manipulators to Message will confuse the
+  // compiler.
+  Message& operator <<(BasicNarrowIoManip val) {
+    *ss_ << val;
+    return *this;
+  }
+
+  // Instead of 1/0, we want to see true/false for bool values.
+  Message& operator <<(bool b) {
+    return *this << (b ? "true" : "false");
+  }
+
+  // These two overloads allow streaming a wide C string to a Message
+  // using the UTF-8 encoding.
+  Message& operator <<(const wchar_t* wide_c_str);
+  Message& operator <<(wchar_t* wide_c_str);
+
+#if GTEST_HAS_STD_WSTRING
+  // Converts the given wide string to a narrow string using the UTF-8
+  // encoding, and streams the result to this Message object.
+  Message& operator <<(const ::std::wstring& wstr);
+#endif  // GTEST_HAS_STD_WSTRING
+
+#if GTEST_HAS_GLOBAL_WSTRING
+  // Converts the given wide string to a narrow string using the UTF-8
+  // encoding, and streams the result to this Message object.
+  Message& operator <<(const ::wstring& wstr);
+#endif  // GTEST_HAS_GLOBAL_WSTRING
+
+  // Gets the text streamed to this object so far as an std::string.
+  // Each '\0' character in the buffer is replaced with "\\0".
+  //
+  // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+  std::string GetString() const;
+
+ private:
+
+#if GTEST_OS_SYMBIAN
+  // These are needed as the Nokia Symbian Compiler cannot decide between
+  // const T& and const T* in a function template. The Nokia compiler _can_
+  // decide between class template specializations for T and T*, so a
+  // tr1::type_traits-like is_pointer works, and we can overload on that.
+  template <typename T>
+  inline void StreamHelper(internal::true_type /*is_pointer*/, T* pointer) {
+    if (pointer == NULL) {
+      *ss_ << "(null)";
+    }
+    else {
+      *ss_ << pointer;
+    }
+  }
+  template <typename T>
+  inline void StreamHelper(internal::false_type /*is_pointer*/,
+                           const T& value) {
+    // See the comments in Message& operator <<(const T&) above for why
+    // we need this using statement.
+    using ::operator <<;
+    *ss_ << value;
+  }
+#endif  // GTEST_OS_SYMBIAN
+
+  // Holds the text streamed to this object; const pointer, mutable stream.
+  const internal::scoped_ptr< ::std::stringstream> ss_;
+
+  // We declare (but don't implement) this to prevent the compiler
+  // from implementing the assignment operator.
+  void operator=(const Message&);
+};
+
+// Writes the text held by a Message to an ostream and returns that ostream.
+inline std::ostream& operator <<(std::ostream& os, const Message& sb) {
+  os << sb.GetString();
+  return os;
+}
+
+namespace internal {
+
+// Converts a streamable value to an std::string by streaming it into a
+// temporary Message and returning its GetString().  A NULL pointer becomes
+// "(null)".  When the input value is a ::string, ::std::string, ::wstring,
+// or ::std::wstring object, each NUL character in it is replaced with "\\0".
+template <typename T>
+std::string StreamableToString(const T& streamable) {
+  return (Message() << streamable).GetString();
+}
+
+}  // namespace internal
+}  // namespace testing
+
+#endif  // GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
@@ -0,0 +1,868 @@
+// Copyright 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+
+// Google Test - The Google C++ Testing Framework
+//
+// This file implements a universal value printer that can print a
+// value of any type T:
+//
+//   void ::testing::internal::UniversalPrinter<T>::Print(value, ostream_ptr);
+//
+// A user can teach this function how to print a class type T by
+// defining either operator<<() or PrintTo() in the namespace that
+// defines T.  More specifically, the FIRST defined function in the
+// following list will be used (assuming T is defined in namespace
+// foo):
+//
+//   1. foo::PrintTo(const T&, ostream*)
+//   2. operator<<(ostream&, const T&) defined in either foo or the
+//      global namespace.
+//
+// If none of the above is defined, it will print the debug string of
+// the value if it is a protocol buffer, or print the raw bytes in the
+// value otherwise.
+//
+// To aid debugging: when T is a reference type, the address of the
+// value is also printed; when T is a (const) char pointer, both the
+// pointer value and the NUL-terminated string it points to are
+// printed.
+//
+// We also provide some convenient wrappers:
+//
+//   // Prints a value to a string.  For a (const or not) char
+//   // pointer, the NUL-terminated string (but not the pointer) is
+//   // printed.
+//   std::string ::testing::PrintToString(const T& value);
+//
+//   // Prints a value tersely: for a reference type, the referenced
+//   // value (but not the address) is printed; for a (const or not) char
+//   // pointer, the NUL-terminated string (but not the pointer) is
+//   // printed.
+//   void ::testing::internal::UniversalTersePrint(const T& value, ostream*);
+//
+//   // Prints value using the type inferred by the compiler.  The difference
+//   // from UniversalTersePrint() is that this function prints both the
+//   // pointer and the NUL-terminated string for a (const or not) char pointer.
+//   void ::testing::internal::UniversalPrint(const T& value, ostream*);
+//
+//   // Prints the fields of a tuple tersely to a string vector, one
+//   // element for each field. Tuple support must be enabled in
+//   // gtest-port.h.
+//   std::vector<string> UniversalTersePrintTupleFieldsToStrings(
+//       const Tuple& value);
+//
+// Known limitation:
+//
+// The print primitives print the elements of an STL-style container
+// using the compiler-inferred type of *iter where iter is a
+// const_iterator of the container.  When const_iterator is an input
+// iterator but not a forward iterator, this inferred type may not
+// match value_type, and the print output may be incorrect.  In
+// practice, this is rarely a problem as for most containers
+// const_iterator is a forward iterator.  We'll fix this if there's an
+// actual need for it.  Note that this fix cannot rely on value_type
+// being defined as many user-defined container types don't have
+// value_type.
+
+#ifndef GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
+#define GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
+
+#include <ostream>  // NOLINT
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+#include "gtest/internal/gtest-port.h"
+#include "gtest/internal/gtest-internal.h"
+
+namespace testing {
+
+// Definitions in the 'internal' and 'internal2' namespaces are
+// subject to change without notice.  DO NOT USE THEM IN USER CODE!
+namespace internal2 {
+
+// Prints `count` bytes of the object that starts at `obj_bytes` to
+// `*os`.  Declaration only; the definition is not part of this header.
+GTEST_API_ void PrintBytesInObjectTo(const unsigned char* obj_bytes,
+                                     size_t count,
+                                     ::std::ostream* os);
+
+// Selects which TypeWithoutFormatter specialization prints a value
+// whose type has neither << nor PrintTo().
+enum TypeKind {
+  kProtobuf,              // a protobuf type
+  kConvertibleToInteger,  // a type implicitly convertible to BiggestInt
+                          // (e.g. a named or unnamed enum type)
+  kOtherType              // anything else
+};
+
+// The universal printer calls
+// TypeWithoutFormatter<T, kTypeKind>::PrintValue(value, os) for a
+// value of type T that has neither operator<< nor PrintTo().
+// kTypeKind is the "kind" of T as defined by enum TypeKind.
+template <typename T, TypeKind kTypeKind>
+class TypeWithoutFormatter {
+ public:
+  // Fallback used when kTypeKind is kOtherType: hands the object's
+  // storage to PrintBytesInObjectTo() as a sequence of bytes.
+  static void PrintValue(const T& value, ::std::ostream* os) {
+    const unsigned char* const raw_bytes =
+      reinterpret_cast<const unsigned char*>(&value);
+    PrintBytesInObjectTo(raw_bytes, sizeof(T), os);
+  }
+};
+
+// Length limit for the one-line protobuf form: a message whose
+// ShortDebugString() is longer than this many characters is printed
+// using DebugString() instead, for better readability.
+const size_t kProtobufOneLinerMaxLength = 50;
+
+// Printer used when T is a protocol message (kTypeKind == kProtobuf).
+template <typename T>
+class TypeWithoutFormatter<T, kProtobuf> {
+ public:
+  // Writes the message's debug text to *os wrapped in angle brackets.
+  // The multi-line form starts on a new line.
+  static void PrintValue(const T& value, ::std::ostream* os) {
+    typedef ::testing::internal::string String;
+    const String one_line = value.ShortDebugString();
+    const bool fits_on_one_line =
+      one_line.length() <= kProtobufOneLinerMaxLength;
+    const String body =
+      fits_on_one_line ? one_line : ("\n" + value.DebugString());
+    *os << ("<" + body + ">");
+  }
+};
+
+// Printer used when T has no << operator or PrintTo() but converts
+// implicitly to BiggestInt (kTypeKind == kConvertibleToInteger).
+template <typename T>
+class TypeWithoutFormatter<T, kConvertibleToInteger> {
+ public:
+  // Prints `value` as a BiggestInt.
+  //
+  // T is most likely an enum type (named or unnamed), for which an
+  // integer is the desired output.  For any other T, an integer is
+  // still the best we can do, as T has no user-defined printer.
+  static void PrintValue(const T& value, ::std::ostream* os) {
+    // Copy-initialization, so only the implicit conversion is used.
+    const internal::BiggestInt as_integer = value;
+    *os << as_integer;
+  }
+};
+
+// Prints the given value to the given ostream.  If the value is a
+// protocol message, its debug string is printed; if it's an enum or
+// of a type implicitly convertible to BiggestInt, it's printed as an
+// integer; otherwise the bytes in the value are printed.  This is
+// what UniversalPrinter<T>::Print() does when it knows nothing about
+// type T and T has neither << operator nor PrintTo().
+//
+// A user can override this behavior for a class type Foo by defining
+// a << operator in the namespace where Foo is defined.
+//
+// We put this operator in namespace 'internal2' instead of 'internal'
+// to simplify the implementation, as much code in 'internal' needs to
+// use << in STL, which would conflict with our own << were it defined
+// in 'internal'.
+//
+// Note that this operator<< takes a generic std::basic_ostream<Char,
+// CharTraits> type instead of the more restricted std::ostream.  If
+// we define it to take an std::ostream instead, we'll get an
+// "ambiguous overloads" compiler error when trying to print a type
+// Foo that supports streaming to std::basic_ostream<Char,
+// CharTraits>, as the compiler cannot tell whether
+// operator<<(std::ostream&, const T&) or
+// operator<<(std::basic_ostream<Char, CharTraits>&, const Foo&) is more
+// specific.
+template <typename Char, typename CharTraits, typename T>
+::std::basic_ostream<Char, CharTraits>& operator<<(
+  ::std::basic_ostream<Char, CharTraits>& os, const T& x) {
+  TypeWithoutFormatter < T,  // the TypeKind below is chosen at compile time
+                       (internal::IsAProtocolMessage<T>::value ? kProtobuf :
+                        internal::ImplicitlyConvertible<const T&, internal::BiggestInt>::value ?
+                        kConvertibleToInteger : kOtherType) >::PrintValue(x, &os);
+  return os;  // returned so that insertions can be chained
+}
+
+}  // namespace internal2
+}  // namespace testing
+
+// This namespace MUST NOT BE NESTED IN ::testing, or the name look-up
+// magic needed for implementing UniversalPrinter won't work.
+namespace testing_internal {
+
+// Streams `value` into `*os` with <<.  Used for a value that is not an
+// STL-style container when the user doesn't define PrintTo() for it.
+template <typename T>
+void DefaultPrintNonContainerTo(const T& value, ::std::ostream* os) {
+  // With the following statement, during unqualified name lookup,
+  // testing::internal2::operator<< appears as if it was declared in
+  // the nearest enclosing namespace that contains both
+  // ::testing_internal and ::testing::internal2, i.e. the global
+  // namespace.  For more details, refer to the C++ Standard section
+  // 7.3.4-1 [namespace.udir].  This allows us to fall back onto
+  // testing::internal2::operator<< in case T doesn't come with a <<
+  // operator.
+  //
+  // We cannot write 'using ::testing::internal2::operator<<;', which
+  // gcc 3.3 fails to compile due to a compiler bug.
+  using namespace ::testing::internal2;  // NOLINT
+
+  // Assuming T is defined in namespace foo, in the next statement,
+  // the compiler will consider all of:
+  //
+  //   1. foo::operator<< (thanks to Koenig look-up),
+  //   2. ::operator<< (as the current namespace is enclosed in ::),
+  //   3. testing::internal2::operator<< (thanks to the using statement above).
+  //
+  // The operator<< whose type matches T best will be picked.
+  //
+  // We deliberately allow #2 to be a candidate, as sometimes it's
+  // impossible to define #1 (e.g. when foo is ::std, defining
+  // anything in it is undefined behavior unless you are a compiler
+  // vendor.).
+  *os << value;  // candidates are the three operator<< sets listed above
+}
+
+}  // namespace testing_internal
+
+namespace testing {
+namespace internal {
+
+// UniversalPrinter<T>::Print(value, ostream_ptr) prints the given
+// value to the given ostream.  The caller must ensure that
+// 'ostream_ptr' is not NULL, or the behavior is undefined.
+//
+// We define UniversalPrinter as a class template (as opposed to a
+// function template), as we need to partially specialize it for
+// reference types, which cannot be done with function templates.
+template <typename T>
+class UniversalPrinter;
+// Forward declaration: prints `value` to `*os` using the inferred type T.
+template <typename T>
+void UniversalPrint(const T& value, ::std::ostream* os);
+
+// Used to print an STL-style container when the user doesn't define
+// a PrintTo() for it.  The output has the form "{ e1, e2, e3 }"; an
+// empty container prints as "{}".  At most kMaxCount elements are
+// printed; the remaining ones are abbreviated to "...".
+template <typename C>
+void DefaultPrintTo(IsContainer /* dummy */,
+                    false_type /* is not a pointer */,
+                    const C& container, ::std::ostream* os) {
+  const size_t kMaxCount = 32;  // The maximum number of elements to print.
+  size_t printed = 0;           // The number of elements printed so far.
+  *os << '{';
+
+  typename C::const_iterator it = container.begin();
+
+  while (it != container.end()) {
+    // Every element but the first one is preceded by a comma.
+    if (printed != 0) {
+      *os << ',';
+    }
+
+    if (printed == kMaxCount) {  // Enough has been printed.
+      *os << " ...";
+      break;
+    }
+
+    *os << ' ';
+    // We cannot call PrintTo(*it, os) here as PrintTo() doesn't
+    // handle *it being a native array.
+    internal::UniversalPrint(*it, os);
+    ++it;
+    ++printed;
+  }
+
+  // Pad before the closing brace unless the container was empty.
+  if (printed != 0) {
+    *os << ' ';
+  }
+
+  *os << '}';
+}
+
+// Used to print a pointer that is neither a char pointer nor a member
+// pointer, when the user doesn't define PrintTo() for it: "NULL" for a
+// null pointer, otherwise the pointer value.  (A member variable pointer
+// or member function pointer doesn't really point to a location in the
+// address space.  Their representation is implementation-defined.
+// Therefore they will be printed as raw bytes.)
+template <typename T>
+void DefaultPrintTo(IsNotContainer /* dummy */,
+                    true_type /* is a pointer */,
+                    T* p, ::std::ostream* os) {
+  if (p == NULL) {
+    *os << "NULL";
+  }
+  else {
+    // C++ doesn't allow casting from a function pointer to any object
+    // pointer.
+    //
+    // IsTrue() silences warnings: "Condition is always true",
+    // "unreachable code".
+    if (IsTrue(ImplicitlyConvertible<T*, const void*>::value)) {
+      // T is not a function type.  We just call << to print p,
+      // relying on ADL to pick up user-defined << for their pointer
+      // types, if any.
+      *os << p;
+    }
+    else {
+      // T is a function type, so '*os << p' doesn't do what we want
+      // (it just prints p as bool).  We want to print p as a const
+      // void*.  However, we cannot cast it to const void* directly,
+      // even using reinterpret_cast, as earlier versions of gcc
+      // (e.g. 3.4.5) cannot compile the cast when p is a function
+      // pointer.  Casting to UInt64 first solves the problem.
+      *os << reinterpret_cast<const void*>(
+            reinterpret_cast<internal::UInt64>(p));
+    }
+  }
+}
+
+// Used to print a non-container, non-pointer value when the user doesn't
+// define PrintTo() for it; forwards to DefaultPrintNonContainerTo().
+template <typename T>
+void DefaultPrintTo(IsNotContainer /* dummy */,
+                    false_type /* is not a pointer */,
+                    const T& value, ::std::ostream* os) {
+  ::testing_internal::DefaultPrintNonContainerTo(value, os);
+}
+
+// Prints the given value using the << operator if it has one;
+// otherwise prints the bytes in it.  This is what
+// UniversalPrinter<T>::Print() does when PrintTo() is not specialized
+// or overloaded for type T.
+//
+// A user can override this behavior for a class type Foo by defining
+// an overload of PrintTo() in the namespace where Foo is defined.  We
+// give the user this option as sometimes defining a << operator for
+// Foo is not desirable (e.g. the coding style may prevent doing it,
+// or there is already a << operator but it doesn't do what the user
+// wants).
+template <typename T>
+void PrintTo(const T& value, ::std::ostream* os) {
+  // DefaultPrintTo() is overloaded.  The types of its first two
+  // arguments determine which version will be picked.  If T is an
+  // STL-style container, the version for container will be called; if
+  // T is a pointer, the pointer version will be called; otherwise the
+  // generic version will be called.
+  //
+  // Note that we check for container types here, before we check
+  // for protocol message types in our operator<<.  The rationale is:
+  //
+  // For protocol messages, we want to give people a chance to
+  // override Google Mock's format by defining a PrintTo() or
+  // operator<<.  For STL containers, other formats can be
+  // incompatible with Google Mock's format for the container
+  // elements; therefore we check for container types here to ensure
+  // that our format is used.
+  //
+  // The second argument of DefaultPrintTo() is needed to bypass a bug
+  // in Symbian's C++ compiler that prevents it from picking the right
+  // overload between:
+  //
+  //   PrintTo(const T& x, ...);
+  //   PrintTo(T* x, ...);
+  DefaultPrintTo(IsContainerTest<T>(0), is_pointer<T>(), value, os);
+}
+
+// The following list of PrintTo() overloads tells
+// UniversalPrinter<T>::Print() how to print standard types (built-in
+// types, strings, plain arrays, and pointers).
+
+// Overloads for various char types (the first two are only declared here).
+GTEST_API_ void PrintTo(unsigned char c, ::std::ostream* os);
+GTEST_API_ void PrintTo(signed char c, ::std::ostream* os);
+inline void PrintTo(char c, ::std::ostream* os) {
+  // When printing a plain char, we always treat it as unsigned.  This
+  // way, the output won't be affected by whether the compiler thinks
+  // char is signed or not.
+  PrintTo(static_cast<unsigned char>(c), os);
+}
+
+// Overload for bool: writes the word "true" or "false" to *os.
+inline void PrintTo(bool x, ::std::ostream* os) {
+  const char* text = "false";
+
+  if (x) {
+    text = "true";
+  }
+
+  *os << text;
+}
+
+// Overload for wchar_t type (declaration only).
+// Prints a wchar_t as a symbol if it is printable or as its internal
+// code otherwise and also as its decimal code (except for L'\0').
+// The L'\0' char is printed as "L'\\0'". The decimal code is printed
+// as a signed integer when wchar_t is implemented by the compiler
+// as a signed type and is printed as an unsigned integer when wchar_t
+// is implemented as an unsigned type.
+GTEST_API_ void PrintTo(wchar_t wc, ::std::ostream* os);
+
+// Overloads for C strings.  The char* version adds const and delegates.
+GTEST_API_ void PrintTo(const char* s, ::std::ostream* os);
+inline void PrintTo(char* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const char*>(s), os);
+}
+
+// signed/unsigned char is often used for representing binary data, so
+// we print pointers to it as const void* (not as C strings) to be safe.
+inline void PrintTo(const signed char* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const void*>(s), os);
+}
+inline void PrintTo(signed char* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const void*>(s), os);
+}
+inline void PrintTo(const unsigned char* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const void*>(s), os);
+}
+inline void PrintTo(unsigned char* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const void*>(s), os);
+}
+
+// MSVC can be configured to define wchar_t as a typedef of unsigned
+// short.  It defines _NATIVE_WCHAR_T_DEFINED when wchar_t is a native
+// type.  When wchar_t is a typedef, defining an overload for const
+// wchar_t* would cause unsigned short* to be printed as a wide string,
+// possibly causing invalid memory accesses.
+#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED)
+// Overloads for wide C strings.  The wchar_t* version adds const and delegates.
+GTEST_API_ void PrintTo(const wchar_t* s, ::std::ostream* os);
+inline void PrintTo(wchar_t* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const wchar_t*>(s), os);
+}
+#endif  // !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED)
+
+// Overload for C arrays.  Multi-dimensional arrays are printed
+// properly.
+
+// Prints the first `count` elements of array `a` to `*os`, separated
+// by ", " and without the surrounding curly braces.  Each element is
+// printed with UniversalPrint().  Prints nothing when `count` is 0.
+template <typename T>
+void PrintRawArrayTo(const T a[], size_t count, ::std::ostream* os) {
+  // Looping from index 0 (instead of printing a[0] unconditionally and
+  // then looping while i != count) keeps an empty range from reading
+  // a[0] and from running past the end of the array.
+  for (size_t i = 0; i < count; ++i) {
+    if (i != 0) {
+      *os << ", ";
+    }
+
+    UniversalPrint(a[i], os);
+  }
+}
+
+// Overloads for ::string and ::std::string; each forwards to PrintStringTo().
+#if GTEST_HAS_GLOBAL_STRING
+GTEST_API_ void PrintStringTo(const ::string& s, ::std::ostream* os);
+inline void PrintTo(const ::string& s, ::std::ostream* os) {
+  PrintStringTo(s, os);
+}
+#endif  // GTEST_HAS_GLOBAL_STRING
+
+GTEST_API_ void PrintStringTo(const ::std::string& s, ::std::ostream* os);
+inline void PrintTo(const ::std::string& s, ::std::ostream* os) {
+  PrintStringTo(s, os);
+}
+
+// Overloads for ::wstring and ::std::wstring; each forwards to PrintWideStringTo().
+#if GTEST_HAS_GLOBAL_WSTRING
+GTEST_API_ void PrintWideStringTo(const ::wstring& s, ::std::ostream* os);
+inline void PrintTo(const ::wstring& s, ::std::ostream* os) {
+  PrintWideStringTo(s, os);
+}
+#endif  // GTEST_HAS_GLOBAL_WSTRING
+
+#if GTEST_HAS_STD_WSTRING
+GTEST_API_ void PrintWideStringTo(const ::std::wstring& s, ::std::ostream* os);
+inline void PrintTo(const ::std::wstring& s, ::std::ostream* os) {
+  PrintWideStringTo(s, os);
+}
+#endif  // GTEST_HAS_STD_WSTRING
+
+#if GTEST_HAS_TR1_TUPLE
+// Overloads for ::std::tr1::tuple.  Needed for printing function arguments,
+// which are packed as tuples.
+
+// Helper function for printing a tuple.  T must be instantiated with
+// a tuple type.  Only declared here; every overload below forwards to it.
+template <typename T>
+void PrintTupleTo(const T& t, ::std::ostream* os);
+
+// Overloaded PrintTo() for tuples of various arities.  We support
+// tuples of up to 10 fields.  The following implementation works
+// regardless of whether tr1::tuple is implemented using the
+// non-standard variadic template feature or not.
+
+inline void PrintTo(const ::std::tr1::tuple<>& t, ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1>
+void PrintTo(const ::std::tr1::tuple<T1>& t, ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2>
+void PrintTo(const ::std::tr1::tuple<T1, T2>& t, ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3>& t, ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4>& t, ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5>& t,
+             ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+          typename T6>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6>& t,
+             ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+          typename T6, typename T7>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7>& t,
+             ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+          typename T6, typename T7, typename T8>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8>& t,
+             ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+          typename T6, typename T7, typename T8, typename T9>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9>& t,
+             ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+          typename T6, typename T7, typename T8, typename T9, typename T10>
+void PrintTo(
+  const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>& t,
+  ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+#endif  // GTEST_HAS_TR1_TUPLE
+
+// Overload for std::pair.  Prints the pair as "(first, second)".
+template <typename T1, typename T2>
+void PrintTo(const ::std::pair<T1, T2>& value, ::std::ostream* os) {
+  *os << '(';
+  // We cannot use UniversalPrint(value.first, os) here, as T1 may be
+  // a reference type.  The same goes for printing value.second.
+  UniversalPrinter<T1>::Print(value.first, os);
+  *os << ", ";
+  UniversalPrinter<T2>::Print(value.second, os);
+  *os << ')';
+}
+
+// Implements printing a non-reference type T by letting the compiler
+// pick the right overload of PrintTo() for T.
+template <typename T>
+class UniversalPrinter {
+ public:
+  // MSVC warns about adding const to a function type, so we want to
+  // disable the warning around the declaration of Print() below.
+#ifdef _MSC_VER
+# pragma warning(push)          // Saves the current warning state.
+# pragma warning(disable:4180)  // Temporarily disables warning 4180.
+#endif  // _MSC_VER
+
+  // Note: we deliberately don't call this PrintTo(), as that name
+  // conflicts with ::testing::internal::PrintTo in the body of the
+  // function.
+  static void Print(const T& value, ::std::ostream* os) {
+    // By default, ::testing::internal::PrintTo() is used for printing
+    // the value.
+    //
+    // Thanks to Koenig look-up, if T is a class and has its own
+    // PrintTo() function defined in its namespace, that function will
+    // be visible here.  Since it is more specific than the generic ones
+    // in ::testing::internal, it will be picked by the compiler in the
+    // following statement - exactly what we want.
+    PrintTo(value, os);
+  }
+
+#ifdef _MSC_VER
+# pragma warning(pop)           // Restores the warning state.
+#endif  // _MSC_VER
+};
+
+// UniversalPrintArray(begin, len, os) prints the 'len' elements of
+// the array starting at address 'begin', wrapped in curly braces.
+// An empty array prints as "{}".
+template <typename T>
+void UniversalPrintArray(const T* begin, size_t len, ::std::ostream* os) {
+  if (len == 0) {
+    *os << "{}";
+    return;
+  }
+
+  // An array with more than kMaxFullLength elements is abbreviated to
+  // its first and last kEdgeLength elements.
+  // TODO(wan@google.com): let the user control the threshold using a flag.
+  const size_t kMaxFullLength = 18;
+  const size_t kEdgeLength = 8;
+
+  *os << "{ ";
+
+  if (len > kMaxFullLength) {
+    PrintRawArrayTo(begin, kEdgeLength, os);
+    *os << ", ..., ";
+    PrintRawArrayTo(begin + len - kEdgeLength, kEdgeLength, os);
+  }
+  else {
+    PrintRawArrayTo(begin, len, os);
+  }
+
+  *os << " }";
+}
+// This overload prints a (const) char array compactly (declaration only).
+GTEST_API_ void UniversalPrintArray(
+  const char* begin, size_t len, ::std::ostream* os);
+
+// This overload prints a (const) wchar_t array compactly (declaration only).
+GTEST_API_ void UniversalPrintArray(
+  const wchar_t* begin, size_t len, ::std::ostream* os);
+
+// Implements printing an array type T[N].
+template <typename T, size_t N>
+class UniversalPrinter<T[N]> {
+ public:
+  // Prints the given array via UniversalPrintArray(), which omits some
+  // elements when there are too many.
+  static void Print(const T (&a)[N], ::std::ostream* os) {
+    UniversalPrintArray(a, N, os);
+  }
+};
+
+// Implements printing a reference type T&: "@<address> <value>".
+template <typename T>
+class UniversalPrinter<T&> {
+ public:
+  // MSVC warns about adding const to a function type, so we want to
+  // disable the warning around the declaration of Print() below.
+#ifdef _MSC_VER
+# pragma warning(push)          // Saves the current warning state.
+# pragma warning(disable:4180)  // Temporarily disables warning 4180.
+#endif  // _MSC_VER
+
+  static void Print(const T& value, ::std::ostream* os) {
+    // Prints the address of the value.  We use reinterpret_cast here
+    // as static_cast doesn't compile when T is a function type.
+    *os << "@" << reinterpret_cast<const void*>(&value) << " ";
+
+    // Then prints the value itself.
+    UniversalPrint(value, os);
+  }
+
+#ifdef _MSC_VER
+# pragma warning(pop)           // Restores the warning state.
+#endif  // _MSC_VER
+};
+
+// Prints a value tersely: for a reference type, the referenced value
+// (but not the address) is printed; for a (const) char pointer, the
+// NUL-terminated string (but not the pointer) is printed.
+// The primary template below simply defers to UniversalPrint().
+template <typename T>
+class UniversalTersePrinter {
+ public:
+  static void Print(const T& value, ::std::ostream* os) {
+    UniversalPrint(value, os);
+  }
+};
+template <typename T>
+class UniversalTersePrinter<T&> {  // terse printer for reference types
+ public:
+  static void Print(const T& value, ::std::ostream* os) {
+    UniversalPrint(value, os);  // prints the referent only, no address
+  }
+};
+template <typename T, size_t N>
+class UniversalTersePrinter<T[N]> {  // terse printer for native arrays
+ public:
+  static void Print(const T (&value)[N], ::std::ostream* os) {
+    UniversalPrinter<T[N]>::Print(value, os);  // same as the full printer
+  }
+};
+template <>
+class UniversalTersePrinter<const char*> {
+ public:
+  // Prints the NUL-terminated string that `str` points to (not the
+  // pointer itself), or "NULL" when `str` is a null pointer.
+  static void Print(const char* str, ::std::ostream* os) {
+    if (str != NULL) {
+      UniversalPrint(string(str), os);
+      return;
+    }
+
+    *os << "NULL";
+  }
+};
+template <>
+class UniversalTersePrinter<char*> {  // terse printer for non-const C strings
+ public:
+  static void Print(char* str, ::std::ostream* os) {
+    UniversalTersePrinter<const char*>::Print(str, os);  // reuse const version
+  }
+};
+
+#if GTEST_HAS_STD_WSTRING
+template <>
+class UniversalTersePrinter<const wchar_t*> {
+ public:
+  // Prints the NUL-terminated wide string that `str` points to (not
+  // the pointer itself), or "NULL" when `str` is a null pointer.
+  static void Print(const wchar_t* str, ::std::ostream* os) {
+    if (str != NULL) {
+      UniversalPrint(::std::wstring(str), os);
+      return;
+    }
+
+    *os << "NULL";
+  }
+};
+#endif  // GTEST_HAS_STD_WSTRING
+
+template <>  // NOTE(review): unlike the const wchar_t* version, not guarded by GTEST_HAS_STD_WSTRING
+class UniversalTersePrinter<wchar_t*> {  // terse printer for non-const wide C strings
+ public:
+  static void Print(wchar_t* str, ::std::ostream* os) {
+    UniversalTersePrinter<const wchar_t*>::Print(str, os);  // adds const, delegates
+  }
+};
+
+// Prints `value` to `*os` tersely by dispatching to the
+// UniversalTersePrinter specialization for the inferred type T.
+template <typename T>
+void UniversalTersePrint(const T& value, ::std::ostream* os) {
+  typedef UniversalTersePrinter<T> TersePrinter;
+  TersePrinter::Print(value, os);
+}
+
+// Prints a value using the type inferred by the compiler.  The
+// difference between this and UniversalTersePrint() is that for a
+// (const) char pointer, this prints both the pointer and the
+// NUL-terminated string.
+template <typename T>
+void UniversalPrint(const T& value, ::std::ostream* os) {
+  // A workaround for the bug in VC++ 7.1 that prevents us from instantiating
+  // UniversalPrinter with T directly.
+  typedef T T1;
+  UniversalPrinter<T1>::Print(value, os);
+}
+
+#if GTEST_HAS_TR1_TUPLE
+typedef ::std::vector<string> Strings;  // one printed string per tuple field
+
+// This helper template lets PrintTo() for tuples and
+// UniversalTersePrintTupleFieldsToStrings() be defined by induction
+// on the number of tuple fields:
+// TuplePrefixPrinter<N>::PrintPrefixTo(t, os) prints the first N
+// fields in tuple t, and is defined in terms of
+// TuplePrefixPrinter<N - 1>.
+
+// The inductive case.
+template <size_t N>
+struct TuplePrefixPrinter {
+  // Prints the first N fields of a tuple: the first N - 1 fields,
+  // then ", ", then field number N - 1.
+  template <typename Tuple>
+  static void PrintPrefixTo(const Tuple& t, ::std::ostream* os) {
+    typedef typename ::std::tr1::tuple_element<N - 1, Tuple>::type LastField;
+    TuplePrefixPrinter<N - 1>::PrintPrefixTo(t, os);
+    *os << ", ";
+    UniversalPrinter<LastField>::Print(::std::tr1::get<N - 1>(t), os);
+  }
+
+  // Tersely prints the first N fields of a tuple to a string vector,
+  // appending one element for each field.
+  template <typename Tuple>
+  static void TersePrintPrefixToStrings(const Tuple& t, Strings* strings) {
+    TuplePrefixPrinter<N - 1>::TersePrintPrefixToStrings(t, strings);
+    ::std::stringstream field_text;
+    UniversalTersePrint(::std::tr1::get<N - 1>(t), &field_text);
+    strings->push_back(field_text.str());
+  }
+};
+
+// Base case N == 0: an empty prefix prints nothing and adds no strings.
+template <>
+struct TuplePrefixPrinter<0> {
+  template <typename Tuple>
+  static void PrintPrefixTo(const Tuple&, ::std::ostream*) {}
+
+  template <typename Tuple>
+  static void TersePrintPrefixToStrings(const Tuple&, Strings*) {}
+};
+// Base case N == 1.  The whole TuplePrefixPrinter<> class template
+// has to be specialized here, even though TersePrintPrefixToStrings()
+// is defined the same way as in the generic version, because
+// Embarcadero (formerly CodeGear, formerly Borland) C++ doesn't
+// support specializing a method template of a class template.
+template <>
+struct TuplePrefixPrinter<1> {
+  // Prints field 0 only, with no separator in front of it.
+  template <typename Tuple>
+  static void PrintPrefixTo(const Tuple& t, ::std::ostream* os) {
+    typedef typename ::std::tr1::tuple_element<0, Tuple>::type FirstField;
+    UniversalPrinter<FirstField>::Print(::std::tr1::get<0>(t), os);
+  }
+
+  // Tersely prints field 0 and appends the text to `strings`.
+  template <typename Tuple>
+  static void TersePrintPrefixToStrings(const Tuple& t, Strings* strings) {
+    ::std::stringstream field_text;
+    UniversalTersePrint(::std::tr1::get<0>(t), &field_text);
+    strings->push_back(field_text.str());
+  }
+};
+
+// Helper function for printing a tuple: writes "(", all of the
+// tuple's fields, and ")" to *os.  T must be instantiated with a
+// tuple type.
+template <typename T>
+void PrintTupleTo(const T& t, ::std::ostream* os) {
+  typedef TuplePrefixPrinter< ::std::tr1::tuple_size<T>::value> FieldPrinter;
+  *os << "(";
+  FieldPrinter::PrintPrefixTo(t, os);
+  *os << ")";
+}
+
+// Tersely prints every field of a tuple and returns the results as a
+// string vector with one element per field.  See the comment before
+// UniversalTersePrint() for how we define "tersely".
+template <typename Tuple>
+Strings UniversalTersePrintTupleFieldsToStrings(const Tuple& value) {
+  typedef TuplePrefixPrinter< ::std::tr1::tuple_size<Tuple>::value>
+  FieldPrinter;
+  Strings fields;
+  FieldPrinter::TersePrintPrefixToStrings(value, &fields);
+  return fields;
+}
+#endif  // GTEST_HAS_TR1_TUPLE
+
+}  // namespace internal
+
+// Prints `value` tersely into a string stream and returns the
+// resulting text.
+template <typename T>
+::std::string PrintToString(const T& value) {
+  typedef internal::UniversalTersePrinter<T> TersePrinter;
+  ::std::stringstream buffer;
+  TersePrinter::Print(value, &buffer);
+  return buffer.str();
+}
+
+}  // namespace testing
+
+#endif  // GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
@@ -0,0 +1,232 @@
+// Copyright 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+//
+// Utilities for testing Google Test itself and code that uses Google Test
+// (e.g. frameworks built on top of Google Test).
+
+#ifndef GTEST_INCLUDE_GTEST_GTEST_SPI_H_
+#define GTEST_INCLUDE_GTEST_GTEST_SPI_H_
+
+#include "gtest/gtest.h"
+
+namespace testing {
+
+// This helper class can be used to mock out Google Test failure reporting
+// so that we can test Google Test or code that builds on Google Test.
+//
+// An object of this class appends a TestPartResult object to the
+// TestPartResultArray object given in the constructor whenever a Google Test
+// failure is reported. It can either intercept only failures that are
+// generated in the same thread that created this object or it can intercept
+// all generated failures. The scope of this mock object can be controlled with
+// the first argument to the two-argument constructor.
+class GTEST_API_ ScopedFakeTestPartResultReporter
+  : public TestPartResultReporterInterface {
+ public:
+  // The two possible mocking modes of this object.
+  enum InterceptMode {
+    INTERCEPT_ONLY_CURRENT_THREAD,  // Intercepts only thread local failures.
+    INTERCEPT_ALL_THREADS           // Intercepts all failures.
+  };
+
+  // The c'tor sets this object as the test part result reporter used
+  // by Google Test.  The 'result' parameter specifies where to report the
+  // results. This reporter will only catch failures generated in the current
+  // thread. DEPRECATED
+  explicit ScopedFakeTestPartResultReporter(TestPartResultArray* result);
+
+  // Same as above, but you can choose the interception scope of this object.
+  ScopedFakeTestPartResultReporter(InterceptMode intercept_mode,
+                                   TestPartResultArray* result);
+
+  // The d'tor restores the previous test part result reporter.
+  virtual ~ScopedFakeTestPartResultReporter();
+
+  // Appends the TestPartResult object to the TestPartResultArray
+  // received in the constructor.
+  //
+  // This method is from the TestPartResultReporterInterface
+  // interface.
+  virtual void ReportTestPartResult(const TestPartResult& result);
+ private:
+  void Init();  // Defined out of line; presumably shared by both c'tors.
+
+  const InterceptMode intercept_mode_;  // Interception scope of this object.
+  TestPartResultReporterInterface* old_reporter_;  // Presumably the prior one.
+  TestPartResultArray* const result_;  // Array the results are appended to.
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedFakeTestPartResultReporter);
+};
+
+namespace internal {
+
+// A helper class for implementing EXPECT_FATAL_FAILURE() and
+// EXPECT_NONFATAL_FAILURE().  Its destructor verifies that the given
+// TestPartResultArray contains exactly one failure that has the given
+// type and contains the given substring.  If that's not the case, a
+// non-fatal failure will be generated.
+class GTEST_API_ SingleFailureChecker {
+ public:
+  // The constructor remembers the arguments.
+  SingleFailureChecker(const TestPartResultArray* results,
+                       TestPartResult::Type type,
+                       const string& substr);
+  ~SingleFailureChecker();  // Performs the verification described above.
+ private:
+  const TestPartResultArray* const results_;  // Array checked by the d'tor.
+  const TestPartResult::Type type_;           // Expected failure type.
+  const string substr_;                       // Expected message substring.
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(SingleFailureChecker);
+};
+
+}  // namespace internal
+
+}  // namespace testing
+
+// A set of macros for testing Google Test assertions or code that's expected
+// to generate Google Test fatal failures.  It verifies that the given
+// statement will cause exactly one fatal Google Test failure with 'substr'
+// being part of the failure message.
+//
+// There are two different versions of this macro. EXPECT_FATAL_FAILURE only
+// affects and considers failures generated in the current thread and
+// EXPECT_FATAL_FAILURE_ON_ALL_THREADS does the same but for all threads.
+//
+// The verification of the assertion is done correctly even when the statement
+// throws an exception or aborts the current function.
+//
+// Known restrictions:
+//   - 'statement' cannot reference local non-static variables or
+//     non-static members of the current object.
+//   - 'statement' cannot return a value.
+//   - You cannot stream a failure message to this macro.
+//
+// Note that even though the implementations of the following two
+// macros are much alike, we cannot refactor them to use a common
+// helper macro, due to some peculiarity in how the preprocessor
+// works.  The AcceptsMacroThatExpandsToUnprotectedComma test in
+// gtest_unittest.cc will fail to compile if we do that.
+#define EXPECT_FATAL_FAILURE(statement, substr) \
+  do { \
+    class GTestExpectFatalFailureHelper {\
+     public:\
+      static void Execute() { statement; }  /* 'return' only exits Execute */\
+    };\
+    ::testing::TestPartResultArray gtest_failures;\
+    ::testing::internal::SingleFailureChecker gtest_checker(\
+        &gtest_failures, ::testing::TestPartResult::kFatalFailure, (substr));\
+    {  /* gtest_reporter lives only inside this scope */\
+      ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
+          ::testing::ScopedFakeTestPartResultReporter:: \
+          INTERCEPT_ONLY_CURRENT_THREAD, &gtest_failures);\
+      GTestExpectFatalFailureHelper::Execute();\
+    }  /* gtest_reporter is destroyed here, before gtest_checker */\
+  } while (::testing::internal::AlwaysFalse())
+
+#define EXPECT_FATAL_FAILURE_ON_ALL_THREADS(statement, substr) \
+  do { \
+    class GTestExpectFatalFailureHelper {\
+     public:\
+      static void Execute() { statement; }  /* 'return' only exits Execute */\
+    };\
+    ::testing::TestPartResultArray gtest_failures;\
+    ::testing::internal::SingleFailureChecker gtest_checker(\
+        &gtest_failures, ::testing::TestPartResult::kFatalFailure, (substr));\
+    {  /* gtest_reporter lives only inside this scope */\
+      ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
+          ::testing::ScopedFakeTestPartResultReporter:: \
+          INTERCEPT_ALL_THREADS, &gtest_failures);  /* every thread */\
+      GTestExpectFatalFailureHelper::Execute();\
+    }  /* gtest_reporter is destroyed here, before gtest_checker */\
+  } while (::testing::internal::AlwaysFalse())
+
+// A macro for testing Google Test assertions or code that's expected to
+// generate Google Test non-fatal failures.  It asserts that the given
+// statement will cause exactly one non-fatal Google Test failure with 'substr'
+// being part of the failure message.
+//
+// There are two different versions of this macro. EXPECT_NONFATAL_FAILURE only
+// affects and considers failures generated in the current thread and
+// EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS does the same but for all threads.
+//
+// 'statement' is allowed to reference local variables and members of
+// the current object.
+//
+// The verification of the assertion is done correctly even when the statement
+// throws an exception or aborts the current function.
+//
+// Known restrictions:
+//   - You cannot stream a failure message to this macro.
+//
+// Note that even though the implementations of the following two
+// macros are much alike, we cannot refactor them to use a common
+// helper macro, due to some peculiarity in how the preprocessor
+// works.  If we do that, the code won't compile when the user gives
+// EXPECT_NONFATAL_FAILURE() a statement that contains a macro that
+// expands to code containing an unprotected comma.  The
+// AcceptsMacroThatExpandsToUnprotectedComma test in gtest_unittest.cc
+// catches that.
+//
+// For the same reason, we have to write
+//   if (::testing::internal::AlwaysTrue()) { statement; }
+// instead of
+//   GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement)
+// to avoid an MSVC warning on unreachable code.
+#define EXPECT_NONFATAL_FAILURE(statement, substr) \
+  do {  /* 'statement' is expanded inline in the caller's scope */\
+    ::testing::TestPartResultArray gtest_failures;\
+    ::testing::internal::SingleFailureChecker gtest_checker(\
+        &gtest_failures, ::testing::TestPartResult::kNonFatalFailure, \
+        (substr));\
+    {  /* gtest_reporter lives only inside this scope */\
+      ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
+          ::testing::ScopedFakeTestPartResultReporter:: \
+          INTERCEPT_ONLY_CURRENT_THREAD, &gtest_failures);\
+      if (::testing::internal::AlwaysTrue()) { statement; }\
+    }  /* gtest_reporter is destroyed here, before gtest_checker */\
+  } while (::testing::internal::AlwaysFalse())
+
+#define EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS(statement, substr) \
+  do {  /* 'statement' is expanded inline in the caller's scope */\
+    ::testing::TestPartResultArray gtest_failures;\
+    ::testing::internal::SingleFailureChecker gtest_checker(\
+        &gtest_failures, ::testing::TestPartResult::kNonFatalFailure, \
+        (substr));\
+    {  /* gtest_reporter lives only inside this scope */\
+      ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
+          ::testing::ScopedFakeTestPartResultReporter::INTERCEPT_ALL_THREADS, \
+          &gtest_failures);\
+      if (::testing::internal::AlwaysTrue()) { statement; }\
+    }  /* gtest_reporter is destroyed here, before gtest_checker */\
+  } while (::testing::internal::AlwaysFalse())
+
+#endif  // GTEST_INCLUDE_GTEST_GTEST_SPI_H_
@@ -0,0 +1,197 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: mheule@google.com (Markus Heule)
+//
+
+#ifndef GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
+#define GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
+
+#include <iosfwd>
+#include <vector>
+#include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/gtest-string.h"
+
+namespace testing {
+
+// A copyable object representing the result of a test part (i.e. an
+// assertion or an explicit FAIL(), ADD_FAILURE(), or SUCCEED()).
+//
+// Don't inherit from TestPartResult as its destructor is not virtual.
+class GTEST_API_ TestPartResult {
+ public:
+  // The possible outcomes of a test part (i.e. an assertion or an
+  // explicit SUCCEED(), FAIL(), or ADD_FAILURE()).
+  enum Type {
+    kSuccess,          // Succeeded.
+    kNonFatalFailure,  // Failed but the test can continue.
+    kFatalFailure      // Failed and the test should be terminated.
+  };
+
+  // C'tor.  TestPartResult does NOT have a default constructor; always
+  // create one with these parameters.  A NULL a_file_name means the file
+  // is unknown; a NULL a_message is treated as an empty message.
+  TestPartResult(Type a_type,
+                 const char* a_file_name,
+                 int a_line_number,
+                 const char* a_message)
+    : type_(a_type),
+      file_name_(a_file_name == NULL ? "" : a_file_name),
+      line_number_(a_line_number),
+      summary_(ExtractSummary(a_message == NULL ? "" : a_message)),
+      message_(a_message == NULL ? "" : a_message) {
+  }
+
+  // Gets the outcome of the test part.
+  Type type() const {
+    return type_;
+  }
+
+  // Gets the name of the source file where the test part took place, or
+  // NULL if it's unknown.
+  const char* file_name() const {
+    return file_name_.empty() ? NULL : file_name_.c_str();
+  }
+
+  // Gets the line in the source file where the test part took place,
+  // or -1 if it's unknown.
+  int line_number() const {
+    return line_number_;
+  }
+
+  // Gets the summary of the failure message.
+  const char* summary() const {
+    return summary_.c_str();
+  }
+
+  // Gets the message associated with the test part.
+  const char* message() const {
+    return message_.c_str();
+  }
+
+  // Returns true iff the test part passed.
+  bool passed() const {
+    return type_ == kSuccess;
+  }
+
+  // Returns true iff the test part failed.
+  bool failed() const {
+    return type_ != kSuccess;
+  }
+
+  // Returns true iff the test part non-fatally failed.
+  bool nonfatally_failed() const {
+    return type_ == kNonFatalFailure;
+  }
+
+  // Returns true iff the test part fatally failed.
+  bool fatally_failed() const {
+    return type_ == kFatalFailure;
+  }
+
+ private:
+  Type type_;  // The outcome of the test part.
+
+  // Gets the summary of the failure message by omitting the stack
+  // trace in it.
+  static std::string ExtractSummary(const char* message);
+
+  // The name of the source file where the test part took place, or
+  // "" if the source file is unknown.
+  std::string file_name_;
+  // The line in the source file where the test part took place, or -1
+  // if the line number is unknown.
+  int line_number_;
+  std::string summary_;  // The test failure summary.
+  std::string message_;  // The test failure message.
+};
+
+// Prints a TestPartResult object.
+std::ostream& operator<<(std::ostream& os, const TestPartResult& result);
+
+// An array of TestPartResult objects.
+//
+// Don't inherit from TestPartResultArray as its destructor is not
+// virtual.
+class GTEST_API_ TestPartResultArray {
+ public:
+  TestPartResultArray() {}  // Creates an empty array.
+
+  // Appends the given TestPartResult to the array.
+  void Append(const TestPartResult& result);
+
+  // Returns the TestPartResult at the given index (0-based).
+  const TestPartResult& GetTestPartResult(int index) const;
+
+  // Returns the number of TestPartResult objects in the array.
+  int size() const;
+
+ private:
+  std::vector<TestPartResult> array_;  // Storage for the results.
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestPartResultArray);
+};
+
+// This interface knows how to report a test part result.
+class TestPartResultReporterInterface {
+ public:
+  virtual ~TestPartResultReporterInterface() {}  // Virtual for subclasses.
+  // Reports 'result'; every concrete reporter must implement this.
+  virtual void ReportTestPartResult(const TestPartResult& result) = 0;
+};
+
+namespace internal {
+
+// This helper class is used by {ASSERT|EXPECT}_NO_FATAL_FAILURE to check if a
+// statement generates new fatal failures. To do so it registers itself as the
+// current test part result reporter. Besides checking if fatal failures were
+// reported, it only delegates the reporting to the former result reporter.
+// The original result reporter is restored in the destructor.
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+class GTEST_API_ HasNewFatalFailureHelper
+  : public TestPartResultReporterInterface {
+ public:
+  HasNewFatalFailureHelper();           // Registers as the current reporter.
+  virtual ~HasNewFatalFailureHelper();  // Restores the original reporter.
+  virtual void ReportTestPartResult(const TestPartResult& result);
+  bool has_new_fatal_failure() const {  // Accessor for the flag below.
+    return has_new_fatal_failure_;
+  }
+ private:
+  bool has_new_fatal_failure_;  // Presumably set on a fatal failure report.
+  TestPartResultReporterInterface* original_reporter_;  // Restored by d'tor.
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(HasNewFatalFailureHelper);
+};
+
+}  // namespace internal
+
+}  // namespace testing
+
+#endif  // GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
@@ -0,0 +1,263 @@
+// Copyright 2008 Google Inc.
+// All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+
+#ifndef GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
+#define GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
+
+// This header implements typed tests and type-parameterized tests.
+
+// Typed (aka type-driven) tests repeat the same test for types in a
+// list.  You must know which types you want to test with when writing
+// typed tests. Here's how you do it:
+
+#if 0
+
+// First, define a fixture class template.  It should be parameterized
+// by a type.  Remember to derive it from testing::Test.
+template <typename T>
+class FooTest : public testing::Test {
+ public:
+  ...
+  typedef std::list<T> List;
+  static T shared_;
+  T value_;
+};
+
+// Next, associate a list of types with the test case, which will be
+// repeated for each type in the list.  The typedef is necessary for
+// the macro to parse correctly.
+typedef testing::Types<char, int, unsigned int> MyTypes;
+TYPED_TEST_CASE(FooTest, MyTypes);
+
+// If the type list contains only one type, you can write that type
+// directly without Types<...>:
+//   TYPED_TEST_CASE(FooTest, int);
+
+// Then, use TYPED_TEST() instead of TEST_F() to define as many typed
+// tests for this test case as you want.
+TYPED_TEST(FooTest, DoesBlah) {
+  // Inside a test, refer to TypeParam to get the type parameter.
+  // Since we are inside a derived class template, C++ requires us to
+  // visit the members of FooTest via 'this'.
+  TypeParam n = this->value_;
+
+  // To visit static members of the fixture, add the TestFixture::
+  // prefix.
+  n += TestFixture::shared_;
+
+  // To refer to typedefs in the fixture, add the "typename
+  // TestFixture::" prefix.
+  typename TestFixture::List values;
+  values.push_back(n);
+  ...
+}
+
+TYPED_TEST(FooTest, HasPropertyA) {
+  ...
+}
+
+#endif  // 0
+
+// Type-parameterized tests are abstract test patterns parameterized
+// by a type.  Compared with typed tests, type-parameterized tests
+// allow you to define the test pattern without knowing what the type
+// parameters are.  The defined pattern can be instantiated with
+// different types any number of times, in any number of translation
+// units.
+//
+// If you are designing an interface or concept, you can define a
+// suite of type-parameterized tests to verify properties that any
+// valid implementation of the interface/concept should have.  Then,
+// each implementation can easily instantiate the test suite to verify
+// that it conforms to the requirements, without having to write
+// similar tests repeatedly.  Here's an example:
+
+#if 0
+
+// First, define a fixture class template.  It should be parameterized
+// by a type.  Remember to derive it from testing::Test.
+template <typename T>
+class FooTest : public testing::Test {
+  ...
+};
+
+// Next, declare that you will define a type-parameterized test case
+// (the _P suffix is for "parameterized" or "pattern", whichever you
+// prefer):
+TYPED_TEST_CASE_P(FooTest);
+
+// Then, use TYPED_TEST_P() to define as many type-parameterized tests
+// for this type-parameterized test case as you want.
+TYPED_TEST_P(FooTest, DoesBlah) {
+  // Inside a test, refer to TypeParam to get the type parameter.
+  TypeParam n = 0;
+  ...
+}
+
+TYPED_TEST_P(FooTest, HasPropertyA) {
+  ...
+}
+
+// Now the tricky part: you need to register all test patterns before
+// you can instantiate them.  The first argument of the macro is the
+// test case name; the rest are the names of the tests in this test
+// case.
+REGISTER_TYPED_TEST_CASE_P(FooTest,
+                           DoesBlah, HasPropertyA);
+
+// Finally, you are free to instantiate the pattern with the types you
+// want.  If you put the above code in a header file, you can #include
+// it in multiple C++ source files and instantiate it multiple times.
+//
+// To distinguish different instances of the pattern, the first
+// argument to the INSTANTIATE_* macro is a prefix that will be added
+// to the actual test case name.  Remember to pick unique prefixes for
+// different instances.
+typedef testing::Types<char, int, unsigned int> MyTypes;
+INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, MyTypes);
+
+// If the type list contains only one type, you can write that type
+// directly without Types<...>:
+//   INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, int);
+
+#endif  // 0
+
+#include "gtest/internal/gtest-port.h"
+#include "gtest/internal/gtest-type-util.h"
+
+// Implements typed tests.
+
+#if GTEST_HAS_TYPED_TEST
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Expands to the name of the typedef for the type parameters of the
+// given test case, i.e. gtest_type_params_<TestCaseName>_.
+# define GTEST_TYPE_PARAMS_(TestCaseName) gtest_type_params_##TestCaseName##_
+
+// Typedefs the type list for CaseName as GTEST_TYPE_PARAMS_(CaseName).
+// 'Types' must have spaces around it below, since some compilers may
+// choke on '>>' when passed a template instance (e.g. Types<int>).
+# define TYPED_TEST_CASE(CaseName, Types) \
+  typedef ::testing::internal::TypeList< Types >::type \
+      GTEST_TYPE_PARAMS_(CaseName)
+
+# define TYPED_TEST(CaseName, TestName) \
+  template <typename gtest_TypeParam_> \
+  class GTEST_TEST_CLASS_NAME_(CaseName, TestName) \
+      : public CaseName<gtest_TypeParam_> { \
+   private: \
+    typedef CaseName<gtest_TypeParam_> TestFixture; /* the fixture type */ \
+    typedef gtest_TypeParam_ TypeParam; /* the current type parameter */ \
+    virtual void TestBody(); \
+  }; \
+  bool gtest_##CaseName##_##TestName##_registered_ GTEST_ATTRIBUTE_UNUSED_ = \
+      ::testing::internal::TypeParameterizedTest< \
+          CaseName, \
+          ::testing::internal::TemplateSel< \
+              GTEST_TEST_CLASS_NAME_(CaseName, TestName)>, \
+          GTEST_TYPE_PARAMS_(CaseName)>::Register(\
+              "", #CaseName, #TestName, 0); \
+  template <typename gtest_TypeParam_> /* user's { ... } is the body */ \
+  void GTEST_TEST_CLASS_NAME_(CaseName, TestName)<gtest_TypeParam_>::TestBody()
+
+#endif  // GTEST_HAS_TYPED_TEST
+
+// Implements type-parameterized tests.
+
+#if GTEST_HAS_TYPED_TEST_P
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Expands to the name of the namespace (gtest_case_<TestCaseName>_) in
+// which the type-parameterized tests of the given test case are
+// defined.  The exact name is subject to change without notice.
+# define GTEST_CASE_NAMESPACE_(TestCaseName) \
+  gtest_case_##TestCaseName##_
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Expands to the name of the TypedTestCasePState variable used to
+// remember the names of the defined tests in the given test case.
+# define GTEST_TYPED_TEST_CASE_P_STATE_(TestCaseName) \
+  gtest_typed_test_case_p_state_##TestCaseName##_
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE DIRECTLY.
+//
+// Expands to the name of the const char* variable used to remember the
+// names of the registered tests in the given test case.
+# define GTEST_REGISTERED_TEST_NAMES_(TestCaseName) \
+  gtest_registered_test_names_##TestCaseName##_
+
+// Defines the TypedTestCasePState variable for CaseName.  The variables
+// defined in the type-parameterized test macros are static, as these
+// macros are typically used in a .h file #included in several linked TUs.
+# define TYPED_TEST_CASE_P(CaseName) \
+  static ::testing::internal::TypedTestCasePState \
+      GTEST_TYPED_TEST_CASE_P_STATE_(CaseName)
+
+# define TYPED_TEST_P(CaseName, TestName) \
+  namespace GTEST_CASE_NAMESPACE_(CaseName) { \
+  template <typename gtest_TypeParam_> \
+  class TestName : public CaseName<gtest_TypeParam_> { \
+   private: \
+    typedef CaseName<gtest_TypeParam_> TestFixture; /* the fixture type */ \
+    typedef gtest_TypeParam_ TypeParam; /* the current type parameter */ \
+    virtual void TestBody(); \
+  }; \
+  static bool gtest_##TestName##_defined_ GTEST_ATTRIBUTE_UNUSED_ = \
+      GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).AddTestName(\
+          __FILE__, __LINE__, #CaseName, #TestName); \
+  } /* closes the namespace opened at the top of this macro */ \
+  template <typename gtest_TypeParam_> /* user's { ... } is the body */ \
+  void GTEST_CASE_NAMESPACE_(CaseName)::TestName<gtest_TypeParam_>::TestBody()
+
+# define REGISTER_TYPED_TEST_CASE_P(CaseName, ...) \
+  namespace GTEST_CASE_NAMESPACE_(CaseName) { /* holds the test list */ \
+  typedef ::testing::internal::Templates<__VA_ARGS__>::type gtest_AllTests_; \
+  } \
+  static const char* const GTEST_REGISTERED_TEST_NAMES_(CaseName) = \
+      GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).VerifyRegisteredTestNames(\
+          __FILE__, __LINE__, #__VA_ARGS__)  /* test names, stringized */
+
+// Passes the tests in GTEST_CASE_NAMESPACE_(CaseName)::gtest_AllTests_ and
+// the type list to TypeParameterizedTestCase<>::Register().  'Types' needs
+// spaces around it: some compilers choke on '>>' (e.g. Types<int>).
+# define INSTANTIATE_TYPED_TEST_CASE_P(Prefix, CaseName, Types) \
+  bool gtest_##Prefix##_##CaseName GTEST_ATTRIBUTE_UNUSED_ = \
+      ::testing::internal::TypeParameterizedTestCase<CaseName, \
+          GTEST_CASE_NAMESPACE_(CaseName)::gtest_AllTests_, \
+          ::testing::internal::TypeList< Types >::type>::Register(\
+              #Prefix, #CaseName, GTEST_REGISTERED_TEST_NAMES_(CaseName))
+
+#endif  // GTEST_HAS_TYPED_TEST_P
+
+#endif  // GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
@@ -0,0 +1,368 @@
+// Copyright 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// This file is AUTOMATICALLY GENERATED on 10/31/2011 by command
+// 'gen_gtest_pred_impl.py 5'.  DO NOT EDIT BY HAND!
+//
+// Implements a family of generic predicate assertion macros.
+
+#ifndef GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
+#define GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
+
+// Makes sure this header is not included before gtest.h.
+#ifndef GTEST_INCLUDE_GTEST_GTEST_H_
+# error Do not include gtest_pred_impl.h directly.  Include gtest.h instead.
+#endif  // GTEST_INCLUDE_GTEST_GTEST_H_
+
+// This header implements a family of generic predicate assertion
+// macros:
+//
+//   ASSERT_PRED_FORMAT1(pred_format, v1)
+//   ASSERT_PRED_FORMAT2(pred_format, v1, v2)
+//   ...
+//
+// where pred_format is a function or functor that takes n (in the
+// case of ASSERT_PRED_FORMATn) values and their source expression
+// text, and returns a testing::AssertionResult.  See the definition
+// of ASSERT_EQ in gtest.h for an example.
+//
+// If you don't care about formatting, you can use the more
+// restrictive version:
+//
+//   ASSERT_PRED1(pred, v1)
+//   ASSERT_PRED2(pred, v1, v2)
+//   ...
+//
+// where pred is an n-ary function or functor that returns bool,
+// and the values v1, v2, ..., must support the << operator for
+// streaming to std::ostream.
+//
+// We also define the EXPECT_* variations.
+//
+// For now we only support predicates whose arity is at most 5.
+// Please email googletestframework@googlegroups.com if you need
+// support for higher arities.
+
+// GTEST_ASSERT_ is the basic statement to which all of the assertions
+// in this file reduce.  Don't use this in your code.
+//
+// 'expression' is used to initialize a const ::testing::AssertionResult.
+// When that result tests as true the statement does nothing; otherwise
+// on_failure is invoked with the result's failure_message().  The
+// expansion ends with the on_failure(...) expression of the else branch
+// and supplies no trailing semicolon.
+
+#define GTEST_ASSERT_(expression, on_failure) \
+  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+  if (const ::testing::AssertionResult gtest_ar = (expression)) \
+    ; \
+  else \
+    on_failure(gtest_ar.failure_message())
+
+
+// Backs {EXPECT|ASSERT}_PRED1: applies the unary predicate to its argument
+// and, when the predicate does not hold, builds a failure message showing
+// the call as written and the value of the argument.  Internal helper;
+// don't use this in your code.
+//
+//   pred_text  source text of the predicate expression
+//   e1         source text of the argument expression
+//   pred       unary callable whose result is tested in a boolean context
+//   v1         the argument value; it is streamed into the failure message
+template <typename Pred, typename T1>
+AssertionResult AssertPred1Helper(const char* pred_text,
+                                  const char* e1,
+                                  Pred pred,
+                                  const T1& v1) {
+  if (pred(v1)) return AssertionSuccess();
+
+  AssertionResult failure = AssertionFailure();
+  // First line: the call as it was written in the source.
+  failure << pred_text << "(" << e1 << ") evaluates to false, where";
+  // Next line: the argument expression with its value.
+  failure << "\n" << e1 << " evaluates to " << v1;
+  return failure;
+}
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT1.
+// Don't use this in your code.
+// Calls pred_format with the source text of v1 followed by v1 itself and
+// hands the result to GTEST_ASSERT_ together with on_failure.
+#define GTEST_PRED_FORMAT1_(pred_format, v1, on_failure)\
+  GTEST_ASSERT_(pred_format(#v1, v1), \
+                on_failure)
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED1.  Don't use
+// this in your code.
+// Passes the source text of pred and v1, then pred and v1 themselves, to
+// ::testing::AssertPred1Helper and hands the result to GTEST_ASSERT_.
+#define GTEST_PRED1_(pred, v1, on_failure)\
+  GTEST_ASSERT_(::testing::AssertPred1Helper(#pred, \
+                                             #v1, \
+                                             pred, \
+                                             v1), on_failure)
+
+// Unary predicate assertion macros.
+// The EXPECT_ forms report a failure through GTEST_NONFATAL_FAILURE_ and
+// the ASSERT_ forms through GTEST_FATAL_FAILURE_.
+#define EXPECT_PRED_FORMAT1(pred_format, v1) \
+  GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED1(pred, v1) \
+  GTEST_PRED1_(pred, v1, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT1(pred_format, v1) \
+  GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED1(pred, v1) \
+  GTEST_PRED1_(pred, v1, GTEST_FATAL_FAILURE_)
+
+
+
+// Backs {EXPECT|ASSERT}_PRED2: applies the binary predicate to its
+// arguments and, when the predicate does not hold, builds a failure
+// message showing the call as written and the value of each argument.
+// Internal helper; don't use this in your code.
+//
+//   pred_text  source text of the predicate expression
+//   e1, e2     source text of the argument expressions
+//   pred       binary callable whose result is tested in a boolean context
+//   v1, v2     the argument values; they are streamed into the message
+template <typename Pred, typename T1, typename T2>
+AssertionResult AssertPred2Helper(const char* pred_text,
+                                  const char* e1,
+                                  const char* e2,
+                                  Pred pred,
+                                  const T1& v1,
+                                  const T2& v2) {
+  if (pred(v1, v2)) return AssertionSuccess();
+
+  AssertionResult failure = AssertionFailure();
+  // First line: the call as it was written in the source.
+  failure << pred_text << "(" << e1 << ", " << e2
+          << ") evaluates to false, where";
+  // Following lines: each argument expression with its value.
+  failure << "\n" << e1 << " evaluates to " << v1;
+  failure << "\n" << e2 << " evaluates to " << v2;
+  return failure;
+}
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT2.
+// Don't use this in your code.
+// Calls pred_format with the source text of v1 and v2 followed by the
+// values themselves and hands the result to GTEST_ASSERT_ together with
+// on_failure.
+#define GTEST_PRED_FORMAT2_(pred_format, v1, v2, on_failure)\
+  GTEST_ASSERT_(pred_format(#v1, #v2, v1, v2), \
+                on_failure)
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED2.  Don't use
+// this in your code.
+// Passes the source text of pred, v1 and v2, then pred and the values
+// themselves, to ::testing::AssertPred2Helper and hands the result to
+// GTEST_ASSERT_.
+#define GTEST_PRED2_(pred, v1, v2, on_failure)\
+  GTEST_ASSERT_(::testing::AssertPred2Helper(#pred, \
+                                             #v1, \
+                                             #v2, \
+                                             pred, \
+                                             v1, \
+                                             v2), on_failure)
+
+// Binary predicate assertion macros.
+// The EXPECT_ forms report a failure through GTEST_NONFATAL_FAILURE_ and
+// the ASSERT_ forms through GTEST_FATAL_FAILURE_.
+#define EXPECT_PRED_FORMAT2(pred_format, v1, v2) \
+  GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED2(pred, v1, v2) \
+  GTEST_PRED2_(pred, v1, v2, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT2(pred_format, v1, v2) \
+  GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED2(pred, v1, v2) \
+  GTEST_PRED2_(pred, v1, v2, GTEST_FATAL_FAILURE_)
+
+
+
+// Backs {EXPECT|ASSERT}_PRED3: applies the ternary predicate to its
+// arguments and, when the predicate does not hold, builds a failure
+// message showing the call as written and the value of each argument.
+// Internal helper; don't use this in your code.
+//
+//   pred_text  source text of the predicate expression
+//   e1..e3     source text of the argument expressions
+//   pred       3-ary callable whose result is tested in a boolean context
+//   v1..v3     the argument values; they are streamed into the message
+template <typename Pred, typename T1, typename T2, typename T3>
+AssertionResult AssertPred3Helper(const char* pred_text,
+                                  const char* e1,
+                                  const char* e2,
+                                  const char* e3,
+                                  Pred pred,
+                                  const T1& v1,
+                                  const T2& v2,
+                                  const T3& v3) {
+  if (pred(v1, v2, v3)) return AssertionSuccess();
+
+  AssertionResult failure = AssertionFailure();
+  // First line: the call as it was written in the source.
+  failure << pred_text << "(" << e1 << ", " << e2 << ", " << e3
+          << ") evaluates to false, where";
+  // Following lines: each argument expression with its value.
+  failure << "\n" << e1 << " evaluates to " << v1;
+  failure << "\n" << e2 << " evaluates to " << v2;
+  failure << "\n" << e3 << " evaluates to " << v3;
+  return failure;
+}
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT3.
+// Don't use this in your code.
+// Calls pred_format with the source text of v1..v3 followed by the values
+// themselves and hands the result to GTEST_ASSERT_ together with
+// on_failure.
+#define GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, on_failure)\
+  GTEST_ASSERT_(pred_format(#v1, #v2, #v3, v1, v2, v3), \
+                on_failure)
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED3.  Don't use
+// this in your code.
+// Passes the source text of pred and v1..v3, then pred and the values
+// themselves, to ::testing::AssertPred3Helper and hands the result to
+// GTEST_ASSERT_.
+#define GTEST_PRED3_(pred, v1, v2, v3, on_failure)\
+  GTEST_ASSERT_(::testing::AssertPred3Helper(#pred, \
+                                             #v1, \
+                                             #v2, \
+                                             #v3, \
+                                             pred, \
+                                             v1, \
+                                             v2, \
+                                             v3), on_failure)
+
+// Ternary predicate assertion macros.
+// The EXPECT_ forms report a failure through GTEST_NONFATAL_FAILURE_ and
+// the ASSERT_ forms through GTEST_FATAL_FAILURE_.
+#define EXPECT_PRED_FORMAT3(pred_format, v1, v2, v3) \
+  GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED3(pred, v1, v2, v3) \
+  GTEST_PRED3_(pred, v1, v2, v3, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT3(pred_format, v1, v2, v3) \
+  GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED3(pred, v1, v2, v3) \
+  GTEST_PRED3_(pred, v1, v2, v3, GTEST_FATAL_FAILURE_)
+
+
+
+// Backs {EXPECT|ASSERT}_PRED4: applies the 4-ary predicate to its
+// arguments and, when the predicate does not hold, builds a failure
+// message showing the call as written and the value of each argument.
+// Internal helper; don't use this in your code.
+//
+//   pred_text  source text of the predicate expression
+//   e1..e4     source text of the argument expressions
+//   pred       4-ary callable whose result is tested in a boolean context
+//   v1..v4     the argument values; they are streamed into the message
+template <typename Pred, typename T1, typename T2, typename T3, typename T4>
+AssertionResult AssertPred4Helper(const char* pred_text,
+                                  const char* e1,
+                                  const char* e2,
+                                  const char* e3,
+                                  const char* e4,
+                                  Pred pred,
+                                  const T1& v1,
+                                  const T2& v2,
+                                  const T3& v3,
+                                  const T4& v4) {
+  if (pred(v1, v2, v3, v4)) return AssertionSuccess();
+
+  AssertionResult failure = AssertionFailure();
+  // First line: the call as it was written in the source.
+  failure << pred_text << "(" << e1 << ", " << e2 << ", " << e3 << ", " << e4
+          << ") evaluates to false, where";
+  // Following lines: each argument expression with its value.
+  failure << "\n" << e1 << " evaluates to " << v1;
+  failure << "\n" << e2 << " evaluates to " << v2;
+  failure << "\n" << e3 << " evaluates to " << v3;
+  failure << "\n" << e4 << " evaluates to " << v4;
+  return failure;
+}
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT4.
+// Don't use this in your code.
+// Calls pred_format with the source text of v1..v4 followed by the values
+// themselves and hands the result to GTEST_ASSERT_ together with
+// on_failure.
+#define GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, on_failure)\
+  GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, v1, v2, v3, v4), \
+                on_failure)
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED4.  Don't use
+// this in your code.
+// Passes the source text of pred and v1..v4, then pred and the values
+// themselves, to ::testing::AssertPred4Helper and hands the result to
+// GTEST_ASSERT_.
+#define GTEST_PRED4_(pred, v1, v2, v3, v4, on_failure)\
+  GTEST_ASSERT_(::testing::AssertPred4Helper(#pred, \
+                                             #v1, \
+                                             #v2, \
+                                             #v3, \
+                                             #v4, \
+                                             pred, \
+                                             v1, \
+                                             v2, \
+                                             v3, \
+                                             v4), on_failure)
+
+// 4-ary predicate assertion macros.
+// The EXPECT_ forms report a failure through GTEST_NONFATAL_FAILURE_ and
+// the ASSERT_ forms through GTEST_FATAL_FAILURE_.
+#define EXPECT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \
+  GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED4(pred, v1, v2, v3, v4) \
+  GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \
+  GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED4(pred, v1, v2, v3, v4) \
+  GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_FATAL_FAILURE_)
+
+
+
+// Backs {EXPECT|ASSERT}_PRED5: applies the 5-ary predicate to its
+// arguments and, when the predicate does not hold, builds a failure
+// message showing the call as written and the value of each argument.
+// Internal helper; don't use this in your code.
+//
+//   pred_text  source text of the predicate expression
+//   e1..e5     source text of the argument expressions
+//   pred       5-ary callable whose result is tested in a boolean context
+//   v1..v5     the argument values; they are streamed into the message
+template <typename Pred, typename T1, typename T2, typename T3, typename T4,
+          typename T5>
+AssertionResult AssertPred5Helper(const char* pred_text,
+                                  const char* e1,
+                                  const char* e2,
+                                  const char* e3,
+                                  const char* e4,
+                                  const char* e5,
+                                  Pred pred,
+                                  const T1& v1,
+                                  const T2& v2,
+                                  const T3& v3,
+                                  const T4& v4,
+                                  const T5& v5) {
+  if (pred(v1, v2, v3, v4, v5)) return AssertionSuccess();
+
+  AssertionResult failure = AssertionFailure();
+  // First line: the call as it was written in the source.
+  failure << pred_text << "(" << e1 << ", " << e2 << ", " << e3 << ", " << e4
+          << ", " << e5 << ") evaluates to false, where";
+  // Following lines: each argument expression with its value.
+  failure << "\n" << e1 << " evaluates to " << v1;
+  failure << "\n" << e2 << " evaluates to " << v2;
+  failure << "\n" << e3 << " evaluates to " << v3;
+  failure << "\n" << e4 << " evaluates to " << v4;
+  failure << "\n" << e5 << " evaluates to " << v5;
+  return failure;
+}
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT5.
+// Don't use this in your code.
+// Calls pred_format with the source text of v1..v5 followed by the values
+// themselves and hands the result to GTEST_ASSERT_ together with
+// on_failure.
+#define GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, on_failure)\
+  GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, #v5, v1, v2, v3, v4, v5), \
+                on_failure)
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED5.  Don't use
+// this in your code.
+// Passes the source text of pred and v1..v5, then pred and the values
+// themselves, to ::testing::AssertPred5Helper and hands the result to
+// GTEST_ASSERT_.
+#define GTEST_PRED5_(pred, v1, v2, v3, v4, v5, on_failure)\
+  GTEST_ASSERT_(::testing::AssertPred5Helper(#pred, \
+                                             #v1, \
+                                             #v2, \
+                                             #v3, \
+                                             #v4, \
+                                             #v5, \
+                                             pred, \
+                                             v1, \
+                                             v2, \
+                                             v3, \
+                                             v4, \
+                                             v5), on_failure)
+
+// 5-ary predicate assertion macros.
+// The EXPECT_ forms report a failure through GTEST_NONFATAL_FAILURE_ and
+// the ASSERT_ forms through GTEST_FATAL_FAILURE_.
+#define EXPECT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
+  GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED5(pred, v1, v2, v3, v4, v5) \
+  GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
+  GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED5(pred, v1, v2, v3, v4, v5) \
+  GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)
+
+
+
+#endif  // GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
@@ -0,0 +1,58 @@
+// Copyright 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+//
+// Google C++ Testing Framework definitions useful in production code.
+
+#ifndef GTEST_INCLUDE_GTEST_GTEST_PROD_H_
+#define GTEST_INCLUDE_GTEST_GTEST_PROD_H_
+
+// When you need to test the private or protected members of a class,
+// use the FRIEND_TEST macro to declare your tests as friends of the
+// class.  For example:
+//
+// class MyClass {
+//  private:
+//   void MyMethod();
+//   FRIEND_TEST(MyClassTest, MyMethod);
+// };
+//
+// class MyClassTest : public testing::Test {
+//   // ...
+// };
+//
+// TEST_F(MyClassTest, MyMethod) {
+//   // Can call MyClass::MyMethod() here.
+// }
+
+// Expands to a friend declaration for the class
+// test_case_name##_##test_name##_Test.  The class is named without
+// qualification, so the test has to be defined in the same namespace as
+// the class that uses FRIEND_TEST.  The macro supplies no trailing
+// semicolon; write one after it, as in the example above.
+#define FRIEND_TEST(test_case_name, test_name)\
+friend class test_case_name##_##test_name##_Test
+
+#endif  // GTEST_INCLUDE_GTEST_GTEST_PROD_H_
@@ -0,0 +1,330 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
+//
+// The Google C++ Testing Framework (Google Test)
+//
+// This header file defines internal utilities needed for implementing
+// death tests.  They are subject to change without notice.
+
+#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
+#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
+
+#include "gtest/internal/gtest-internal.h"
+
+#include <stdio.h>
+
+namespace testing {
+namespace internal {
+
+// Declares the internal_run_death_test flag via GTEST_DECLARE_string_; the
+// flag is not defined in this header.
+GTEST_DECLARE_string_(internal_run_death_test);
+
+// Names of the flags (needed for parsing Google Test flags).
+// NOTE(review): the names carry no "gtest_" prefix, although the comments
+// in this header spell the command-line flags as --gtest_death_test_style
+// and --gtest_internal_run_death_test; presumably the flag parser handles
+// the prefix separately -- confirm.
+const char kDeathTestStyleFlag[] = "death_test_style";
+const char kDeathTestUseFork[] = "death_test_use_fork";
+const char kInternalRunDeathTestFlag[] = "internal_run_death_test";
+
+#if GTEST_HAS_DEATH_TEST
+
+// DeathTest is a class that hides much of the complexity of the
+// GTEST_DEATH_TEST_ macro.  It is abstract; its static Create method
+// produces an object of a concrete class that depends on the prevailing
+// death test style, as defined by the --gtest_death_test_style and/or
+// --gtest_internal_run_death_test flags.
+
+// In describing the results of death tests, these terms are used with
+// the corresponding definitions:
+//
+// exit status:  The integer exit information in the format specified
+//               by wait(2)
+// exit code:    The integer code passed to exit(3), _exit(2), or
+//               returned from main()
+class GTEST_API_ DeathTest {
+ public:
+  // Create returns false if there was an error determining the
+  // appropriate action to take for the current death test; for example,
+  // if the gtest_death_test_style flag is set to an invalid value.
+  // The LastMessage method will return a more detailed message in that
+  // case.  Otherwise, the DeathTest pointer pointed to by the "test"
+  // argument is set.  If the death test should be skipped, the pointer
+  // is set to NULL; otherwise, it is set to the address of a new concrete
+  // DeathTest object that controls the execution of the current test.
+  //
+  // GTEST_DEATH_TEST_ passes the stringified statement as "statement", the
+  // address of its regex as "regex", and __FILE__ / __LINE__ as "file" and
+  // "line".
+  static bool Create(const char* statement, const RE* regex,
+                     const char* file, int line, DeathTest** test);
+  // The constructor is defined outside this header.
+  DeathTest();
+  // Virtual so that concrete death tests can be destroyed through a
+  // DeathTest pointer.
+  virtual ~DeathTest() { }
+
+  // A helper class that aborts a death test when it's deleted.
+  // Its destructor calls Abort(TEST_ENCOUNTERED_RETURN_STATEMENT) on the
+  // DeathTest it was constructed with, whatever the reason the sentinel
+  // goes out of scope.
+  class ReturnSentinel {
+   public:
+    explicit ReturnSentinel(DeathTest* test) : test_(test) { }
+    ~ReturnSentinel() {
+      test_->Abort(TEST_ENCOUNTERED_RETURN_STATEMENT);
+    }
+   private:
+    // The death test to abort; not deleted by the sentinel.
+    DeathTest* const test_;
+    GTEST_DISALLOW_COPY_AND_ASSIGN_(ReturnSentinel);
+  } GTEST_ATTRIBUTE_UNUSED_;
+
+  // An enumeration of possible roles that may be taken when a death
+  // test is encountered.  EXECUTE means that the death test logic should
+  // be executed immediately.  OVERSEE means that the program should prepare
+  // the appropriate environment for a child process to execute the death
+  // test, then wait for it to complete.
+  enum TestRole { OVERSEE_TEST, EXECUTE_TEST };
+
+  // An enumeration of the three reasons that a test might be aborted.
+  enum AbortReason {
+    TEST_ENCOUNTERED_RETURN_STATEMENT,
+    TEST_THREW_EXCEPTION,
+    TEST_DID_NOT_DIE
+  };
+
+  // Assumes one of the above roles.
+  virtual TestRole AssumeRole() = 0;
+
+  // Waits for the death test to finish and returns its status.
+  virtual int Wait() = 0;
+
+  // Returns true if the death test passed; that is, the test process
+  // exited during the test, its exit status matches a user-supplied
+  // predicate, and its stderr output matches a user-supplied regular
+  // expression.
+  // The user-supplied predicate may be a macro expression rather
+  // than a function pointer or functor, or else Wait and Passed could
+  // be combined.
+  // exit_status_ok is the result of that predicate; GTEST_DEATH_TEST_
+  // computes it as predicate(Wait()).
+  virtual bool Passed(bool exit_status_ok) = 0;
+
+  // Signals that the death test did not die as expected.
+  virtual void Abort(AbortReason reason) = 0;
+
+  // Returns a human-readable message describing the outcome of the last
+  // death test.
+  static const char* LastMessage();
+
+  // NOTE(review): presumably stores "message" in last_death_test_message_
+  // for LastMessage() to return -- the definition is not in this header.
+  static void set_last_death_test_message(const std::string& message);
+
+ private:
+  // A string containing a description of the outcome of the last death test.
+  static std::string last_death_test_message_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(DeathTest);
+};
+
+// Factory interface for death tests.  May be mocked out for testing.
+// Create() takes the same parameters as the static DeathTest::Create().
+// NOTE(review): it presumably follows the same contract (false on error,
+// *test set otherwise) -- confirm against the implementation.
+class DeathTestFactory {
+ public:
+  virtual ~DeathTestFactory() { }
+  virtual bool Create(const char* statement, const RE* regex,
+                      const char* file, int line, DeathTest** test) = 0;
+};
+
+// A concrete DeathTestFactory implementation for normal use.
+// Overrides DeathTestFactory::Create(); the definition is not in this
+// header.
+class DefaultDeathTestFactory : public DeathTestFactory {
+ public:
+  virtual bool Create(const char* statement, const RE* regex,
+                      const char* file, int line, DeathTest** test);
+};
+
+// Returns true if exit_status describes a process that was terminated
+// by a signal, or exited normally with a nonzero exit code.
+// exit_status is an "exit status" as defined earlier in this header, i.e.
+// a value in the format specified by wait(2), not a plain exit code.
+GTEST_API_ bool ExitedUnsuccessfully(int exit_status);
+
+// Traps C++ exceptions escaping statement and reports them as test
+// failures. Note that trapping SEH exceptions is not implemented here.
+//
+// statement is the code under test; death_test is a pointer to the
+// DeathTest on which Abort(TEST_THREW_EXCEPTION) is called when an
+// exception escapes.  For exceptions derived from std::exception the
+// source location and the what() text are first written to stderr, which
+// is then flushed.  When GTEST_HAS_EXCEPTIONS is not enabled the macro
+// just executes the statement and death_test is unused.
+# if GTEST_HAS_EXCEPTIONS
+#  define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \
+  try { \
+    GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+  } catch (const ::std::exception& gtest_exception) { \
+    fprintf(\
+        stderr, \
+        "\n%s: Caught std::exception-derived exception escaping the " \
+        "death test statement. Exception message: %s\n", \
+        ::testing::internal::FormatFileLocation(__FILE__, __LINE__).c_str(), \
+        gtest_exception.what()); \
+    fflush(stderr); \
+    death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \
+  } catch (...) { \
+    death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \
+  }
+
+# else
+#  define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \
+  GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement)
+
+# endif
+
+// This macro is for implementing ASSERT_DEATH*, EXPECT_DEATH*,
+// ASSERT_EXIT*, and EXPECT_EXIT*.
+//
+// Parameters:
+//   statement - the code that is expected to terminate the process; its
+//               source text is also passed to DeathTest::Create.
+//   predicate - applied to the value returned by DeathTest::Wait(); its
+//               result is what DeathTest::Passed() receives.
+//   regex     - bound to a const RE reference whose address is passed to
+//               DeathTest::Create.
+//   fail      - invoked with DeathTest::LastMessage() when the death test
+//               fails or cannot be set up.
+//
+// Control flow: if Create() reports an error the macro jumps straight to
+// the failure label.  If Create() succeeds but leaves the pointer NULL,
+// nothing else happens.  Otherwise the DeathTest is held in a scoped_ptr
+// and AssumeRole() selects between checking the outcome of the test
+// (OVERSEE_TEST) and running the statement (EXECUTE_TEST).  In the latter
+// case a ReturnSentinel is alive while the statement runs, and
+// Abort(TEST_DID_NOT_DIE) is called if the statement completes.
+// The name of the failure label is built from __LINE__.
+# define GTEST_DEATH_TEST_(statement, predicate, regex, fail) \
+  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+  if (::testing::internal::AlwaysTrue()) { \
+    const ::testing::internal::RE& gtest_regex = (regex); \
+    ::testing::internal::DeathTest* gtest_dt; \
+    if (!::testing::internal::DeathTest::Create(#statement, &gtest_regex, \
+        __FILE__, __LINE__, &gtest_dt)) { \
+      goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \
+    } \
+    if (gtest_dt != NULL) { \
+      ::testing::internal::scoped_ptr< ::testing::internal::DeathTest> \
+          gtest_dt_ptr(gtest_dt); \
+      switch (gtest_dt->AssumeRole()) { \
+        case ::testing::internal::DeathTest::OVERSEE_TEST: \
+          if (!gtest_dt->Passed(predicate(gtest_dt->Wait()))) { \
+            goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \
+          } \
+          break; \
+        case ::testing::internal::DeathTest::EXECUTE_TEST: { \
+          ::testing::internal::DeathTest::ReturnSentinel \
+              gtest_sentinel(gtest_dt); \
+          GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, gtest_dt); \
+          gtest_dt->Abort(::testing::internal::DeathTest::TEST_DID_NOT_DIE); \
+          break; \
+        } \
+        default: \
+          break; \
+      } \
+    } \
+  } else \
+    GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__): \
+      fail(::testing::internal::DeathTest::LastMessage())
+// The symbol "fail" here expands to something into which a message
+// can be streamed.
+
+// This macro is for implementing ASSERT/EXPECT_DEBUG_DEATH when compiled in
+// NDEBUG mode. In this case we need the statements to be executed, the regex is
+// ignored, and the macro must accept a streamed message even though the message
+// is never printed.
+// The regex argument does not appear in the expansion at all.  The else
+// branch, which is never taken, supplies a ::testing::Message() for any
+// streamed text to attach to.
+# define GTEST_EXECUTE_STATEMENT_(statement, regex) \
+  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+  if (::testing::internal::AlwaysTrue()) { \
+     GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+  } else \
+    ::testing::Message()
+
+// Holds the parsed contents of the --gtest_internal_run_death_test flag,
+// as it existed when RUN_ALL_TESTS was called: a file name, a line
+// number, an index and a write file descriptor.
+// NOTE(review): file/line/index presumably identify the death test to
+// run -- confirm against the flag parser.
+class InternalRunDeathTestFlag {
+ public:
+  // Stores the four parsed values as given.
+  InternalRunDeathTestFlag(const std::string& a_file,
+                           int a_line,
+                           int an_index,
+                           int a_write_fd)
+      : file_(a_file),
+        line_(a_line),
+        index_(an_index),
+        write_fd_(a_write_fd) {}
+
+  // Closes the write descriptor; a negative descriptor is left alone.
+  ~InternalRunDeathTestFlag() {
+    if (write_fd_ < 0) return;
+    posix::Close(write_fd_);
+  }
+
+  // Read-only accessors for the parsed values.
+  const std::string& file() const { return file_; }
+  int line() const { return line_; }
+  int index() const { return index_; }
+  int write_fd() const { return write_fd_; }
+
+ private:
+  std::string file_;
+  int line_;
+  int index_;
+  int write_fd_;  // Closed by the destructor when non-negative.
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(InternalRunDeathTestFlag);
+};
+
+// Returns a newly created InternalRunDeathTestFlag object with fields
+// initialized from the GTEST_FLAG(internal_run_death_test) flag if
+// the flag is specified; otherwise returns NULL.
+// NOTE(review): "newly created" suggests the caller takes ownership of the
+// returned object -- confirm against the definition and its callers.
+InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag();
+
+#else  // GTEST_HAS_DEATH_TEST
+
+// This macro is used for implementing macros such as
+// EXPECT_DEATH_IF_SUPPORTED and ASSERT_DEATH_IF_SUPPORTED on systems where
+// death tests are not supported. Those macros must compile on such systems
+// if and only if EXPECT_DEATH and ASSERT_DEATH compile with the same
+// parameters on systems that support death tests. This allows one to write
+// such a macro on a system that does not support death tests and be sure
+// that it will compile on a death-test supporting system.
+//
+// Parameters:
+//   statement -  A statement that a macro such as EXPECT_DEATH would test
+//                for program termination. This macro has to make sure this
+//                statement is compiled but not executed, to ensure that
+//                EXPECT_DEATH_IF_SUPPORTED compiles with a certain
+//                parameter if and only if EXPECT_DEATH compiles with it.
+//   regex     -  A regex that a macro such as EXPECT_DEATH would use to test
+//                the output of statement.  This parameter has to be
+//                compiled but not evaluated by this macro, to ensure that
+//                this macro only accepts expressions that a macro such as
+//                EXPECT_DEATH would accept.
+//   terminator - Must be an empty statement for EXPECT_DEATH_IF_SUPPORTED
+//                and a return statement for ASSERT_DEATH_IF_SUPPORTED.
+//                This ensures that ASSERT_DEATH_IF_SUPPORTED will not
+//                compile inside functions where ASSERT_DEATH doesn't
+//                compile.
+//
+//  The first branch logs a warning that names the statement that cannot
+//  be verified.
+//  The branch that has an always false condition is used to ensure that
+//  statement and regex are compiled (and thus syntactically correct) but
+//  never executed. The unreachable code macro protects the terminator
+//  statement from generating an 'unreachable code' warning in case
+//  statement unconditionally returns or throws. The Message constructor at
+//  the end allows the syntax of streaming additional messages into the
+//  macro, for compile-time compatibility with EXPECT_DEATH/ASSERT_DEATH.
+# define GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, terminator) \
+    GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+    if (::testing::internal::AlwaysTrue()) { \
+      GTEST_LOG_(WARNING) \
+          << "Death tests are not supported on this platform.\n" \
+          << "Statement '" #statement "' cannot be verified."; \
+    } else if (::testing::internal::AlwaysFalse()) { \
+      ::testing::internal::RE::PartialMatch(".*", (regex)); \
+      GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+      terminator; \
+    } else \
+      ::testing::Message()
+
+#endif  // GTEST_HAS_DEATH_TEST
+
+}  // namespace internal
+}  // namespace testing
+
+#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
@@ -0,0 +1,212 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: keith.ray@gmail.com (Keith Ray)
+//
+// Google Test filepath utilities
+//
+// This header file declares classes and functions used internally by
+// Google Test.  They are subject to change without notice.
+//
+// This file is #included in <gtest/internal/gtest-internal.h>.
+// Do not include this header file separately!
+
+#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
+#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
+
+#include "gtest/internal/gtest-string.h"
+
+namespace testing {
+namespace internal {
+
+// FilePath - a class for file and directory pathname manipulation which
+// handles platform-specific conventions (like the pathname separator).
+// Used for helper functions for naming files in a directory for xml output.
+// Except for Set methods, all methods are const or static, which provides an
+// "immutable value object" -- useful for peace of mind.
+// A FilePath with a value ending in a path separator ("like/this/") represents
+// a directory, otherwise it is assumed to represent a file. In either case,
+// it may or may not represent an actual file or directory in the file system.
+// Names are NOT checked for syntax correctness -- no checking for illegal
+// characters, malformed paths, etc.
+
+class GTEST_API_ FilePath {
+ public:
+  FilePath() : pathname_("") { }  // Starts out as the empty path "".
+  FilePath(const FilePath& rhs) : pathname_(rhs.pathname_) { }
+
+  explicit FilePath(const std::string& pathname) : pathname_(pathname) {
+    Normalize();  // Clean up separators; see Normalize() below.
+  }
+
+  FilePath& operator=(const FilePath& rhs) {
+    Set(rhs);
+    return *this;
+  }
+
+  void Set(const FilePath& rhs) {
+    pathname_ = rhs.pathname_;  // Copied as is, without re-normalizing.
+  }
+
+  const std::string& string() const {
+    return pathname_;  // By reference; no copy is made.
+  }
+  const char* c_str() const {
+    return pathname_.c_str();  // Points into pathname_'s own buffer.
+  }
+
+  // Returns the current working directory, or "" if unsuccessful.
+  static FilePath GetCurrentDir();
+
+  // Given directory = "dir", base_name = "test", number = 0,
+  // extension = "xml", returns "dir/test.xml". If number is greater
+  // than zero (e.g., 12), returns "dir/test_12.xml".
+  // On Windows platform, uses \ as the separator rather than /.
+  static FilePath MakeFileName(const FilePath& directory,
+                               const FilePath& base_name,
+                               int number,
+                               const char* extension);
+
+  // Given directory = "dir", relative_path = "test.xml",
+  // returns "dir/test.xml".
+  // On Windows, uses \ as the separator rather than /.
+  static FilePath ConcatPaths(const FilePath& directory,
+                              const FilePath& relative_path);
+
+  // Returns a pathname for a file that does not currently exist. The pathname
+  // will be directory/base_name.extension or
+  // directory/base_name_<number>.extension if directory/base_name.extension
+  // already exists. The number will be incremented until a pathname is found
+  // that does not already exist.
+  // Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'.
+  // There could be a race condition if two or more processes are calling this
+  // function at the same time -- they could both pick the same filename.
+  static FilePath GenerateUniqueFileName(const FilePath& directory,
+                                         const FilePath& base_name,
+                                         const char* extension);
+
+  // Returns true iff the path is "".
+  bool IsEmpty() const {
+    return pathname_.empty();
+  }
+
+  // If input name has a trailing separator character, removes it and returns
+  // the name, otherwise return the name string unmodified.
+  // On Windows platform, uses \ as the separator, other platforms use /.
+  FilePath RemoveTrailingPathSeparator() const;
+
+  // Returns a copy of the FilePath with the directory part removed.
+  // Example: FilePath("path/to/file").RemoveDirectoryName() returns
+  // FilePath("file"). If there is no directory part ("just_a_file"), it returns
+  // the FilePath unmodified. If there is no file part ("just_a_dir/") it
+  // returns an empty FilePath ("").
+  // On Windows platform, '\' is the path separator, otherwise it is '/'.
+  FilePath RemoveDirectoryName() const;
+
+  // RemoveFileName returns the directory path with the filename removed.
+  // Example: FilePath("path/to/file").RemoveFileName() returns "path/to/".
+  // If the FilePath is "a_file" or "/a_file", RemoveFileName returns
+  // FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does
+  // not have a file, like "just/a/dir/", it returns the FilePath unmodified.
+  // On Windows platform, '\' is the path separator, otherwise it is '/'.
+  FilePath RemoveFileName() const;
+
+  // Returns a copy of the FilePath with the case-insensitive extension removed.
+  // Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns
+  // FilePath("dir/file"). If a case-insensitive extension is not
+  // found, returns a copy of the original FilePath.
+  FilePath RemoveExtension(const char* extension) const;
+
+  // Creates directories so that path exists. Returns true if successful or if
+  // the directories already exist; returns false if unable to create
+  // directories for any reason. Will also return false if the FilePath does
+  // not represent a directory (that is, it doesn't end with a path separator).
+  bool CreateDirectoriesRecursively() const;
+
+  // Create the directory so that path exists. Returns true if successful or
+  // if the directory already exists; returns false if unable to create the
+  // directory for any reason, including if the parent directory does not
+  // exist. Not named "CreateDirectory" because that's a macro on Windows.
+  bool CreateFolder() const;
+
+  // Returns true if FilePath describes something in the file-system,
+  // either a file, directory, or whatever, and that something exists.
+  bool FileOrDirectoryExists() const;
+
+  // Returns true if pathname describes a directory in the file-system
+  // that exists.
+  bool DirectoryExists() const;
+
+  // Returns true if FilePath ends with a path separator, which indicates that
+  // it is intended to represent a directory. Returns false otherwise.
+  // This does NOT check that a directory (or file) actually exists.
+  bool IsDirectory() const;
+
+  // Returns true if pathname describes a root directory. (Windows has one
+  // root directory per disk drive.)
+  bool IsRootDirectory() const;
+
+  // Returns true if pathname describes an absolute path.
+  bool IsAbsolutePath() const;
+
+ private:
+  // Replaces multiple consecutive separators with a single separator.
+  // For example, "bar///foo" becomes "bar/foo". Does not eliminate other
+  // redundancies that might be in a pathname involving "." or "..".
+  //
+  // A pathname with multiple consecutive separators may occur either through
+  // user error or as a result of some scripts or APIs that generate a pathname
+  // with a trailing separator. On other platforms the same API or script
+  // may NOT generate a pathname with a trailing "/". Then elsewhere that
+  // pathname may have another "/" and pathname components added to it,
+  // without checking for the separator already being there.
+  // The script language and operating system may allow paths like "foo//bar"
+  // but some of the functions in FilePath will not handle that correctly. In
+  // particular, RemoveTrailingPathSeparator() only removes one separator, and
+  // it is called in CreateDirectoriesRecursively() assuming that it will change
+  // a pathname from directory syntax (trailing separator) to filename syntax.
+  //
+  // On Windows this method also replaces the alternate path separator '/' with
+  // the primary path separator '\\', so that for example "bar\\/\\foo" becomes
+  // "bar\\foo".
+
+  void Normalize();
+
+  // Returns a pointer to the last occurrence of a valid path separator in
+  // the FilePath. On Windows, for example, both '/' and '\' are valid path
+  // separators. Returns NULL if no path separator was found.
+  const char* FindLastPathSeparator() const;
+
+  std::string pathname_;
+};  // class FilePath
+
+}  // namespace internal
+}  // namespace testing
+
+#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
@@ -0,0 +1,266 @@
+// Copyright 2003 Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Dan Egnor (egnor@google.com)
+//
+// A "smart" pointer type with reference tracking.  Every pointer to a
+// particular object is kept on a circular linked list.  When the last pointer
+// to an object is destroyed or reassigned, the object is deleted.
+//
+// Used properly, this deletes the object when the last reference goes away.
+// There are several caveats:
+// - Like all reference counting schemes, cycles lead to leaks.
+// - Each smart pointer is actually two pointers (twice the size of a raw one).
+// - Every time a pointer is assigned, the entire list of pointers to that
+//   object is traversed.  This class is therefore NOT SUITABLE when there
+//   will often be more than two or three pointers to a particular object.
+// - References are only tracked as long as linked_ptr<> objects are copied.
+//   If a linked_ptr<> is converted to a raw pointer and back, BAD THINGS
+//   will happen (double deletion).
+//
+// A good use of this class is storing object references in STL containers.
+// You can safely put linked_ptr<> in a vector<>.
+// Other uses may not be as good.
+//
+// Note: If you use an incomplete type with linked_ptr<>, the class
+// *containing* linked_ptr<> must have a constructor and destructor (even
+// if they do nothing!).
+//
+// Bill Gibbons suggested we use something like this.
+//
+// Thread Safety:
+//   Unlike other linked_ptr implementations, in this implementation
+//   a linked_ptr object is thread-safe in the sense that:
+//     - it's safe to copy linked_ptr objects concurrently,
+//     - it's safe to copy *from* a linked_ptr and read its underlying
+//       raw pointer (e.g. via get()) concurrently, and
+//     - it's safe to write to two linked_ptrs that point to the same
+//       shared object concurrently.
+// TODO(wan@google.com): rename this to safe_linked_ptr to avoid
+// confusion with normal linked_ptr.
+
+#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
+#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
+
+#include <stdlib.h>
+#include <assert.h>
+
+#include "gtest/internal/gtest-port.h"
+
+namespace testing {
+namespace internal {
+
+// Guards every linked_ptr circle; locked by join() and depart() below.
+GTEST_API_ GTEST_DECLARE_STATIC_MUTEX_(g_linked_ptr_mutex);
+
+// This is used internally by all instances of linked_ptr<>.  It needs to be
+// a non-template class because different types of linked_ptr<> can refer to
+// the same object (linked_ptr<Superclass>(obj) vs linked_ptr<Subclass>(obj)).
+// So, it needs to be possible for different types of linked_ptr to participate
+// in the same circular linked list, so we need a single class type here.
+//
+// DO NOT USE THIS CLASS DIRECTLY YOURSELF.  Use linked_ptr<T>.
+class linked_ptr_internal {
+ public:
+  // Create a new circle that includes only this instance.
+  void join_new() { next_ = this; }
+
+  // Many linked_ptr operations may change p.link_ for some linked_ptr
+  // variable p in the same circle as this object.  Therefore we need
+  // to prevent two such operations from occurring concurrently.
+  //
+  // Note that different types of linked_ptr objects can coexist in a
+  // circle (e.g. linked_ptr<Base>, linked_ptr<Derived1>, and
+  // linked_ptr<Derived2>).  Therefore we must use a single mutex to
+  // protect all linked_ptr objects.  This can create serious
+  // contention in production code, but is acceptable in a testing
+  // framework.
+
+  // Join the circle that ptr belongs to, linking in just before ptr.  The
+  // walk to ptr's predecessor is linear in the size of the circle.
+  void join(linked_ptr_internal const* ptr)
+  GTEST_LOCK_EXCLUDED_(g_linked_ptr_mutex) {
+    MutexLock lock(&g_linked_ptr_mutex);
+
+    linked_ptr_internal const* p = ptr;
+    while (p->next_ != ptr) {
+      // Relinking a circle we already belong to would corrupt it.
+      assert(p->next_ != this &&
+             "Trying to join() a linked ring we are already in.");
+      p = p->next_;
+    }
+    p->next_ = this;
+    next_ = ptr;
+  }
+
+  // Leave whatever circle we're part of.  Returns true if we were the
+  // last member of the circle.  Once this is done, you can join() another.
+  bool depart()
+  GTEST_LOCK_EXCLUDED_(g_linked_ptr_mutex) {
+    MutexLock lock(&g_linked_ptr_mutex);
+    if (next_ == this) return true;  // Sole member: nothing to unlink.
+
+    linked_ptr_internal const* p = next_;
+    while (p->next_ != this) {
+      // Wrapping around to next_ means we are not in this circle, and the
+      // search for our predecessor would otherwise never terminate.
+      assert(p->next_ != next_ &&
+             "Trying to depart() a linked ring we are not in.");
+      p = p->next_;
+    }
+    p->next_ = next_;  // Predecessor now skips over us.
+    return false;
+  }
+
+ private:
+  // Successor in the circle; mutable so const members can be relinked.
+  mutable linked_ptr_internal const* next_;
+};
+
+template <typename T>
+class linked_ptr {
+ public:
+  typedef T element_type;
+
+  // Takes ownership of ptr, which starts a reference circle of its own.
+  // Hand the raw pointer over as soon as possible after creating the object.
+  explicit linked_ptr(T* ptr = NULL) { capture(ptr); }
+
+  // Leaves the circle; deletes the object if this was the last reference.
+  ~linked_ptr() { depart(); }
+
+  // Copy construction (from the same or a convertible pointee type) adds the
+  // new pointer to the source's list of references.
+  template <typename U>
+  linked_ptr(linked_ptr<U> const& ptr) {
+    copy(&ptr);
+  }
+  linked_ptr(linked_ptr const& ptr) {  // NOLINT
+    assert(&ptr != this);
+    copy(&ptr);
+  }
+
+  // Assignment drops the reference currently held, then shares ptr's.
+  template <typename U>
+  linked_ptr& operator=(linked_ptr<U> const& ptr) {
+    depart();
+    copy(&ptr);
+    return *this;
+  }
+
+  // Same-type assignment; assigning a pointer to itself is a no-op.
+  linked_ptr& operator=(linked_ptr const& ptr) {
+    if (&ptr == this) {
+      return *this;
+    }
+    depart();
+    copy(&ptr);
+    return *this;
+  }
+
+  // Releases the current object and takes ownership of ptr instead.
+  void reset(T* ptr = NULL) {
+    depart();
+    capture(ptr);
+  }
+
+  // Access to the managed raw pointer; ownership is not transferred.
+  T* get() const { return value_; }
+  T* operator->() const { return value_; }
+  T& operator*() const { return *value_; }
+
+  // Comparisons look only at the raw pointer value.
+  bool operator==(T* p) const { return value_ == p; }
+  bool operator!=(T* p) const { return value_ != p; }
+  template <typename U>
+  bool operator==(linked_ptr<U> const& ptr) const {
+    return value_ == ptr.get();
+  }
+  template <typename U>
+  bool operator!=(linked_ptr<U> const& ptr) const {
+    return value_ != ptr.get();
+  }
+
+ private:
+  // Other instantiations need link_ when copying across pointee types.
+  template <typename U>
+  friend class linked_ptr;
+
+  T* value_;                  // The shared object; may be NULL.
+  linked_ptr_internal link_;  // Our node in the circle of co-owners.
+
+  // Unlinks this pointer and destroys the object if nobody else holds it.
+  void depart() {
+    const bool was_last = link_.depart();
+    if (was_last) {
+      delete value_;
+    }
+  }
+
+  // Adopts ptr as the only reference to it.
+  void capture(T* ptr) {
+    value_ = ptr;
+    link_.join_new();
+  }
+
+  // Shares whatever ptr refers to.  A null source has nothing to share, so
+  // in that case a fresh single-member circle is started instead.
+  template <typename U>
+  void copy(linked_ptr<U> const* ptr) {
+    value_ = ptr->get();
+    if (!value_) {
+      link_.join_new();
+      return;
+    }
+    link_.join(&ptr->link_);
+  }
+};
+
+// Symmetric comparisons so that a raw pointer may appear on the left.
+template <typename T>
+inline bool operator==(T* ptr, const linked_ptr<T>& x) {
+  return x.get() == ptr;
+}
+template <typename T>
+inline bool operator!=(T* ptr, const linked_ptr<T>& x) {
+  return x.get() != ptr;
+}
+
+// Convenience factory that wraps a raw T* in a linked_ptr<T>, letting T be
+// deduced: make_linked_ptr(new FooBarBaz<type>(arg)) is shorter than
+// linked_ptr<FooBarBaz<type> >(new FooBarBaz<type>(arg)).
+template <typename T>
+linked_ptr<T> make_linked_ptr(T* ptr) {
+  return linked_ptr<T>(ptr);
+}
+
+}  // namespace internal
+}  // namespace testing
+
+#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
@@ -0,0 +1,301 @@
+$$ -*- mode: c++; -*-
+$var n = 50  $$ Maximum length of Values arguments we want to support.
+$var maxtuple = 10  $$ Maximum number of Combine arguments we want to support.
+// Copyright 2008 Google Inc.
+// All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: vladl@google.com (Vlad Losev)
+
+// Type and function utilities for implementing parameterized tests.
+// This file is generated by a SCRIPT.  DO NOT EDIT BY HAND!
+//
+// Currently Google Test supports at most $n arguments in Values,
+// and at most $maxtuple arguments in Combine. Please contact
+// googletestframework@googlegroups.com if you need more.
+// Please note that the number of arguments to Combine is limited
+// by the maximum arity of the implementation of tr1::tuple which is
+// currently set at $maxtuple.
+
+#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_
+#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_
+
+// scripts/fuse_gtest.py depends on gtest's own header being #included
+// *unconditionally*.  Therefore these #includes cannot be moved
+// inside #if GTEST_HAS_PARAM_TEST.
+#include "gtest/internal/gtest-param-util.h"
+#include "gtest/internal/gtest-port.h"
+
+#if GTEST_HAS_PARAM_TEST
+
+namespace testing {
+
+// Forward declarations of ValuesIn(), which is implemented in
+// include/gtest/gtest-param-test.h.
+template <typename ForwardIterator>
+internal::ParamGenerator<
+  typename ::testing::internal::IteratorTraits<ForwardIterator>::value_type>
+ValuesIn(ForwardIterator begin, ForwardIterator end);
+
+template <typename T, size_t N>
+internal::ParamGenerator<T> ValuesIn(const T (&array)[N]);
+
+template <class Container>
+internal::ParamGenerator<typename Container::value_type> ValuesIn(
+    const Container& container);
+
+namespace internal {
+
+// Holds the single argument of Values() until a ParamGenerator is requested.
+template <typename T1>
+class ValueArray1 {
+ public:
+  explicit ValueArray1(T1 v1) : v1_(v1) {}
+  template <typename T>
+  operator ParamGenerator<T>() const {
+    const T1* const only = &v1_;
+    return ValuesIn(only, only + 1);  // Half-open range of one element.
+  }
+
+ private:
+  void operator=(const ValueArray1& other);  // Declared only: no assignment.
+  const T1 v1_;
+};
+
+$range i 2..n
+$for i [[
+$range j 1..i
+
+template <$for j, [[typename T$j]]>
+class ValueArray$i {
+ public:
+  ValueArray$i($for j, [[T$j v$j]]) : $for j, [[v$(j)_(v$j)]] {}
+
+  template <typename T>
+  operator ParamGenerator<T>() const {
+    const T array[] = {$for j, [[static_cast<T>(v$(j)_)]]};
+    return ValuesIn(array);  // Uses the array overload declared above.
+  }
+
+ private:
+  // Declared but not defined - assignment is unsupported.
+  void operator=(const ValueArray$i& other);
+
+$for j [[
+
+  const T$j v$(j)_;
+]]
+
+};
+
+]]
+
+# if GTEST_HAS_COMBINE
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Generates values from the Cartesian product of values produced
+// by the argument generators.
+//
+$range i 2..maxtuple
+$for i [[
+$range j 1..i
+$range k 2..i
+
+template <$for j, [[typename T$j]]>
+class CartesianProductGenerator$i
+    : public ParamGeneratorInterface< ::std::tr1::tuple<$for j, [[T$j]]> > {
+ public:
+  typedef ::std::tr1::tuple<$for j, [[T$j]]> ParamType;
+
+  CartesianProductGenerator$i($for j, [[const ParamGenerator<T$j>& g$j]])
+      : $for j, [[g$(j)_(g$j)]] {}
+  virtual ~CartesianProductGenerator$i() {}
+
+  virtual ParamIteratorInterface<ParamType>* Begin() const {
+    return new Iterator(this, $for j, [[g$(j)_, g$(j)_.begin()]]);
+  }
+  virtual ParamIteratorInterface<ParamType>* End() const {
+    return new Iterator(this, $for j, [[g$(j)_, g$(j)_.end()]]);
+  }
+
+ private:
+  class Iterator : public ParamIteratorInterface<ParamType> {
+   public:
+    Iterator(const ParamGeneratorInterface<ParamType>* base, $for j, [[
+
+      const ParamGenerator<T$j>& g$j,
+      const typename ParamGenerator<T$j>::iterator& current$(j)]])
+        : base_(base),
+$for j, [[
+
+          begin$(j)_(g$j.begin()), end$(j)_(g$j.end()), current$(j)_(current$j)
+]]    {
+      ComputeCurrentValue();
+    }
+    virtual ~Iterator() {}
+
+    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
+      return base_;
+    }
+    // Steps the last component and carries into earlier ones as each wraps.
+    // Must not be called on an iterator that is already at the end.
+    virtual void Advance() {
+      assert(!AtEnd());
+      ++current$(i)_;
+
+$for k [[
+      if (current$(i+2-k)_ == end$(i+2-k)_) {
+        current$(i+2-k)_ = begin$(i+2-k)_;
+        ++current$(i+2-k-1)_;
+      }
+
+]]
+      ComputeCurrentValue();
+    }
+    virtual ParamIteratorInterface<ParamType>* Clone() const {
+      return new Iterator(*this);
+    }
+    virtual const ParamType* Current() const { return &current_value_; }
+    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
+      // Having the same base generator guarantees that the other
+      // iterator is of the same type and we can downcast.
+      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+          << "The program attempted to compare iterators "
+          << "from different generators." << std::endl;
+      const Iterator* typed_other =
+          CheckedDowncastToActualType<const Iterator>(&other);
+      // We must report iterators equal if they both point beyond their
+      // respective ranges. That can happen in a variety of fashions,
+      // so we have to consult AtEnd().
+      return (AtEnd() && typed_other->AtEnd()) ||
+         ($for j  && [[
+
+          current$(j)_ == typed_other->current$(j)_
+]]);
+    }
+
+   private:
+    Iterator(const Iterator& other)
+        : base_(other.base_), $for j, [[
+
+        begin$(j)_(other.begin$(j)_),
+        end$(j)_(other.end$(j)_),
+        current$(j)_(other.current$(j)_)
+]] {
+      ComputeCurrentValue();
+    }
+
+    void ComputeCurrentValue() {
+      if (!AtEnd())
+        current_value_ = ParamType($for j, [[*current$(j)_]]);
+    }
+    bool AtEnd() const {
+      // We must report iterator past the end of the range when any of the
+      // component iterators has reached the end of its range.
+      return
+$for j  || [[
+
+          current$(j)_ == end$(j)_
+]];
+    }
+
+    // No implementation - assignment is unsupported.
+    void operator=(const Iterator& other);
+
+    const ParamGeneratorInterface<ParamType>* const base_;
+    // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
+    // current[i]_ is the actual traversing iterator.
+$for j [[
+
+    const typename ParamGenerator<T$j>::iterator begin$(j)_;
+    const typename ParamGenerator<T$j>::iterator end$(j)_;
+    typename ParamGenerator<T$j>::iterator current$(j)_;
+]]
+
+    ParamType current_value_;
+  };  // class CartesianProductGenerator$i::Iterator
+
+  // No implementation - assignment is unsupported.
+  void operator=(const CartesianProductGenerator$i& other);
+
+
+$for j [[
+  const ParamGenerator<T$j> g$(j)_;
+
+]]
+};  // class CartesianProductGenerator$i
+
+
+]]
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Helper classes providing Combine() with polymorphic features. Each holder
+// keeps copies of the generators it was given and, when converted, builds a
+// CartesianProductGeneratorN from them after casting each one to the
+// requested ParamGenerator<T>, which requires the types to be convertible.
+$range i 2..maxtuple
+$for i [[
+$range j 1..i
+
+template <$for j, [[class Generator$j]]>
+class CartesianProductHolder$i {
+ public:
+CartesianProductHolder$i($for j, [[const Generator$j& g$j]])
+      : $for j, [[g$(j)_(g$j)]] {}
+  template <$for j, [[typename T$j]]>
+  operator ParamGenerator< ::std::tr1::tuple<$for j, [[T$j]]> >() const {
+    return ParamGenerator< ::std::tr1::tuple<$for j, [[T$j]]> >(
+        new CartesianProductGenerator$i<$for j, [[T$j]]>(
+$for j,[[
+
+        static_cast<ParamGenerator<T$j> >(g$(j)_)
+]]));
+  }
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const CartesianProductHolder$i& other);
+
+
+$for j [[
+  const Generator$j g$(j)_;
+
+]]
+};  // class CartesianProductHolder$i
+
+]]
+
+# endif  // GTEST_HAS_COMBINE
+
+}  // namespace internal
+}  // namespace testing
+
+#endif  //  GTEST_HAS_PARAM_TEST
+
+#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_
@@ -0,0 +1,650 @@
+// Copyright 2008 Google Inc.
+// All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: vladl@google.com (Vlad Losev)
+
+// Type and function utilities for implementing parameterized tests.
+
+#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
+#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
+
+#include <iterator>
+#include <utility>
+#include <vector>
+
+// scripts/fuse_gtest.py depends on gtest's own header being #included
+// *unconditionally*.  Therefore these #includes cannot be moved
+// inside #if GTEST_HAS_PARAM_TEST.
+#include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/gtest-linked_ptr.h"
+#include "gtest/internal/gtest-port.h"
+#include "gtest/gtest-printers.h"
+
+#if GTEST_HAS_PARAM_TEST
+
+namespace testing {
+namespace internal {
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Outputs a message explaining the invalid registration of a different
+// fixture class for the same test case. This may happen when the
+// TEST_P macro is used to define two tests with the same name
+// but in different namespaces.
+GTEST_API_ void ReportInvalidTestCaseType(const char* test_case_name,
+    const char* file, int line);
+// Forward declarations for the iterator classes defined below.
+template <typename> class ParamGeneratorInterface;
+template <typename> class ParamGenerator;
+
+// Interface for iterating over elements provided by an implementation
+// of ParamGeneratorInterface<T>.
+template <typename T>
+class ParamIteratorInterface {
+ public:
+  virtual ~ParamIteratorInterface() {}
+  // Returns a pointer to the base generator instance.
+  // Used only for the purposes of iterator comparison,
+  // to make sure that two iterators belong to the same generator.
+  virtual const ParamGeneratorInterface<T>* BaseGenerator() const = 0;
+  // Advances the iterator to point to the next element
+  // provided by the generator. The caller is responsible
+  // for not calling Advance() on an iterator equal to
+  // BaseGenerator()->End().
+  virtual void Advance() = 0;
+  // Clones the iterator object. Used for implementing the copy semantics
+  // of ParamIterator<T>, which stores the returned pointer in a scoped_ptr.
+  virtual ParamIteratorInterface* Clone() const = 0;
+  // Dereferences the current iterator and provides (read-only) access
+  // to the pointed-to value. It is the caller's responsibility not to call
+  // Current() on an iterator equal to BaseGenerator()->End().
+  // Used for implementing ParamIterator<T>::operator*() and operator->().
+  virtual const T* Current() const = 0;
+  // Determines whether this iterator and other point to the same
+  // element in the sequence generated by the generator.
+  // Used for implementing ParamIterator<T>::operator==().
+  virtual bool Equals(const ParamIteratorInterface& other) const = 0;
+};
+
+// Class iterating over elements provided by an implementation of
+// ParamGeneratorInterface<T>. It wraps ParamIteratorInterface<T>
+// and implements the const forward iterator concept.
+template <typename T>
+class ParamIterator {
+ public:
+  typedef T value_type;
+  typedef const T& reference;
+  typedef ptrdiff_t difference_type;
+
+  // Copying clones the wrapped implementation object, so every
+  // ParamIterator exclusively owns the object its impl_ points to.
+  ParamIterator(const ParamIterator& other) : impl_(other.impl_->Clone()) {}
+  ParamIterator& operator=(const ParamIterator& other) {
+    // Self-assignment is a no-op.
+    if (this == &other) {
+      return *this;
+    }
+
+    impl_.reset(other.impl_->Clone());
+    return *this;
+  }
+
+  // Read-only access to the element the iterator currently points to.
+  const T& operator*() const { return *impl_->Current(); }
+  const T* operator->() const { return impl_->Current(); }
+
+  // Prefix increment: advances in place and returns this iterator.
+  ParamIterator& operator++() {
+    impl_->Advance();
+    return *this;
+  }
+  // Postfix increment: returns an iterator wrapping a clone taken
+  // before this iterator is advanced.
+  ParamIterator operator++(int /*unused*/) {
+    ParamIteratorInterface<T>* const before_advance = impl_->Clone();
+    impl_->Advance();
+    return ParamIterator(before_advance);
+  }
+
+  // Two iterators are equal when they share the same implementation object
+  // or when the implementation reports that they point to the same element.
+  bool operator==(const ParamIterator& other) const {
+    if (impl_.get() == other.impl_.get()) {
+      return true;
+    }
+
+    return impl_->Equals(*other.impl_);
+  }
+  bool operator!=(const ParamIterator& other) const {
+    return !operator==(other);
+  }
+
+ private:
+  // Only ParamGenerator<T> may build an iterator from a raw implementation
+  // pointer; the new ParamIterator takes ownership of impl.
+  friend class ParamGenerator<T>;
+  explicit ParamIterator(ParamIteratorInterface<T>* impl) : impl_(impl) {}
+  scoped_ptr<ParamIteratorInterface<T> > impl_;
+};
+
+// ParamGeneratorInterface<T> is the binary interface to access generators
+// defined in other translation units.
+template <typename T>
+class ParamGeneratorInterface {
+ public:
+  typedef T ParamType;
+
+  virtual ~ParamGeneratorInterface() {}
+  // Generator interface definition. ParamGenerator<T> wraps each returned
+  // pointer in a ParamIterator<T>, which takes ownership of it.
+  virtual ParamIteratorInterface<T>* Begin() const = 0;
+  virtual ParamIteratorInterface<T>* End() const = 0;
+};
+
+// Wraps ParamGeneratorInterface<T> and provides general generator syntax
+// compatible with the STL Container concept.
+// This class implements copy initialization semantics and the contained
+// ParamGeneratorInterface<T> instance is shared among all copies
+// of the original object. This is possible because that instance is immutable.
+template<typename T>
+class ParamGenerator {
+ public:
+  typedef ParamIterator<T> iterator;
+
+  // Wraps impl in the linked_ptr member; copies of this object share it.
+  explicit ParamGenerator(ParamGeneratorInterface<T>* impl) : impl_(impl) {}
+  ParamGenerator(const ParamGenerator& other) : impl_(other.impl_) {}
+
+  // Assignment rebinds this object to the implementation held by other.
+  ParamGenerator& operator=(const ParamGenerator& other) {
+    impl_ = other.impl_;
+    return *this;
+  }
+
+  // Each call asks the implementation for a fresh iterator object and
+  // hands it to a ParamIterator.
+  iterator begin() const {
+    ParamIteratorInterface<T>* const first = impl_->Begin();
+    return iterator(first);
+  }
+  iterator end() const {
+    ParamIteratorInterface<T>* const past_last = impl_->End();
+    return iterator(past_last);
+  }
+
+ private:
+  linked_ptr<const ParamGeneratorInterface<T> > impl_;
+};
+
+// Generates values from a range of two comparable values. Can be used to
+// generate sequences of user-defined types that implement operator+() and
+// operator<().
+// This class is used in the Range() function.
+template <typename T, typename IncrementT>
+class RangeGenerator : public ParamGeneratorInterface<T> {
+ public:
+  // The sequence starts at begin and grows by step for as long as the value
+  // compares less than end. The element count is computed once, here.
+  RangeGenerator(T begin, T end, IncrementT step)
+    : begin_(begin), end_(end),
+      step_(step), end_index_(CalculateEndIndex(begin, end, step)) {}
+  virtual ~RangeGenerator() {}
+
+  virtual ParamIteratorInterface<T>* Begin() const {
+    return new Iterator(this, begin_, 0, step_);
+  }
+  virtual ParamIteratorInterface<T>* End() const {
+    return new Iterator(this, end_, end_index_, step_);
+  }
+
+ private:
+  // Iterator over the generated sequence. It tracks both the current value
+  // and its 0-based position; only the position is used for comparison.
+  class Iterator : public ParamIteratorInterface<T> {
+   public:
+    Iterator(const ParamGeneratorInterface<T>* base, T value, int index,
+             IncrementT step)
+      : base_(base), value_(value), index_(index), step_(step) {}
+    virtual ~Iterator() {}
+
+    virtual const ParamGeneratorInterface<T>* BaseGenerator() const {
+      return base_;
+    }
+    virtual void Advance() {
+      value_ = value_ + step_;
+      ++index_;
+    }
+    virtual ParamIteratorInterface<T>* Clone() const {
+      return new Iterator(*this);
+    }
+    virtual const T* Current() const {
+      return &value_;
+    }
+    virtual bool Equals(const ParamIteratorInterface<T>& other) const {
+      // Iterators created by the same generator share a concrete type,
+      // which makes the downcast below legitimate.
+      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+          << "The program attempted to compare iterators "
+          << "from different generators." << std::endl;
+      const Iterator* const typed_other =
+        CheckedDowncastToActualType<const Iterator>(&other);
+      return index_ == typed_other->index_;
+    }
+
+   private:
+    // Copying is reserved for Clone().
+    Iterator(const Iterator& other)
+      : ParamIteratorInterface<T>(),
+        base_(other.base_), value_(other.value_), index_(other.index_),
+        step_(other.step_) {}
+
+    // Declared only; assignment is not supported.
+    void operator=(const Iterator& other);
+
+    const ParamGeneratorInterface<T>* const base_;
+    T value_;
+    int index_;
+    const IncrementT step_;
+  };  // class RangeGenerator::Iterator
+
+  // Counts how many times step can be added to begin while the running
+  // value still compares less than end.
+  static int CalculateEndIndex(const T& begin,
+                               const T& end,
+                               const IncrementT& step) {
+    int count = 0;
+    T cursor = begin;
+
+    while (cursor < end) {
+      ++count;
+      cursor = cursor + step;
+    }
+
+    return count;
+  }
+
+  // Declared only; assignment is not supported.
+  void operator=(const RangeGenerator& other);
+
+  const T begin_;
+  const T end_;
+  const IncrementT step_;
+  // Position assigned to the End() iterator. Elements are numbered from 0
+  // so that iterators can be compared by position.
+  const int end_index_;
+};  // class RangeGenerator
+
+
+// Generates values from a pair of STL-style iterators. Used in the
+// ValuesIn() function. The elements are copied from the source range
+// since the source can be located on the stack, and the generator
+// is likely to persist beyond that stack frame.
+template <typename T>
+class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface<T> {
+ public:
+  // Copies the elements of [begin, end) into an internal container.
+  template <typename ForwardIterator>
+  ValuesInIteratorRangeGenerator(ForwardIterator begin, ForwardIterator end)
+    : container_(begin, end) {}
+  virtual ~ValuesInIteratorRangeGenerator() {}
+
+  virtual ParamIteratorInterface<T>* Begin() const {
+    return new Iterator(this, container_.begin());
+  }
+  virtual ParamIteratorInterface<T>* End() const {
+    return new Iterator(this, container_.end());
+  }
+
+ private:
+  typedef typename ::std::vector<T> ContainerType;
+
+  // Iterator over the copied elements.
+  class Iterator : public ParamIteratorInterface<T> {
+   public:
+    Iterator(const ParamGeneratorInterface<T>* base,
+             typename ContainerType::const_iterator iterator)
+      : base_(base), iterator_(iterator) {}
+    virtual ~Iterator() {}
+
+    virtual const ParamGeneratorInterface<T>* BaseGenerator() const {
+      return base_;
+    }
+    // Steps to the next element and discards the cached copy of the old one.
+    virtual void Advance() {
+      ++iterator_;
+      value_.reset();
+    }
+    virtual ParamIteratorInterface<T>* Clone() const {
+      return new Iterator(*this);
+    }
+    // A cached copy of the element is returned because *iterator_ can yield
+    // a temporary object (and one of a type other than T), so a plain
+    // "return &*iterator_;" doesn't work.
+    // The cache is filled here rather than in Advance() because Advance()
+    // can move iterator_ beyond the end of the range without being able to
+    // detect it. The client code, on the other hand, is responsible for
+    // not calling Current() on an out-of-range iterator.
+    virtual const T* Current() const {
+      if (value_.get() != NULL) {
+        return value_.get();
+      }
+
+      value_.reset(new T(*iterator_));
+      return value_.get();
+    }
+    virtual bool Equals(const ParamIteratorInterface<T>& other) const {
+      // Iterators created by the same generator share a concrete type,
+      // which makes the downcast below legitimate.
+      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+          << "The program attempted to compare iterators "
+          << "from different generators." << std::endl;
+      const Iterator* const typed_other =
+        CheckedDowncastToActualType<const Iterator>(&other);
+      return iterator_ == typed_other->iterator_;
+    }
+
+   private:
+    // Copying is reserved for Clone(). The cached value is not copied.
+    Iterator(const Iterator& other)
+    // Naming the base-class constructor explicitly suppresses a false
+    // warning emitted by gcc when supplied with the -Wextra option.
+      : ParamIteratorInterface<T>(),
+        base_(other.base_),
+        iterator_(other.iterator_) {}
+
+    const ParamGeneratorInterface<T>* const base_;
+    typename ContainerType::const_iterator iterator_;
+    // Cached copy of *iterator_, kept so that the wrapping iterator's
+    // operator->() has a pointer to return. It is mutable because Current()
+    // is const. The scoped_ptr ties the cached value's lifetime to that of
+    // the iterator itself.
+    mutable scoped_ptr<const T> value_;
+  };  // class ValuesInIteratorRangeGenerator::Iterator
+
+  // Declared only; assignment is not supported.
+  void operator=(const ValuesInIteratorRangeGenerator& other);
+
+  const ContainerType container_;
+};  // class ValuesInIteratorRangeGenerator
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Stores a parameter value and later creates tests parameterized with that
+// value.
+template <class TestClass>
+class ParameterizedTestFactory : public TestFactoryBase {
+ public:
+  typedef typename TestClass::ParamType ParamType;
+
+  // Keeps its own copy of the parameter value.
+  explicit ParameterizedTestFactory(ParamType parameter)
+    : parameter_(parameter) {}
+
+  // Points TestClass at the stored parameter, then constructs the test.
+  virtual Test* CreateTest() {
+    const ParamType* const stored_parameter = &parameter_;
+    TestClass::SetParam(stored_parameter);
+    return new TestClass();
+  }
+
+ private:
+  const ParamType parameter_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestFactory);
+};
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// TestMetaFactoryBase is a base class for meta-factories that create
+// test factories for passing into MakeAndRegisterTestInfo function.
+template <class ParamType>
+class TestMetaFactoryBase {
+ public:
+  virtual ~TestMetaFactoryBase() {}
+  // Returns a test factory for the given parameter value.
+  virtual TestFactoryBase* CreateTestFactory(ParamType parameter) = 0;
+};
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// TestMetaFactory creates test factories for passing into
+// MakeAndRegisterTestInfo function. Since MakeAndRegisterTestInfo receives
+// ownership of test factory pointer, same factory object cannot be passed
+// into that method twice. But ParameterizedTestCaseInfo is going to call
+// it for each Test/Parameter value combination. Thus it needs meta factory
+// creator class.
+template <class TestCase>
+class TestMetaFactory
+  : public TestMetaFactoryBase<typename TestCase::ParamType> {
+ public:
+  typedef typename TestCase::ParamType ParamType;
+
+  TestMetaFactory() {}
+
+  // Allocates a new factory object for the given parameter on every call.
+  virtual TestFactoryBase* CreateTestFactory(ParamType parameter) {
+    typedef ParameterizedTestFactory<TestCase> Factory;
+    return new Factory(parameter);
+  }
+
+ private:
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestMetaFactory);
+};
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// ParameterizedTestCaseInfoBase is a generic interface
+// to ParameterizedTestCaseInfo classes. ParameterizedTestCaseInfoBase
+// accumulates test information provided by TEST_P macro invocations
+// and generators provided by INSTANTIATE_TEST_CASE_P macro invocations
+// and uses that information to register all resulting test instances
+// in RegisterTests method. The ParameterizedTestCaseRegistry class holds
+// a collection of pointers to the ParameterizedTestCaseInfo objects
+// and calls RegisterTests() on each of them when asked.
+class ParameterizedTestCaseInfoBase {
+ public:
+  virtual ~ParameterizedTestCaseInfoBase() {}
+
+  // Base part of test case name for display purposes.
+  virtual const string& GetTestCaseName() const = 0;
+  // Test case id to verify identity.
+  virtual TypeId GetTestCaseTypeId() const = 0;
+  // UnitTest class invokes this method to register tests in this
+  // test case right before running them in RUN_ALL_TESTS macro.
+  // This method should not be called more than once on any single
+  // instance of a ParameterizedTestCaseInfoBase derived class.
+  virtual void RegisterTests() = 0;
+
+ protected:
+  ParameterizedTestCaseInfoBase() {}
+
+ private:
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfoBase);
+};
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// ParameterizedTestCaseInfo accumulates tests obtained from TEST_P
+// macro invocations for a particular test case and generators
+// obtained from INSTANTIATE_TEST_CASE_P macro invocations for that
+// test case. It registers tests with all values generated by all
+// generators when asked.
+template <class TestCase>
+class ParameterizedTestCaseInfo : public ParameterizedTestCaseInfoBase {
+ public:
+  // ParamType and GeneratorCreationFunc are private types but are required
+  // for declarations of public methods AddTestPattern() and
+  // AddTestCaseInstantiation().
+  typedef typename TestCase::ParamType ParamType;
+  // A function that returns an instance of appropriate generator type.
+  typedef ParamGenerator<ParamType>(GeneratorCreationFunc)();
+
+  explicit ParameterizedTestCaseInfo(const char* name)
+    : test_case_name_(name) {}
+
+  // Test case base name for display purposes.
+  virtual const string& GetTestCaseName() const {
+    return test_case_name_;
+  }
+  // Test case id to verify identity.
+  virtual TypeId GetTestCaseTypeId() const {
+    return GetTypeId<TestCase>();
+  }
+  // TEST_P macro uses AddTestPattern() to record information
+  // about a single test in a TestInfo structure.
+  // test_case_name is the base name of the test case (without invocation
+  // prefix). test_base_name is the name of an individual test without
+  // parameter index. For the test SequenceA/FooTest.DoBar/1 FooTest is
+  // test case base name and DoBar is test base name.
+  // The recorded TestInfo stores meta_factory in a scoped_ptr and thereby
+  // takes ownership of it.
+  void AddTestPattern(const char* test_case_name,
+                      const char* test_base_name,
+                      TestMetaFactoryBase<ParamType>* meta_factory) {
+    tests_.push_back(linked_ptr<TestInfo>(new TestInfo(test_case_name,
+                                          test_base_name,
+                                          meta_factory)));
+  }
+  // INSTANTIATE_TEST_CASE_P macro uses AddTestCaseInstantiation() to record
+  // the name of an instantiation together with the function that creates
+  // its generator. The file and line arguments are currently unused.
+  int AddTestCaseInstantiation(const string& instantiation_name,
+                               GeneratorCreationFunc* func,
+                               const char* /* file */,
+                               int /* line */) {
+    instantiations_.push_back(::std::make_pair(instantiation_name, func));
+    return 0;  // Return value used only to run this method in namespace scope.
+  }
+  // UnitTest class invokes this method to register the tests of this test
+  // case right before running tests in RUN_ALL_TESTS macro.
+  // This method should not be called more than once on any single
+  // instance of a ParameterizedTestCaseInfoBase derived class.
+  // UnitTest has a guard to prevent calling this method more than once.
+  //
+  // For every (test pattern, instantiation, parameter value) combination one
+  // test named "<test_base_name>/<index>" is registered in the test case
+  // "<instantiation_name>/<test_case_base_name>" (the prefix and slash are
+  // omitted when the instantiation name is empty).
+  virtual void RegisterTests() {
+    typedef typename ParamGenerator<ParamType>::iterator ParamIteratorType;
+
+    for (typename TestInfoContainer::iterator test_it = tests_.begin();
+         test_it != tests_.end(); ++test_it) {
+      // Bind by reference: copying the linked_ptr is not needed here.
+      const linked_ptr<TestInfo>& test_info = *test_it;
+
+      for (typename InstantiationContainer::iterator gen_it =
+             instantiations_.begin(); gen_it != instantiations_.end();
+           ++gen_it) {
+        const string& instantiation_name = gen_it->first;
+        ParamGenerator<ParamType> generator((*gen_it->second)());
+
+        string test_case_name;
+
+        if ( !instantiation_name.empty() ) {
+          test_case_name = instantiation_name + "/";
+        }
+
+        test_case_name += test_info->test_case_base_name;
+
+        // generator.end() allocates a new iterator object on every call,
+        // so it is evaluated once instead of once per loop iteration.
+        const ParamIteratorType params_end = generator.end();
+        int i = 0;
+
+        for (ParamIteratorType param_it = generator.begin();
+             param_it != params_end; ++param_it, ++i) {
+          Message test_name_stream;
+          test_name_stream << test_info->test_base_name << "/" << i;
+          MakeAndRegisterTestInfo(
+            test_case_name.c_str(),
+            test_name_stream.GetString().c_str(),
+            NULL,  // No type parameter.
+            PrintToString(*param_it).c_str(),
+            GetTestCaseTypeId(),
+            TestCase::SetUpTestCase,
+            TestCase::TearDownTestCase,
+            test_info->test_meta_factory->CreateTestFactory(*param_it));
+        }  // for param_it
+      }  // for gen_it
+    }  // for test_it
+  }  // RegisterTests
+
+ private:
+  // TestInfo structure keeps information about a single test registered
+  // with TEST_P macro.
+  struct TestInfo {
+    TestInfo(const char* a_test_case_base_name,
+             const char* a_test_base_name,
+             TestMetaFactoryBase<ParamType>* a_test_meta_factory) :
+      test_case_base_name(a_test_case_base_name),
+      test_base_name(a_test_base_name),
+      test_meta_factory(a_test_meta_factory) {}
+
+    const string test_case_base_name;
+    const string test_base_name;
+    const scoped_ptr<TestMetaFactoryBase<ParamType> > test_meta_factory;
+  };
+  typedef ::std::vector<linked_ptr<TestInfo> > TestInfoContainer;
+  // Keeps pairs of <Instantiation name, Sequence generator creation function>
+  // received from INSTANTIATE_TEST_CASE_P macros.
+  typedef ::std::vector<std::pair<string, GeneratorCreationFunc*> >
+  InstantiationContainer;
+
+  const string test_case_name_;
+  TestInfoContainer tests_;
+  InstantiationContainer instantiations_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfo);
+};  // class ParameterizedTestCaseInfo
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// ParameterizedTestCaseRegistry contains a map of ParameterizedTestCaseInfoBase
+// classes accessed by test case names. TEST_P and INSTANTIATE_TEST_CASE_P
+// macros use it to locate their corresponding ParameterizedTestCaseInfo
+// descriptors.
+class ParameterizedTestCaseRegistry {
+ public:
+  ParameterizedTestCaseRegistry() {}
+  // Deletes every test case info object held by the registry.
+  ~ParameterizedTestCaseRegistry() {
+    for (TestCaseInfoContainer::iterator info_it = test_case_infos_.begin();
+         info_it != test_case_infos_.end(); ++info_it) {
+      delete *info_it;
+    }
+  }
+
+  // Returns the info object recorded under test_case_name, creating and
+  // storing a new one when no entry with that name exists yet. If an entry
+  // with that name exists but was registered for a different fixture type,
+  // the mismatch is reported and the program is aborted.
+  template <class TestCase>
+  ParameterizedTestCaseInfo<TestCase>* GetTestCasePatternHolder(
+    const char* test_case_name,
+    const char* file,
+    int line) {
+    typedef ParameterizedTestCaseInfo<TestCase> TypedInfo;
+    TypedInfo* typed_test_info = NULL;
+
+    for (TestCaseInfoContainer::iterator info_it = test_case_infos_.begin();
+         info_it != test_case_infos_.end(); ++info_it) {
+      ParameterizedTestCaseInfoBase* const candidate = *info_it;
+
+      if (!(candidate->GetTestCaseName() == test_case_name)) {
+        continue;
+      }
+
+      if (candidate->GetTestCaseTypeId() == GetTypeId<TestCase>()) {
+        // The entry found has exactly the type being requested, so it is
+        // downcast to that type without further checks.
+        typed_test_info = CheckedDowncastToActualType<TypedInfo>(candidate);
+      }
+      else {
+        // Complain about incorrect usage of Google Test facilities
+        // and terminate the program since we cannot guarantee correct
+        // test case setup and tear-down in this case.
+        ReportInvalidTestCaseType(test_case_name,  file, line);
+        posix::Abort();
+      }
+
+      // Only the first entry with a matching name is ever examined.
+      break;
+    }
+
+    if (typed_test_info != NULL) {
+      return typed_test_info;
+    }
+
+    typed_test_info = new TypedInfo(test_case_name);
+    test_case_infos_.push_back(typed_test_info);
+    return typed_test_info;
+  }
+  // Forwards the registration request to every recorded test case info.
+  void RegisterTests() {
+    for (TestCaseInfoContainer::iterator info_it = test_case_infos_.begin();
+         info_it != test_case_infos_.end(); ++info_it) {
+      (*info_it)->RegisterTests();
+    }
+  }
+
+ private:
+  typedef ::std::vector<ParameterizedTestCaseInfoBase*> TestCaseInfoContainer;
+
+  TestCaseInfoContainer test_case_infos_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseRegistry);
+};
+
+}  // namespace internal
+}  // namespace testing
+
+#endif  //  GTEST_HAS_PARAM_TEST
+
+#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
@@ -0,0 +1,167 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
+//
+// The Google C++ Testing Framework (Google Test)
+//
+// This header file declares the String class and functions used internally by
+// Google Test.  They are subject to change without notice. They should not be
+// used by code external to Google Test.
+//
+// This header file is #included by <gtest/internal/gtest-internal.h>.
+// It should not be #included by other files.
+
+#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
+#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
+
+#ifdef __BORLANDC__
+// string.h is not guaranteed to provide strcpy on C++ Builder.
+# include <mem.h>
+#endif
+
+#include <string.h>
+#include <string>
+
+#include "gtest/internal/gtest-port.h"
+
+namespace testing {
+namespace internal {
+
+// String - a non-instantiable class holding static string utilities.
+class GTEST_API_ String {
+ public:
+  // Static utility methods
+
+  // Clones a 0-terminated C string, allocating memory using new.  The
+  // caller is responsible for deleting the return value using
+  // delete[].  Returns the cloned string, or NULL if the input is
+  // NULL.
+  //
+  // This is different from strdup() in string.h, which allocates
+  // memory using malloc().
+  static const char* CloneCString(const char* c_str);
+
+#if GTEST_OS_WINDOWS_MOBILE
+  // Windows CE does not have the 'ANSI' versions of Win32 APIs. To be
+  // able to pass strings to Win32 APIs on CE we need to convert them
+  // to 'Unicode', UTF-16.
+
+  // Creates a UTF-16 wide string from the given ANSI string, allocating
+  // memory using new. The caller is responsible for deleting the return
+  // value using delete[]. Returns the wide string, or NULL if the
+  // input is NULL.
+  //
+  // The wide string is created using the ANSI codepage (CP_ACP) to
+  // match the behaviour of the ANSI versions of Win32 calls and the
+  // C runtime.
+  static LPCWSTR AnsiToUtf16(const char* c_str);
+
+  // Creates an ANSI string from the given wide string, allocating
+  // memory using new. The caller is responsible for deleting the return
+  // value using delete[]. Returns the ANSI string, or NULL if the
+  // input is NULL.
+  //
+  // The returned string is created using the ANSI codepage (CP_ACP) to
+  // match the behaviour of the ANSI versions of Win32 calls and the
+  // C runtime.
+  static const char* Utf16ToAnsi(LPCWSTR utf16_str);
+#endif
+
+  // Compares two C strings.  Returns true iff they have the same content.
+  //
+  // Unlike strcmp(), this function can handle NULL argument(s).  A
+  // NULL C string is considered different to any non-NULL C string,
+  // including the empty string.
+  static bool CStringEquals(const char* lhs, const char* rhs);
+
+  // Converts a wide C string to an std::string using the UTF-8 encoding.
+  // NULL will be converted to "(null)".  If an error occurred during
+  // the conversion, "(failed to convert from wide string)" is
+  // returned.
+  static std::string ShowWideCString(const wchar_t* wide_c_str);
+
+  // Compares two wide C strings.  Returns true iff they have the same
+  // content.
+  //
+  // Unlike wcscmp(), this function can handle NULL argument(s).  A
+  // NULL C string is considered different to any non-NULL C string,
+  // including the empty string.
+  static bool WideCStringEquals(const wchar_t* lhs, const wchar_t* rhs);
+
+  // Compares two C strings, ignoring case.  Returns true iff they
+  // have the same content.
+  //
+  // Unlike strcasecmp(), this function can handle NULL argument(s).
+  // A NULL C string is considered different to any non-NULL C string,
+  // including the empty string.
+  static bool CaseInsensitiveCStringEquals(const char* lhs,
+      const char* rhs);
+
+  // Compares two wide C strings, ignoring case.  Returns true iff they
+  // have the same content.
+  //
+  // Unlike wcscasecmp(), this function can handle NULL argument(s).
+  // A NULL C string is considered different to any non-NULL wide C string,
+  // including the empty string.
+  // NB: The implementations on different platforms slightly differ.
+  // On Windows, this method uses _wcsicmp which compares according to LC_CTYPE
+  // environment variable. On GNU platform this method uses wcscasecmp
+  // which compares according to LC_CTYPE category of the current locale.
+  // On MacOS X, it uses towlower, which also uses LC_CTYPE category of the
+  // current locale.
+  static bool CaseInsensitiveWideCStringEquals(const wchar_t* lhs,
+      const wchar_t* rhs);
+
+  // Returns true iff the given string ends with the given suffix, ignoring
+  // case. Any string is considered to end with an empty suffix.
+  static bool EndsWithCaseInsensitive(
+    const std::string& str, const std::string& suffix);
+
+  // Formats an int value as "%02d".
+  static std::string FormatIntWidth2(int value);  // "%02d" for width == 2
+
+  // Formats an int value as "%X".
+  static std::string FormatHexInt(int value);
+
+  // Formats a byte as "%02X".
+  static std::string FormatByte(unsigned char value);
+
+ private:
+  String();  // Private and declared only: not meant to be instantiated.
+};  // class String
+
+// Returns the content of the stringstream's buffer as an std::string, with
+// each '\0' character in the buffer replaced by "\\0".
+GTEST_API_ std::string StringStreamToString(::std::stringstream* stream);
+
+}  // namespace internal
+}  // namespace testing
+
+#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
@@ -0,0 +1,339 @@
+$$ -*- mode: c++; -*-
+$var n = 10  $$ Maximum number of tuple fields we want to support.
+$$ This meta comment fixes auto-indentation in Emacs. }}
+// Copyright 2009 Google Inc.
+// All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+
+// Implements a subset of TR1 tuple needed by Google Test and Google Mock.
+
+#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
+#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
+
+#include <utility>  // For ::std::pair.
+
+// The compiler used in Symbian has a bug that prevents us from declaring the
+// tuple template as a friend (it complains that tuple is redefined).  This
+// hack bypasses the bug by declaring the members that should otherwise be
+// private as public.
+// Sun Studio versions < 12 also have the above bug.
+#if defined(__SYMBIAN32__) || (defined(__SUNPRO_CC) && __SUNPRO_CC < 0x590)
+# define GTEST_DECLARE_TUPLE_AS_FRIEND_ public:
+#else
+# define GTEST_DECLARE_TUPLE_AS_FRIEND_ \
+    template <GTEST_$(n)_TYPENAMES_(U)> friend class tuple; \
+   private:
+#endif
+
+
+$range i 0..n-1
+$range j 0..n
+$range k 1..n
+// GTEST_n_TUPLE_(T) is the type of an n-tuple.
+#define GTEST_0_TUPLE_(T) tuple<>
+
+$for k [[
+$range m 0..k-1
+$range m2 k..n-1
+#define GTEST_$(k)_TUPLE_(T) tuple<$for m, [[T##$m]]$for m2 [[, void]]>
+
+]]
+
+// GTEST_n_TYPENAMES_(T) declares a list of n typenames.
+
+$for j [[
+$range m 0..j-1
+#define GTEST_$(j)_TYPENAMES_(T) $for m, [[typename T##$m]]
+
+
+]]
+
+// In theory, defining stuff in the ::std namespace is undefined
+// behavior.  We can do this as we are playing the role of a standard
+// library vendor.
+namespace std {
+namespace tr1 {
+
+template <$for i, [[typename T$i = void]]>
+class tuple;
+
+// Anything in namespace gtest_internal is Google Test's INTERNAL
+// IMPLEMENTATION DETAIL and MUST NOT BE USED DIRECTLY in user code.
+namespace gtest_internal {
+
+// ByRef<T>::type is T if T is a reference; otherwise it's const T&.
+// Primary template: a non-reference T is mapped to const T&.
+template <typename T>
+struct ByRef { typedef const T& type; };  // NOLINT
+// Partial specialization: a reference type T& is kept as T&.
+template <typename T>
+struct ByRef<T&> { typedef T& type; };  // NOLINT
+
+// A handy wrapper for ByRef.
+#define GTEST_BY_REF_(T) typename ::std::tr1::gtest_internal::ByRef<T>::type
+
+// AddRef<T>::type is T if T is a reference; otherwise it's T&.  This
+// is the same as tr1::add_reference<T>::type.
+// Primary template: a non-reference T is mapped to T&.
+template <typename T>
+struct AddRef { typedef T& type; };  // NOLINT
+// Partial specialization: a reference type T& is kept as T&.
+template <typename T>
+struct AddRef<T&> { typedef T& type; };  // NOLINT
+
+// A handy wrapper for AddRef.
+#define GTEST_ADD_REF_(T) typename ::std::tr1::gtest_internal::AddRef<T>::type
+
+// A helper for implementing get<k>().
+template <int k> class Get;
+
+// A helper for implementing tuple_element<k, T>.  kIndexValid is true
+// iff k < the number of fields in tuple type T.
+template <bool kIndexValid, int kIndex, class Tuple>
+struct TupleElement;
+
+
+// One specialization is generated per valid index: for index i the member
+// typedef 'type' names the i-th template argument (Ti) of the full-width
+// tuple.  Only kIndexValid == true is specialized.
+$for i [[
+template <GTEST_$(n)_TYPENAMES_(T)>
+struct TupleElement<true, $i, GTEST_$(n)_TUPLE_(T) > {
+  typedef T$i type;
+};
+
+
+]]
+}  // namespace gtest_internal
+
+// The empty tuple.  It has no fields, so construction, copying and
+// assignment have nothing to do.
+template <>
+class tuple<> {
+ public:
+  tuple() {}
+  tuple(const tuple& /* t */)  {}
+  tuple& operator=(const tuple& /* t */) { return *this; }
+};
+
+
+// Generates the tuple class for every field count k from 1 to n.  For k < n
+// the class is a partial specialization whose unused trailing arguments are
+// void; for k == n it is the definition of the primary template.
+$for k [[
+$range m 0..k-1
+template <GTEST_$(k)_TYPENAMES_(T)>
+class $if k < n [[GTEST_$(k)_TUPLE_(T)]] $else [[tuple]] {
+ public:
+  // Get reads the f<i>_ fields directly, so it must be a friend.
+  template <int k> friend class gtest_internal::Get;
+
+  // Value-initializes every field.
+  tuple() : $for m, [[f$(m)_()]] {}
+
+  // Initializes each field from the corresponding argument.
+  explicit tuple($for m, [[GTEST_BY_REF_(T$m) f$m]]) : [[]]
+$for m, [[f$(m)_(f$m)]] {}
+
+  tuple(const tuple& t) : $for m, [[f$(m)_(t.f$(m)_)]] {}
+
+  // Converting constructor from a tuple with the same number of fields but
+  // possibly different field types.
+  template <GTEST_$(k)_TYPENAMES_(U)>
+  tuple(const GTEST_$(k)_TUPLE_(U)& t) : $for m, [[f$(m)_(t.f$(m)_)]] {}
+
+$if k == 2 [[
+  template <typename U0, typename U1>
+  tuple(const ::std::pair<U0, U1>& p) : f0_(p.first), f1_(p.second) {}
+
+]]
+
+  tuple& operator=(const tuple& t) { return CopyFrom(t); }
+
+  template <GTEST_$(k)_TYPENAMES_(U)>
+  tuple& operator=(const GTEST_$(k)_TUPLE_(U)& t) {
+    return CopyFrom(t);
+  }
+
+$if k == 2 [[
+  template <typename U0, typename U1>
+  tuple& operator=(const ::std::pair<U0, U1>& p) {
+    f0_ = p.first;
+    f1_ = p.second;
+    return *this;
+  }
+
+]]
+
+  // Befriends the other tuple instantiations and switches to private access
+  // (or to public access on compilers that cannot handle the friend
+  // declaration; see the macro definition).
+  GTEST_DECLARE_TUPLE_AS_FRIEND_
+
+  // Assigns every field of t to the matching field of this tuple.
+  template <GTEST_$(k)_TYPENAMES_(U)>
+  tuple& CopyFrom(const GTEST_$(k)_TUPLE_(U)& t) {
+
+$for m [[
+    f$(m)_ = t.f$(m)_;
+
+]]
+    return *this;
+  }
+
+
+$for m [[
+  T$m f$(m)_;
+
+]]
+};
+
+
+]]
+// 6.1.3.2 Tuple creation functions.
+
+// Known limitations: we don't support passing an
+// std::tr1::reference_wrapper<T> to make_tuple().  And we don't
+// implement tie().
+
+// make_tuple() with no arguments yields the empty tuple.
+inline tuple<> make_tuple() { return tuple<>(); }
+
+// One make_tuple overload is generated per field count k from 1 to n; each
+// takes its arguments by const reference and returns a tuple whose fields
+// are initialized from them.
+$for k [[
+$range m 0..k-1
+
+template <GTEST_$(k)_TYPENAMES_(T)>
+inline GTEST_$(k)_TUPLE_(T) make_tuple($for m, [[const T$m& f$m]]) {
+  return GTEST_$(k)_TUPLE_(T)($for m, [[f$m]]);
+}
+
+]]
+
+// 6.1.3.3 Tuple helper classes.
+
+// tuple_size<Tuple>::value is the number of fields in Tuple.  The primary
+// template is only declared; one specialization is generated for each field
+// count from 0 to n.
+template <typename Tuple> struct tuple_size;
+
+
+$for j [[
+template <GTEST_$(j)_TYPENAMES_(T)>
+struct tuple_size<GTEST_$(j)_TUPLE_(T) > {
+  static const int value = $j;
+};
+
+
+]]
+// tuple_element<k, Tuple>::type is the type of field k of Tuple.  The
+// validity flag passed to TupleElement is k < tuple_size<Tuple>::value, and
+// only the 'true' case is specialized there.
+template <int k, class Tuple>
+struct tuple_element {
+  typedef typename gtest_internal::TupleElement<
+      k < (tuple_size<Tuple>::value), k, Tuple>::type type;
+};
+
+#define GTEST_TUPLE_ELEMENT_(k, Tuple) typename tuple_element<k, Tuple >::type
+
+// 6.1.3.4 Element access.
+
+namespace gtest_internal {
+
+
+// One Get specialization is generated per field index.  Field() returns a
+// mutable reference to that field of a non-const tuple; ConstField() returns
+// the field of a const tuple through GTEST_BY_REF_.
+$for i [[
+template <>
+class Get<$i> {
+ public:
+  template <class Tuple>
+  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_($i, Tuple))
+  Field(Tuple& t) { return t.f$(i)_; }  // NOLINT
+
+  template <class Tuple>
+  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_($i, Tuple))
+  ConstField(const Tuple& t) { return t.f$(i)_; }
+};
+
+
+]]
+}  // namespace gtest_internal
+
+// get<k>(t) for a non-const tuple: forwards to Get<k>::Field and returns a
+// reference to field k.
+template <int k, GTEST_$(n)_TYPENAMES_(T)>
+GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_$(n)_TUPLE_(T)))
+get(GTEST_$(n)_TUPLE_(T)& t) {
+  return gtest_internal::Get<k>::Field(t);
+}
+
+// get<k>(t) for a const tuple: forwards to Get<k>::ConstField.
+template <int k, GTEST_$(n)_TYPENAMES_(T)>
+GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(k,  GTEST_$(n)_TUPLE_(T)))
+get(const GTEST_$(n)_TUPLE_(T)& t) {
+  return gtest_internal::Get<k>::ConstField(t);
+}
+
+// 6.1.3.5 Relational operators
+
+// We only implement == and !=, as we don't have a need for the rest yet.
+
+namespace gtest_internal {
+
+// SameSizeTuplePrefixComparator<k, k>::Eq(t1, t2) returns true if the
+// first k fields of t1 equal the first k fields of t2.
+// SameSizeTuplePrefixComparator<k1, k2> would be a compiler error if
+// k1 != k2.
+// Only declared: no definition exists for two different sizes.
+template <int kSize1, int kSize2>
+struct SameSizeTuplePrefixComparator;
+
+// Base case of the recursion: empty prefixes always compare equal.
+template <>
+struct SameSizeTuplePrefixComparator<0, 0> {
+  template <class Tuple1, class Tuple2>
+  static bool Eq(const Tuple1& /* t1 */, const Tuple2& /* t2 */) {
+    return true;
+  }
+};
+
+// Recursive case: the first k - 1 fields are compared first, then field
+// k - 1 itself; && stops at the first mismatch.
+template <int k>
+struct SameSizeTuplePrefixComparator<k, k> {
+  template <class Tuple1, class Tuple2>
+  static bool Eq(const Tuple1& t1, const Tuple2& t2) {
+    return SameSizeTuplePrefixComparator<k - 1, k - 1>::Eq(t1, t2) &&
+        ::std::tr1::get<k - 1>(t1) == ::std::tr1::get<k - 1>(t2);
+  }
+};
+
+}  // namespace gtest_internal
+
+// Field-by-field equality of two tuples.  The comparator is instantiated
+// with the tuple_size of each operand.
+template <GTEST_$(n)_TYPENAMES_(T), GTEST_$(n)_TYPENAMES_(U)>
+inline bool operator==(const GTEST_$(n)_TUPLE_(T)& t,
+                       const GTEST_$(n)_TUPLE_(U)& u) {
+  return gtest_internal::SameSizeTuplePrefixComparator<
+      tuple_size<GTEST_$(n)_TUPLE_(T) >::value,
+      tuple_size<GTEST_$(n)_TUPLE_(U) >::value>::Eq(t, u);
+}
+
+// Inequality is defined as the negation of operator==.
+template <GTEST_$(n)_TYPENAMES_(T), GTEST_$(n)_TYPENAMES_(U)>
+inline bool operator!=(const GTEST_$(n)_TUPLE_(T)& t,
+                       const GTEST_$(n)_TUPLE_(U)& u) { return !(t == u); }
+
+// 6.1.4 Pairs.
+// Unimplemented.
+
+}  // namespace tr1
+}  // namespace std
+
+
+$for j [[
+#undef GTEST_$(j)_TUPLE_
+
+]]
+
+
+$for j [[
+#undef GTEST_$(j)_TYPENAMES_
+
+]]
+
+#undef GTEST_DECLARE_TUPLE_AS_FRIEND_
+#undef GTEST_BY_REF_
+#undef GTEST_ADD_REF_
+#undef GTEST_TUPLE_ELEMENT_
+
+#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
@@ -0,0 +1,297 @@
+$$ -*- mode: c++; -*-
+$var n = 50  $$ Maximum length of type lists we want to support.
+// Copyright 2008 Google Inc.
+// All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+
+// Type utilities needed for implementing typed and type-parameterized
+// tests.  This file is generated by a SCRIPT.  DO NOT EDIT BY HAND!
+//
+// Currently we support at most $n types in a list, and at most $n
+// type-parameterized tests in one type-parameterized test case.
+// Please contact googletestframework@googlegroups.com if you need
+// more.
+
+#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
+#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
+
+#include "gtest/internal/gtest-port.h"
+
+// #ifdef __GNUC__ is too general here.  It is possible to use gcc without using
+// libstdc++ (which is where cxxabi.h comes from).
+# if GTEST_HAS_CXXABI_H_
+#  include <cxxabi.h>
+# elif defined(__HP_aCC)
+#  include <acxx_demangle.h>
+# endif  // GTEST_HAS_CXXABI_H_
+
+namespace testing {
+namespace internal {
+
+// GetTypeName<T>() returns a human-readable name of type T.
+// NB: This function is also used in Google Mock, so don't move it inside of
+// the typed-test-only section below.
+template <typename T>
+std::string GetTypeName() {
+# if GTEST_HAS_RTTI
+
+  // With RTTI available, start from the name that typeid reports for T.
+  const char* const name = typeid(T).name();
+#  if GTEST_HAS_CXXABI_H_ || defined(__HP_aCC)
+  int status = 0;
+  // gcc's implementation of typeid(T).name() mangles the type name,
+  // so we have to demangle it.
+#   if GTEST_HAS_CXXABI_H_
+  using abi::__cxa_demangle;
+#   endif  // GTEST_HAS_CXXABI_H_
+  char* const readable_name = __cxa_demangle(name, 0, 0, &status);
+  // Fall back to the raw name unless demangling reported success (status 0).
+  const std::string name_str(status == 0 ? readable_name : name);
+  // The demangled buffer is released here; name_str holds its own copy.
+  free(readable_name);
+  return name_str;
+#  else
+  // No demangler is available: return the typeid name unchanged.
+  return name;
+#  endif  // GTEST_HAS_CXXABI_H_ || __HP_aCC
+
+# else
+
+  // Without RTTI no type name can be obtained; use a fixed placeholder.
+  return "<type>";
+
+# endif  // GTEST_HAS_RTTI
+}
+
+#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
+
+// AssertTypeEq<T1, T2>::type is defined iff T1 and T2 are the same
+// type.  This can be used as a compile-time assertion to ensure that
+// two types are equal.
+
+// Declared but never defined for two different types, so using its 'type'
+// member with T1 != T2 fails to compile.
+template <typename T1, typename T2>
+struct AssertTypeEq;
+
+// The only definition: both arguments are the same type.
+template <typename T>
+struct AssertTypeEq<T, T> {
+  typedef bool type;
+};
+
+// A unique type used as the default value for the arguments of class
+// template Types.  This allows us to simulate variadic templates
+// (e.g. Types<int>, Types<int, double>, and etc), which C++ doesn't
+// support directly.
+struct None {};
+
+// The following family of struct and struct templates are used to
+// represent type lists.  In particular, TypesN<T1, T2, ..., TN>
+// represents a type list with N types (T1, T2, ..., and TN) in it.
+// Except for Types0, every struct in the family has two member types:
+// Head for the first type in the list, and Tail for the rest of the
+// list.
+
+// The empty type list.
+struct Types0 {};
+
+// Type lists of length 1, 2, 3, and so on.
+
+// A one-element list: Head is the element and Tail is the empty list.
+template <typename T1>
+struct Types1 {
+  typedef T1 Head;
+  typedef Types0 Tail;
+};
+
+// Generates the type lists of length 2 up to n.  In each, Head is T1 and
+// Tail is the next-shorter list holding the remaining types.
+$range i 2..n
+
+$for i [[
+$range j 1..i
+$range k 2..i
+template <$for j, [[typename T$j]]>
+struct Types$i {
+  typedef T1 Head;
+  typedef Types$(i-1)<$for k, [[T$k]]> Tail;
+};
+
+
+]]
+
+}  // namespace internal
+
+// We don't want to require the users to write TypesN<...> directly,
+// as that would require them to count the length.  Types<...> is much
+// easier to write, but generates horrible messages when there is a
+// compiler error, as gcc insists on printing out each template
+// argument, even if it has the default value (this means Types<int>
+// will appear as Types<int, None, None, ..., None> in the compiler
+// errors).
+//
+// Our solution is to combine the best part of the two approaches: a
+// user would write Types<T1, ..., TN>, and Google Test will translate
+// that to TypesN<T1, ..., TN> internally to make error messages
+// readable.  The translation is done by the 'type' member of the
+// Types template.
+
+// Primary template: every parameter defaults to internal::None, and 'type'
+// is the longest internal list, used when all n arguments are supplied.
+$range i 1..n
+template <$for i, [[typename T$i = internal::None]]>
+struct Types {
+  typedef internal::Types$n<$for i, [[T$i]]> type;
+};
+
+// All arguments left at the default: the empty list.
+template <>
+struct Types<$for i, [[internal::None]]> {
+  typedef internal::Types0 type;
+};
+
+// Partial specializations for 1 to n-1 real types followed by None padding:
+// 'type' is the internal list of exactly that length.
+$range i 1..n-1
+$for i [[
+$range j 1..i
+$range k i+1..n
+template <$for j, [[typename T$j]]>
+struct Types<$for j, [[T$j]]$for k[[, internal::None]]> {
+  typedef internal::Types$i<$for j, [[T$j]]> type;
+};
+
+]]
+
+namespace internal {
+
+# define GTEST_TEMPLATE_ template <typename T> class
+
+// The template "selector" struct TemplateSel<Tmpl> is used to
+// represent Tmpl, which must be a class template with one type
+// parameter, as a type.  TemplateSel<Tmpl>::Bind<T>::type is defined
+// as the type Tmpl<T>.  This allows us to actually instantiate the
+// template "selected" by TemplateSel<Tmpl>.
+//
+// This trick is necessary for simulating typedef for class templates,
+// which C++ doesn't support directly.
+// Wraps the class template Tmpl in an ordinary type.  The nested
+// Bind<T>::type names the instantiation Tmpl<T>.
+template <GTEST_TEMPLATE_ Tmpl>
+struct TemplateSel {
+  template <typename T>
+  struct Bind {
+    typedef Tmpl<T> type;
+  };
+};
+
+# define GTEST_BIND_(TmplSel, T) \
+  TmplSel::template Bind<T>::type
+
+// A unique struct template used as the default value for the
+// arguments of class template Templates.  This allows us to simulate
+// variadic templates (e.g. Templates<int>, Templates<int, double>,
+// and etc), which C++ doesn't support directly.
+template <typename T>
+struct NoneT {};
+
+// The following family of struct and struct templates are used to
+// represent template lists.  In particular, TemplatesN<T1, T2, ...,
+// TN> represents a list of N templates (T1, T2, ..., and TN).  Except
+// for Templates0, every struct in the family has two member types:
+// Head for the selector of the first template in the list, and Tail
+// for the rest of the list.
+
+// The empty template list.
+struct Templates0 {};
+
+// Template lists of length 1, 2, 3, and so on.
+
+// A one-element template list: Head is the selector for T1 and Tail is the
+// empty list.
+template <GTEST_TEMPLATE_ T1>
+struct Templates1 {
+  typedef TemplateSel<T1> Head;
+  typedef Templates0 Tail;
+};
+
+// Generates the template lists of length 2 up to n.  In each, Head is the
+// selector for T1 and Tail is the next-shorter list of the remaining
+// templates.
+$range i 2..n
+
+$for i [[
+$range j 1..i
+$range k 2..i
+template <$for j, [[GTEST_TEMPLATE_ T$j]]>
+struct Templates$i {
+  typedef TemplateSel<T1> Head;
+  typedef Templates$(i-1)<$for k, [[T$k]]> Tail;
+};
+
+
+]]
+
+// We don't want to require the users to write TemplatesN<...> directly,
+// as that would require them to count the length.  Templates<...> is much
+// easier to write, but generates horrible messages when there is a
+// compiler error, as gcc insists on printing out each template
+// argument, even if it has the default value (this means Templates<list>
+// will appear as Templates<list, NoneT, NoneT, ..., NoneT> in the compiler
+// errors).
+//
+// Our solution is to combine the best part of the two approaches: a
+// user would write Templates<T1, ..., TN>, and Google Test will translate
+// that to TemplatesN<T1, ..., TN> internally to make error messages
+// readable.  The translation is done by the 'type' member of the
+// Templates template.
+
+// Primary template: every parameter defaults to NoneT, and 'type' is the
+// longest list, used when all n templates are supplied.
+$range i 1..n
+template <$for i, [[GTEST_TEMPLATE_ T$i = NoneT]]>
+struct Templates {
+  typedef Templates$n<$for i, [[T$i]]> type;
+};
+
+// All arguments left at the default: the empty template list.
+template <>
+struct Templates<$for i, [[NoneT]]> {
+  typedef Templates0 type;
+};
+
+// Partial specializations for 1 to n-1 real templates followed by NoneT
+// padding: 'type' is the list of exactly that length.
+$range i 1..n-1
+$for i [[
+$range j 1..i
+$range k i+1..n
+template <$for j, [[GTEST_TEMPLATE_ T$j]]>
+struct Templates<$for j, [[T$j]]$for k[[, NoneT]]> {
+  typedef Templates$i<$for j, [[T$j]]> type;
+};
+
+]]
+
+// The TypeList template makes it possible to use either a single type
+// or a Types<...> list in TYPED_TEST_CASE() and
+// INSTANTIATE_TYPED_TEST_CASE_P().
+
+// General case: a single type T is treated as the one-element list
+// Types1<T>.
+template <typename T>
+struct TypeList {
+  typedef Types1<T> type;
+};
+
+
+// Specialization for an argument that is already a Types list: reuse the
+// internal list that Types computed.
+$range i 1..n
+template <$for i, [[typename T$i]]>
+struct TypeList<Types<$for i, [[T$i]]> > {
+  typedef typename Types<$for i, [[T$i]]>::type type;
+};
+
+#endif  // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
+
+}  // namespace internal
+}  // namespace testing
+
+#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
@@ -0,0 +1,51 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: mheule@google.com (Markus Heule)
+//
+// Google C++ Testing Framework (Google Test)
+//
+// Sometimes it's desirable to build Google Test by compiling a single file.
+// This file serves this purpose.
+
+// This line ensures that gtest.h can be compiled on its own, even
+// when it's fused.
+#include "gtest/gtest.h"
+
+
+// The following lines pull in the real gtest *.cc files.
+/**
+#include "src/gtest.cc"
+#include "src/gtest-death-test.cc"
+#include "src/gtest-filepath.cc"
+#include "src/gtest-port.cc"
+#include "src/gtest-printers.cc"
+#include "src/gtest-test-part.cc"
+#include "src/gtest-typed-test.cc"
+**/
@@ -0,0 +1,409 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: keith.ray@gmail.com (Keith Ray)
+
+#include "gtest/gtest-message.h"
+#include "gtest/internal/gtest-filepath.h"
+#include "gtest/internal/gtest-port.h"
+
+#include <stdlib.h>
+
+#if GTEST_OS_WINDOWS_MOBILE
+# include <windows.h>
+#elif GTEST_OS_WINDOWS
+# include <direct.h>
+# include <io.h>
+#elif GTEST_OS_SYMBIAN
+// Symbian OpenC has PATH_MAX in sys/syslimits.h
+# include <sys/syslimits.h>
+#else
+# include <limits.h>
+# include <climits>  // Some Linux distributions define PATH_MAX here.
+#endif  // GTEST_OS_WINDOWS_MOBILE
+
+#if GTEST_OS_WINDOWS
+# define GTEST_PATH_MAX_ _MAX_PATH
+#elif defined(PATH_MAX)
+# define GTEST_PATH_MAX_ PATH_MAX
+#elif defined(_XOPEN_PATH_MAX)
+# define GTEST_PATH_MAX_ _XOPEN_PATH_MAX
+#else
+# define GTEST_PATH_MAX_ _POSIX_PATH_MAX
+#endif  // GTEST_OS_WINDOWS
+
+#include "gtest/internal/gtest-string.h"
+
+namespace testing {
+namespace internal {
+
+// Platform-dependent path constants used by the FilePath implementation
+// below.
+#if GTEST_OS_WINDOWS
+// On Windows, '\\' is the standard path separator, but many tools and the
+// Windows API also accept '/' as an alternate path separator. Unless otherwise
+// noted, a file path can contain either kind of path separators, or a mixture
+// of them.
+const char kPathSeparator = '\\';
+const char kAlternatePathSeparator = '/';
+const char kPathSeparatorString[] = "\\";
+const char kAlternatePathSeparatorString[] = "/";
+# if GTEST_OS_WINDOWS_MOBILE
+// Windows CE doesn't have a current directory. You should not use
+// the current directory in tests on Windows CE, but this at least
+// provides a reasonable fallback.
+const char kCurrentDirectoryString[] = "\\";
+// Windows CE doesn't define INVALID_FILE_ATTRIBUTES
+const DWORD kInvalidFileAttributes = 0xffffffff;
+# else
+// Desktop Windows: the current directory, spelled with the native separator.
+const char kCurrentDirectoryString[] = ".\\";
+# endif  // GTEST_OS_WINDOWS_MOBILE
+#else
+// Everywhere else '/' is the only separator defined here, and "./" names
+// the current directory.
+const char kPathSeparator = '/';
+const char kPathSeparatorString[] = "/";
+const char kCurrentDirectoryString[] = "./";
+#endif  // GTEST_OS_WINDOWS
+
+// Returns whether the given character is a valid path separator.
+static bool IsPathSeparator(char c) {
+  // The primary separator always counts.
+  if (c == kPathSeparator) {
+    return true;
+  }
+#if GTEST_HAS_ALT_PATH_SEP_
+  // Platforms with an alternate separator accept that one as well.
+  if (c == kAlternatePathSeparator) {
+    return true;
+  }
+#endif
+  return false;
+}
+
+// Returns the current working directory, or "" if unsuccessful.
+FilePath FilePath::GetCurrentDir() {
+#if GTEST_OS_WINDOWS_MOBILE
+  // Windows CE has no current directory; hand back the fallback value.
+  return FilePath(kCurrentDirectoryString);
+#else
+  // Zero-filled buffer with room for the longest path plus a terminator.
+  char buffer[GTEST_PATH_MAX_ + 1] = { '\0' };
+# if GTEST_OS_WINDOWS
+  const char* const result = _getcwd(buffer, sizeof(buffer));
+# else
+  const char* const result = getcwd(buffer, sizeof(buffer));
+# endif  // GTEST_OS_WINDOWS
+
+  // A NULL result means the lookup failed; report that as an empty path.
+  if (result == NULL) {
+    return FilePath("");
+  }
+
+  return FilePath(buffer);
+#endif  // GTEST_OS_WINDOWS_MOBILE
+}
+
+// Returns a copy of the FilePath with the case-insensitive extension removed.
+// Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns
+// FilePath("dir/file"). If a case-insensitive extension is not
+// found, returns a copy of the original FilePath.
+FilePath FilePath::RemoveExtension(const char* extension) const {
+  // Match against ".<extension>" so that only a complete extension counts.
+  std::string suffix(".");
+  suffix += extension;
+
+  if (!String::EndsWithCaseInsensitive(pathname_, suffix)) {
+    // The path does not end with that extension: return it unchanged.
+    return *this;
+  }
+
+  const std::string::size_type stem_length =
+      pathname_.length() - suffix.length();
+  return FilePath(pathname_.substr(0, stem_length));
+}
+
+// Returns a pointer to the last occurrence of a valid path separator in
+// the FilePath. On Windows, for example, both '/' and '\' are valid path
+// separators. Returns NULL if no path separator was found.
+const char* FilePath::FindLastPathSeparator() const {
+  const char* result = strrchr(c_str(), kPathSeparator);
+#if GTEST_HAS_ALT_PATH_SEP_
+  const char* const alt = strrchr(c_str(), kAlternatePathSeparator);
+
+  // Prefer the alternate separator when it is the only one found or when it
+  // lies further right.  The pointers are ordered only when both are
+  // non-NULL, because comparing against a NULL pointer is undefined.
+  if (alt != NULL) {
+    if (result == NULL || alt > result) {
+      result = alt;
+    }
+  }
+
+#endif
+  return result;
+}
+
+// Returns a copy of the FilePath with the directory part removed.
+// Example: FilePath("path/to/file").RemoveDirectoryName() returns
+// FilePath("file"). If there is no directory part ("just_a_file"), it returns
+// the FilePath unmodified. If there is no file part ("just_a_dir/") it
+// returns an empty FilePath ("").
+// On Windows platform, '\' is the path separator, otherwise it is '/'.
+FilePath FilePath::RemoveDirectoryName() const {
+  const char* const separator = FindLastPathSeparator();
+
+  // Without any separator there is no directory part to strip.
+  if (separator == NULL) {
+    return *this;
+  }
+
+  // Keep only what follows the last separator.
+  return FilePath(separator + 1);
+}
+
+// RemoveFileName returns the directory path with the filename removed.
+// Example: FilePath("path/to/file").RemoveFileName() returns "path/to/".
+// If the FilePath is "a_file", RemoveFileName returns FilePath("./") or,
+// on Windows, FilePath(".\\"); for "/a_file" it returns FilePath("/"). If
+// the filepath does not have a file, like "just/a/dir/", it returns the
+// FilePath unmodified.
+// On Windows platform, '\' is the path separator, otherwise it is '/'.
+FilePath FilePath::RemoveFileName() const {
+  const char* const separator = FindLastPathSeparator();
+
+  // A bare file name has no directory part; use the current directory.
+  if (separator == NULL) {
+    return FilePath(std::string(kCurrentDirectoryString));
+  }
+
+  // Keep everything up to and including the last separator.
+  const std::string directory(c_str(), separator + 1 - c_str());
+  return FilePath(directory);
+}
+
+// Helper functions for naming files in a directory for xml output.
+
+// Given directory = "dir", base_name = "test", number = 0,
+// extension = "xml", returns "dir/test.xml". If number is greater
+// than zero (e.g., 12), returns "dir/test_12.xml".
+// On Windows platform, uses \ as the separator rather than /.
+FilePath FilePath::MakeFileName(const FilePath& directory,
+                                const FilePath& base_name,
+                                int number,
+                                const char* extension) {
+  std::string file_name = base_name.string();
+
+  // A non-zero number is inserted as a "_<number>" suffix on the base name.
+  if (number != 0) {
+    file_name += "_" + StreamableToString(number);
+  }
+
+  file_name += ".";
+  file_name += extension;
+
+  return ConcatPaths(directory, FilePath(file_name));
+}
+
+// Given directory = "dir", relative_path = "test.xml", returns "dir/test.xml".
+// On Windows, uses \ as the separator rather than /.
+FilePath FilePath::ConcatPaths(const FilePath& directory,
+                               const FilePath& relative_path) {
+  if (!directory.IsEmpty()) {
+    // Drop any trailing separator on the directory so that exactly one
+    // separator is inserted between the two parts.
+    std::string joined = directory.RemoveTrailingPathSeparator().string();
+    joined += kPathSeparator;
+    joined += relative_path.string();
+    return FilePath(joined);
+  }
+
+  // An empty directory contributes nothing.
+  return relative_path;
+}
+
+// Returns true if pathname describes something findable in the file-system,
+// either a file, directory, or whatever.
+bool FilePath::FileOrDirectoryExists() const {
+#if GTEST_OS_WINDOWS_MOBILE
+  // Query the file attributes through a UTF-16 copy of the path; the copy
+  // is released with delete[] as soon as the query is done.
+  LPCWSTR wide_path = String::AnsiToUtf16(pathname_.c_str());
+  const bool found = GetFileAttributes(wide_path) != kInvalidFileAttributes;
+  delete [] wide_path;
+  return found;
+#else
+  // The path exists exactly when Stat() succeeds on it.
+  posix::StatStruct info;
+  const int status = posix::Stat(pathname_.c_str(), &info);
+  return status == 0;
+#endif  // GTEST_OS_WINDOWS_MOBILE
+}
+
+// Returns true if pathname describes a directory in the file-system
+// that exists.
+bool FilePath::DirectoryExists() const {
+#if GTEST_OS_WINDOWS
+  // Don't strip off trailing separator if path is a root directory on
+  // Windows (like "C:\\").
+  const FilePath& target(IsRootDirectory() ? *this :
+                         RemoveTrailingPathSeparator());
+#else
+  const FilePath& target(*this);
+#endif
+
+#if GTEST_OS_WINDOWS_MOBILE
+  // Look up the attributes through a temporary UTF-16 copy of the path.
+  LPCWSTR wide_path = String::AnsiToUtf16(target.c_str());
+  const DWORD attrs = GetFileAttributes(wide_path);
+  delete [] wide_path;
+
+  // The lookup must have succeeded and the directory bit must be set.
+  return (attrs != kInvalidFileAttributes) &&
+         (attrs & FILE_ATTRIBUTE_DIRECTORY);
+#else
+  posix::StatStruct info;
+
+  // A path that cannot be stat'ed is not an existing directory.
+  if (posix::Stat(target.c_str(), &info) != 0) {
+    return false;
+  }
+
+  return posix::IsDir(info);
+#endif  // GTEST_OS_WINDOWS_MOBILE
+}
+
+// Returns true if pathname describes a root directory. (Windows has one
+// root directory per disk drive.)
+bool FilePath::IsRootDirectory() const {
+#if GTEST_OS_WINDOWS
+  // TODO(wan@google.com): on Windows a network share like
+  // \\server\share can be a root directory, although it cannot be the
+  // current directory.  Handle this properly.
+
+  // A drive root is exactly three characters long (e.g. "C:\\").
+  if (pathname_.length() != 3) {
+    return false;
+  }
+
+  return IsAbsolutePath();
+#else
+  // The only root is a single path separator.
+  if (pathname_.length() != 1) {
+    return false;
+  }
+
+  return IsPathSeparator(pathname_.c_str()[0]);
+#endif
+}
+
+// Returns true if pathname describes an absolute path.
+bool FilePath::IsAbsolutePath() const {
+  const char* const path = pathname_.c_str();
+#if GTEST_OS_WINDOWS
+  // An absolute Windows path starts with "<letter>:<separator>".
+  if (pathname_.length() < 3) {
+    return false;
+  }
+
+  const char drive = path[0];
+  const bool is_lower = drive >= 'a' && drive <= 'z';
+  const bool is_upper = drive >= 'A' && drive <= 'Z';
+
+  if (!is_lower && !is_upper) {
+    return false;
+  }
+
+  return path[1] == ':' && IsPathSeparator(path[2]);
+#else
+  // Elsewhere a path is absolute when it begins with a separator.
+  return IsPathSeparator(path[0]);
+#endif
+}
+
+// Produces a pathname that does not name an existing file or directory.
+// Candidates are built by MakeFileName() with the numbers 0, 1, 2, ... in
+// turn, and the first candidate that does not exist is returned.
+// Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'.
+// This is racy: two or more processes calling it at the same time could
+// both pick the same filename.
+FilePath FilePath::GenerateUniqueFileName(const FilePath& directory,
+    const FilePath& base_name,
+    const char* extension) {
+  FilePath candidate;
+
+  for (int suffix = 0; ; ++suffix) {
+    candidate.Set(MakeFileName(directory, base_name, suffix, extension));
+
+    if (!candidate.FileOrDirectoryExists()) {
+      break;
+    }
+  }
+
+  return candidate;
+}
+
+// Reports whether the path text ends with a path separator, i.e. whether
+// it is written as a directory.  This is purely textual: nothing is
+// looked up in the file-system.
+bool FilePath::IsDirectory() const {
+  if (pathname_.empty()) {
+    return false;
+  }
+
+  const char last = pathname_.c_str()[pathname_.length() - 1];
+  return IsPathSeparator(last);
+}
+
+// Creates this directory and any missing ancestors.  Returns true if the
+// directory exists afterwards (including when it already existed), and
+// false if the path is not written as a directory or creation fails.
+bool FilePath::CreateDirectoriesRecursively() const {
+  if (!IsDirectory()) {
+    return false;
+  }
+
+  if (pathname_.empty() || DirectoryExists()) {
+    return true;
+  }
+
+  // Make sure the parent exists first, then create this level.
+  const FilePath parent(RemoveTrailingPathSeparator().RemoveFileName());
+
+  if (!parent.CreateDirectoriesRecursively()) {
+    return false;
+  }
+
+  return CreateFolder();
+}
+
+// Creates this single directory level.  Returns true if the creation call
+// succeeds, or if it fails but the directory exists anyway; returns false
+// otherwise.  Not named "CreateDirectory" because that's a macro on
+// Windows.
+bool FilePath::CreateFolder() const {
+#if GTEST_OS_WINDOWS_MOBILE
+  FilePath trimmed(this->RemoveTrailingPathSeparator());
+  LPCWSTR wide_name = String::AnsiToUtf16(trimmed.c_str());
+  const int status = CreateDirectory(wide_name, NULL) ? 0 : -1;
+  delete [] wide_name;
+#elif GTEST_OS_WINDOWS
+  const int status = _mkdir(pathname_.c_str());
+#else
+  const int status = mkdir(pathname_.c_str(), 0777);
+#endif  // GTEST_OS_WINDOWS_MOBILE
+
+  // A failed creation is still fine when the directory is already there.
+  return status != -1 || this->DirectoryExists();
+}
+
+// Returns a copy of this path with its final character dropped when that
+// character is a path separator; otherwise returns an unmodified copy.
+// On Windows platform, uses \ as the separator, other platforms use /.
+FilePath FilePath::RemoveTrailingPathSeparator() const {
+  if (!IsDirectory()) {
+    return *this;
+  }
+
+  return FilePath(pathname_.substr(0, pathname_.length() - 1));
+}
+
+// Removes any redundant separators that might be in the pathname.
+// For example, "bar///foo" becomes "bar/foo". Does not eliminate other
+// redundancies that might be in a pathname involving "." or "..".
+// When the platform defines an alternate path separator, a run that
+// starts with it is written out as kPathSeparator.
+// TODO(wan@google.com): handle Windows network shares (e.g. \\server\share).
+void FilePath::Normalize() {
+  // The result is built in a std::string rather than a hand-managed char
+  // buffer, so nothing can leak if an append or the final assignment
+  // throws.  c_str() never returns NULL, so no NULL guard is needed.
+  std::string normalized;
+  normalized.reserve(pathname_.length());
+
+  // Scanning stops at the first '\0', exactly like the C-string based
+  // copy this replaces.
+  const char* src = pathname_.c_str();
+
+  while (*src != '\0') {
+    if (!IsPathSeparator(*src)) {
+      normalized += *src++;
+      continue;
+    }
+
+    // Emit one separator for the whole run, taken from its first
+    // character.
+    char separator = *src;
+#if GTEST_HAS_ALT_PATH_SEP_
+
+    if (separator == kAlternatePathSeparator) {
+      separator = kPathSeparator;
+    }
+
+#endif
+    normalized += separator;
+
+    // Skip the rest of the run.
+    while (IsPathSeparator(*src)) {
+      src++;
+    }
+  }
+
+  pathname_ = normalized;
+}
+
+}  // namespace internal
+}  // namespace testing
@@ -0,0 +1,884 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+
+#include "gtest/internal/gtest-port.h"
+
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#if GTEST_OS_WINDOWS_MOBILE
+# include <windows.h>  // For TerminateProcess()
+#elif GTEST_OS_WINDOWS
+# include <io.h>
+# include <sys/stat.h>
+#else
+# include <unistd.h>
+#endif  // GTEST_OS_WINDOWS_MOBILE
+
+#if GTEST_OS_MAC
+# include <mach/mach_init.h>
+# include <mach/task.h>
+# include <mach/vm_map.h>
+#endif  // GTEST_OS_MAC
+
+#if GTEST_OS_QNX
+# include <devctl.h>
+# include <sys/procfs.h>
+#endif  // GTEST_OS_QNX
+
+#include "gtest/gtest-spi.h"
+#include "gtest/gtest-message.h"
+#include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/gtest-string.h"
+
+// Indicates that this translation unit is part of Google Test's
+// implementation.  It must come before gtest-internal-inl.h is
+// included, or there will be a compiler error.  This trick is to
+// prevent a user from accidentally including gtest-internal-inl.h in
+// his code.
+#define GTEST_IMPLEMENTATION_ 1
+#include "src/gtest-internal-inl.h"
+#undef GTEST_IMPLEMENTATION_
+
+namespace testing {
+namespace internal {
+
+// File descriptor numbers of the standard output and standard error
+// streams.  CaptureStdout() and CaptureStderr() pass them to
+// CaptureStream() to select the stream to redirect.
+#if defined(_MSC_VER) || defined(__BORLANDC__)
+// MSVC and C++Builder do not provide a definition of STDERR_FILENO.
+// The values are spelled out as literals there.
+const int kStdOutFileno = 1;
+const int kStdErrFileno = 2;
+#else
+const int kStdOutFileno = STDOUT_FILENO;
+const int kStdErrFileno = STDERR_FILENO;
+#endif  // _MSC_VER
+
+#if GTEST_OS_MAC
+
+// Mac implementation: asks the kernel for the thread list of the current
+// task.  Returns the number of threads, or 0 when the query fails.
+size_t GetThreadCount() {
+  const task_t self = mach_task_self();
+  mach_msg_type_number_t count;
+  thread_act_array_t threads;
+
+  if (task_threads(self, &threads, &count) != KERN_SUCCESS) {
+    return 0;
+  }
+
+  // task_threads allocates resources in the thread list; release them
+  // here to avoid leaks.
+  vm_deallocate(self,
+                reinterpret_cast<vm_address_t>(threads),
+                sizeof(thread_t) * count);
+  return static_cast<size_t>(count);
+}
+
+#elif GTEST_OS_QNX
+
+// QNX implementation: reads the process info through /proc/self/as.
+// Returns the number of threads, or 0 when the file cannot be opened or
+// the devctl() query fails.
+size_t GetThreadCount() {
+  const int fd = open("/proc/self/as", O_RDONLY);
+
+  if (fd < 0) {
+    return 0;
+  }
+
+  procfs_info info;
+  const int status = devctl(fd, DCMD_PROC_INFO, &info, sizeof(info), NULL);
+  close(fd);
+
+  if (status != EOK) {
+    return 0;
+  }
+
+  return static_cast<size_t>(info.num_threads);
+}
+
+#else
+
+// Fallback implementation for platforms without a dedicated version.
+size_t GetThreadCount() {
+  // There's no portable way to detect the number of threads, so the
+  // "cannot detect" value is always reported.
+  const size_t kCannotDetect = 0;
+  return kCannotDetect;
+}
+
+#endif  // GTEST_OS_MAC
+
+#if GTEST_USES_POSIX_RE
+
+// Implements RE.  Currently only needed for death tests.
+
+RE::~RE() {
+  // pattern_ is released with free() to match how it was allocated.
+  free(const_cast<char*>(pattern_));
+
+  if (!is_valid_) {
+    // regfree'ing an invalid regex might crash because the content
+    // of the regex is undefined, so both compiled regexes are left
+    // untouched.
+    return;
+  }
+
+  // Since the regex's are essentially the same, one cannot be valid
+  // without the other being so too.
+  regfree(&partial_regex_);
+  regfree(&full_regex_);
+}
+
+// Reports whether regular expression re matches the whole of str.  An
+// invalid re never matches.
+bool RE::FullMatch(const char* str, const RE& re) {
+  regmatch_t unused_match;
+  return re.is_valid_ &&
+         regexec(&re.full_regex_, str, 1, &unused_match, 0) == 0;
+}
+
+// Reports whether regular expression re matches some substring of str
+// (str itself included).  An invalid re never matches.
+bool RE::PartialMatch(const char* str, const RE& re) {
+  regmatch_t unused_match;
+  return re.is_valid_ &&
+         regexec(&re.partial_regex_, str, 1, &unused_match, 0) == 0;
+}
+
+// Initializes an RE from its string representation.  Two POSIX extended
+// regexes are compiled: "^(regex)$", used by FullMatch(), and the pattern
+// itself, used by PartialMatch().  is_valid_ ends up true only if both
+// compile; otherwise a non-fatal test failure is reported.
+void RE::Init(const char* regex) {
+  pattern_ = posix::StrDup(regex);
+
+  // Wrap the pattern so that it has to match the entire string.  The
+  // std::string owns the temporary text, so it is released even if the
+  // EXPECT_TRUE below throws.
+  const std::string full_pattern = std::string("^(") + regex + ")$";
+  is_valid_ = regcomp(&full_regex_, full_pattern.c_str(), REG_EXTENDED) == 0;
+
+  // partial_regex_ is compiled only when full_regex_ compiled.  ~RE()
+  // frees both regexes or neither, depending on is_valid_.
+  //
+  // Some implementations of POSIX regex (e.g. on at least some
+  // versions of Cygwin) don't accept the empty string as a valid
+  // regex.  We change it to an equivalent form "()" to be safe.
+  if (is_valid_) {
+    const char* const partial_regex = (*regex == '\0') ? "()" : regex;
+    is_valid_ = regcomp(&partial_regex_, partial_regex, REG_EXTENDED) == 0;
+
+    if (!is_valid_) {
+      // full_regex_ did compile, but ~RE() skips regfree() once
+      // is_valid_ is false, so it has to be released here or it leaks.
+      regfree(&full_regex_);
+    }
+  }
+
+  EXPECT_TRUE(is_valid_)
+      << "Regular expression \"" << regex
+      << "\" is not a valid POSIX Extended regular expression.";
+}
+
+#elif GTEST_USES_SIMPLE_RE
+
+// Reports whether ch is one of the characters of str.  The terminating
+// '\0' of str does not count, so ch == '\0' is never in the set.
+bool IsInSet(char ch, const char* str) {
+  if (ch == '\0') {
+    return false;
+  }
+
+  return strchr(str, ch) != NULL;
+}
+
+// Character classification helpers.  Unlike similar functions in
+// <ctype.h>, these aren't affected by the current locale.
+//
+// Reports whether ch is one of '0'..'9'.
+bool IsAsciiDigit(char ch) {
+  return ch >= '0' && ch <= '9';
+}
+// Returns true iff ch is one of the punctuation characters listed in
+// the literal below.
+bool IsAsciiPunct(char ch) {
+  return IsInSet(ch, "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~");
+}
+// Returns true iff ch is a repetition meta character: '?', '*' or '+'.
+bool IsRepeat(char ch) {
+  return IsInSet(ch, "?*+");
+}
+// Returns true iff ch is a space, form feed, newline, carriage return,
+// horizontal tab or vertical tab.
+bool IsAsciiWhiteSpace(char ch) {
+  return IsInSet(ch, " \f\n\r\t\v");
+}
+// Reports whether ch is an ASCII letter, an ASCII digit or '_'.
+bool IsAsciiWordChar(char ch) {
+  if (ch == '_') {
+    return true;
+  }
+
+  const bool is_lower = ch >= 'a' && ch <= 'z';
+  const bool is_upper = ch >= 'A' && ch <= 'Z';
+  const bool is_digit = ch >= '0' && ch <= '9';
+  return is_lower || is_upper || is_digit;
+}
+
+// Reports whether "\\c" is a supported escape sequence: c must be either
+// a punctuation character or one of the class/control letters below.
+bool IsValidEscape(char c) {
+  if (IsAsciiPunct(c)) {
+    return true;
+  }
+
+  return IsInSet(c, "dDfnrsStvwW");
+}
+
+// Reports whether a single regex atom matches the character ch.  The
+// atom is pattern_char when escaped is false and "\\pattern_char" when it
+// is true.  The result is undefined if the atom is invalid.
+bool AtomMatchesChar(bool escaped, char pattern_char, char ch) {
+  if (!escaped) {
+    // A literal matches itself; '.' additionally matches anything but a
+    // newline.
+    if (pattern_char == ch) {
+      return true;
+    }
+
+    return pattern_char == '.' && ch != '\n';
+  }
+
+  // "\\p" where p is pattern_char.
+  switch (pattern_char) {
+    case 'd': return IsAsciiDigit(ch);
+    case 'D': return !IsAsciiDigit(ch);
+    case 'f': return ch == '\f';
+    case 'n': return ch == '\n';
+    case 'r': return ch == '\r';
+    case 's': return IsAsciiWhiteSpace(ch);
+    case 'S': return !IsAsciiWhiteSpace(ch);
+    case 't': return ch == '\t';
+    case 'v': return ch == '\v';
+    case 'w': return IsAsciiWordChar(ch);
+    case 'W': return !IsAsciiWordChar(ch);
+
+    default:
+      // An escaped punctuation character stands for itself.
+      return IsAsciiPunct(pattern_char) && pattern_char == ch;
+  }
+}
+
+// Helper function used by ValidateRegex() to format error messages.
+std::string FormatRegexSyntaxError(const char* regex, int index) {
+  return (Message() << "Syntax error at index " << index
+          << " in simple regular expression \"" << regex << "\": ").GetString();
+}
+
+// Checks that regex is a valid simple regular expression.  Every problem
+// found is reported as a non-fatal failure; the function returns false if
+// there was at least one, and true otherwise.  Scanning stops early only
+// for a NULL regex or a trailing backslash.
+bool ValidateRegex(const char* regex) {
+  if (regex == NULL) {
+    // TODO(wan@google.com): fix the source file location in the
+    // assertion failures to match where the regex is used in user
+    // code.
+    ADD_FAILURE() << "NULL is not a valid simple regular expression.";
+    return false;
+  }
+
+  bool ok = true;
+
+  // True iff ?, *, or + can follow the previous atom.
+  bool can_repeat_prev = false;
+
+  for (int pos = 0; regex[pos] != '\0'; ++pos) {
+    if (regex[pos] == '\\') {  // An escape sequence
+      const int backslash_pos = pos;
+      ++pos;  // Now at the character being escaped.
+
+      if (regex[pos] == '\0') {
+        ADD_FAILURE() << FormatRegexSyntaxError(regex, backslash_pos)
+                      << "'\\' cannot appear at the end.";
+        return false;
+      }
+
+      if (!IsValidEscape(regex[pos])) {
+        ADD_FAILURE() << FormatRegexSyntaxError(regex, backslash_pos)
+                      << "invalid escape sequence \"\\" << regex[pos] << "\".";
+        ok = false;
+      }
+
+      can_repeat_prev = true;
+      continue;
+    }
+
+    // Not an escape sequence.
+    const char ch = regex[pos];
+
+    if (ch == '^' && pos > 0) {
+      ADD_FAILURE() << FormatRegexSyntaxError(regex, pos)
+                    << "'^' can only appear at the beginning.";
+      ok = false;
+    }
+    else if (ch == '$' && regex[pos + 1] != '\0') {
+      ADD_FAILURE() << FormatRegexSyntaxError(regex, pos)
+                    << "'$' can only appear at the end.";
+      ok = false;
+    }
+    else if (IsInSet(ch, "()[]{}|")) {
+      ADD_FAILURE() << FormatRegexSyntaxError(regex, pos)
+                    << "'" << ch << "' is unsupported.";
+      ok = false;
+    }
+    else if (IsRepeat(ch) && !can_repeat_prev) {
+      ADD_FAILURE() << FormatRegexSyntaxError(regex, pos)
+                    << "'" << ch << "' can only follow a repeatable token.";
+      ok = false;
+    }
+
+    can_repeat_prev = !IsInSet(ch, "^$?*+");
+  }
+
+  return ok;
+}
+
+// Matches a repeated regex atom followed by a valid simple regular
+// expression.  The atom is c if escaped is false, or \c otherwise;
+// repeat is the repetition meta character (?, *, or +) and regex is what
+// follows it.  The behavior is undefined if str contains too many
+// characters to be indexable by size_t, in which case the test will
+// probably time out anyway.  We are fine with this limitation as
+// std::string has it too.
+bool MatchRepetitionAndRegexAtHead(
+  bool escaped, char c, char repeat, const char* regex,
+  const char* str) {
+  // '+' needs at least one occurrence of the atom.
+  size_t min_count = 0;
+
+  if (repeat == '+') {
+    min_count = 1;
+  }
+
+  // '?' allows at most one occurrence; the others are unbounded.
+  // We cannot call numeric_limits::max() as it conflicts with the
+  // max() macro on Windows.
+  size_t max_count = static_cast<size_t>(-1) - 1;
+
+  if (repeat == '?') {
+    max_count = 1;
+  }
+
+  size_t matched = 0;
+
+  while (matched <= max_count) {
+    // Here the atom is known to match each of the first `matched`
+    // characters of str.  Any split that lets the tail match is good
+    // enough: only *whether* the pattern matches is of interest, so
+    // there is no need to find a greedy match.
+    if (matched >= min_count && MatchRegexAtHead(regex, str + matched)) {
+      return true;
+    }
+
+    const char next = str[matched];
+
+    if (next == '\0') {
+      return false;
+    }
+
+    if (!AtomMatchesChar(escaped, c, next)) {
+      return false;
+    }
+
+    ++matched;
+  }
+
+  return false;
+}
+
+// Reports whether regex matches a prefix of str.  regex must be a valid
+// simple regular expression and must not start with "^", or the result
+// is undefined.
+bool MatchRegexAtHead(const char* regex, const char* str) {
+  // An empty regex matches a prefix of anything.
+  if (*regex == '\0') {
+    return true;
+  }
+
+  // "$" only matches the end of a string.  Note that regex being
+  // valid guarantees that there's nothing after "$" in it.
+  if (*regex == '$') {
+    return *str == '\0';
+  }
+
+  // Locate the first atom, stepping over the backslash of an escape
+  // sequence.
+  const bool escaped = (*regex == '\\');
+  const char* const atom = escaped ? regex + 1 : regex;
+
+  if (IsRepeat(atom[1])) {
+    // MatchRepetitionAndRegexAtHead() calls MatchRegexAtHead(), so
+    // here's an indirect recursion.  It terminates as the regex gets
+    // shorter in each recursion.
+    return MatchRepetitionAndRegexAtHead(
+             escaped, atom[0], atom[1], atom + 2, str);
+  }
+
+  // The regex isn't empty, isn't "$", and doesn't start with a
+  // repetition: the first atom must match the first character of str,
+  // and the rest of the regex must match what follows.
+  if (*str == '\0') {
+    return false;
+  }
+
+  return AtomMatchesChar(escaped, *atom, *str) &&
+         MatchRegexAtHead(atom + 1, str + 1);
+}
+
+// Reports whether regex matches any substring of str.  regex must be a
+// valid simple regular expression, or the result is undefined.  A NULL
+// regex or a NULL str never matches.
+//
+// The algorithm is recursive, but the recursion depth doesn't exceed
+// the regex length, so we won't need to worry about running out of
+// stack space normally.  In rare cases the time complexity can be
+// exponential with respect to the regex length + the string length,
+// but usually it's much faster (often close to linear).
+bool MatchRegexAnywhere(const char* regex, const char* str) {
+  if (regex == NULL || str == NULL) {
+    return false;
+  }
+
+  // A leading '^' pins the match to the start of str.
+  if (*regex == '^') {
+    return MatchRegexAtHead(regex + 1, str);
+  }
+
+  // Otherwise try every starting position, including the position of
+  // the terminating '\0'.
+  for (const char* start = str; ; ++start) {
+    if (MatchRegexAtHead(regex, start)) {
+      return true;
+    }
+
+    if (*start == '\0') {
+      return false;
+    }
+  }
+}
+
+// Implements the RE class.
+
+// Releases both pattern strings with free().  Init() sets both pointers
+// to NULL before allocating, so a pattern that was never built is freed
+// as NULL.
+RE::~RE() {
+  free(const_cast<char*>(pattern_));
+  free(const_cast<char*>(full_pattern_));
+}
+
+// Reports whether regular expression re matches the whole of str.  An
+// invalid re never matches.
+bool RE::FullMatch(const char* str, const RE& re) {
+  if (!re.is_valid_) {
+    return false;
+  }
+
+  // full_pattern_ is the pattern anchored with '^' and '$'.
+  return MatchRegexAnywhere(re.full_pattern_, str);
+}
+
+// Reports whether regular expression re matches some substring of str
+// (str itself included).  An invalid re never matches.
+bool RE::PartialMatch(const char* str, const RE& re) {
+  if (!re.is_valid_) {
+    return false;
+  }
+
+  return MatchRegexAnywhere(re.pattern_, str);
+}
+
+// Initializes an RE from its string representation: keeps a copy of the
+// pattern, validates it, and for a valid pattern builds full_pattern_, a
+// version anchored with '^' and '$' that FullMatch() uses.
+void RE::Init(const char* regex) {
+  pattern_ = NULL;
+  full_pattern_ = NULL;
+
+  if (regex != NULL) {
+    pattern_ = posix::StrDup(regex);
+  }
+
+  is_valid_ = ValidateRegex(regex);
+
+  if (!is_valid_) {
+    // No need to calculate the full pattern when the regex is invalid.
+    return;
+  }
+
+  // Room for the pattern itself plus a prepended '^', an appended '$'
+  // and the terminating '\0'.
+  const size_t len = strlen(regex);
+  char* const storage = static_cast<char*>(malloc(len + 3));
+  full_pattern_ = storage;
+  size_t used = 0;
+
+  if (*regex != '^') {
+    storage[used++] = '^';  // Makes sure full_pattern_ starts with '^'.
+  }
+
+  // We don't use snprintf or strncpy, as they trigger a warning when
+  // compiled with VC++ 8.0.
+  memcpy(storage + used, regex, len);
+  used += len;
+
+  if (len == 0 || regex[len - 1] != '$') {
+    storage[used++] = '$';  // Makes sure full_pattern_ ends with '$'.
+  }
+
+  storage[used] = '\0';
+}
+
+#endif  // GTEST_USES_POSIX_RE
+
+// File name that the location formatters below substitute when they are
+// given a NULL file.
+const char kUnknownFile[] = "unknown file";
+
+// Formats a source file path and a line number as they would appear
+// in an error message from the compiler used to compile this code:
+// "file(line):" under MSVC and "file:line:" elsewhere.  A NULL file is
+// shown as kUnknownFile, and a negative line yields just "file:".
+GTEST_API_ ::std::string FormatFileLocation(const char* file, int line) {
+  std::string location(file == NULL ? kUnknownFile : file);
+
+  if (line >= 0) {
+#ifdef _MSC_VER
+    location += "(" + StreamableToString(line) + ")";
+#else
+    location += ":" + StreamableToString(line);
+#endif  // _MSC_VER
+  }
+
+  location += ":";
+  return location;
+}
+
+// Formats a file location for compiler-independent XML output, as
+// "file:line", or just "file" when line is negative.  A NULL file is
+// shown as kUnknownFile.
+// Although this function is not platform dependent, we put it next to
+// FormatFileLocation in order to contrast the two functions.
+// Note that FormatCompilerIndependentFileLocation() does NOT append colon
+// to the file location it produces, unlike FormatFileLocation().
+GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(
+  const char* file, int line) {
+  std::string location(file == NULL ? kUnknownFile : file);
+
+  if (line >= 0) {
+    location += ":" + StreamableToString(line);
+  }
+
+  return location;
+}
+
+
+// Starts a log line: a newline, the severity marker, the source location
+// and ": ".  The caller streams the message text after it.
+GTestLog::GTestLog(GTestLogSeverity severity, const char* file, int line)
+  : severity_(severity) {
+  // Any severity other than INFO, WARNING and ERROR is shown as FATAL.
+  const char* marker = "[ FATAL ]";
+
+  if (severity == GTEST_INFO) {
+    marker = "[  INFO ]";
+  }
+  else if (severity == GTEST_WARNING) {
+    marker = "[WARNING]";
+  }
+  else if (severity == GTEST_ERROR) {
+    marker = "[ ERROR ]";
+  }
+
+  GetStream() << ::std::endl << marker << " "
+              << FormatFileLocation(file, line).c_str() << ": ";
+}
+
+// Ends the log line.  For GTEST_FATAL it additionally flushes stderr and
+// aborts the program.
+GTestLog::~GTestLog() {
+  GetStream() << ::std::endl;
+
+  if (severity_ != GTEST_FATAL) {
+    return;
+  }
+
+  fflush(stderr);
+  posix::Abort();
+}
+// Disable Microsoft deprecation warnings for POSIX functions called from
+// this class (creat, dup, dup2, and close)
+#ifdef _MSC_VER
+# pragma warning(push)
+# pragma warning(disable: 4996)
+#endif  // _MSC_VER
+
+#if GTEST_HAS_STREAM_REDIRECTION
+
+// Object that captures an output stream (stdout/stderr).
+//
+// The constructor redirects the given file descriptor to a freshly
+// created temporary file.  GetCapturedString() restores the original
+// descriptor and returns what was written to the file, and the
+// destructor removes the file.
+class CapturedStream {
+ public:
+  // The ctor redirects the stream to a temporary file.  A duplicate of
+  // the original descriptor is kept in uncaptured_fd_ so that it can be
+  // restored later.
+  explicit CapturedStream(int fd) : fd_(fd), uncaptured_fd_(dup(fd)) {
+# if GTEST_OS_WINDOWS
+    char temp_dir_path[MAX_PATH + 1] = { '\0' };  // NOLINT
+    char temp_file_path[MAX_PATH + 1] = { '\0' };  // NOLINT
+
+    ::GetTempPathA(sizeof(temp_dir_path), temp_dir_path);
+    const UINT success = ::GetTempFileNameA(temp_dir_path,
+                                            "gtest_redir",
+                                            0,  // Generate unique file name.
+                                            temp_file_path);
+    GTEST_CHECK_(success != 0)
+        << "Unable to create a temporary file in " << temp_dir_path;
+    const int captured_fd = creat(temp_file_path, _S_IREAD | _S_IWRITE);
+    GTEST_CHECK_(captured_fd != -1) << "Unable to open temporary file "
+                                    << temp_file_path;
+    filename_ = temp_file_path;
+# else
+    // There's no guarantee that a test has write access to the current
+    // directory, so we create the temporary file in the /tmp directory
+    // instead. We use /tmp on most systems, and /sdcard on Android.
+    // That's because Android doesn't have /tmp.
+#  if GTEST_OS_LINUX_ANDROID
+    // Note: Android applications are expected to call the framework's
+    // Context.getExternalStorageDirectory() method through JNI to get
+    // the location of the world-writable SD Card directory. However,
+    // this requires a Context handle, which cannot be retrieved
+    // globally from native code. Doing so also precludes running the
+    // code as part of a regular standalone executable, which doesn't
+    // run in a Dalvik process (e.g. when running it through 'adb shell').
+    //
+    // The location /sdcard is directly accessible from native code
+    // and is the only location (unofficially) supported by the Android
+    // team. It's generally a symlink to the real SD Card mount point
+    // which can be /mnt/sdcard, /mnt/sdcard0, /system/media/sdcard, or
+    // other OEM-customized locations. Never rely on these, and always
+    // use /sdcard.
+    char name_template[] = "/sdcard/gtest_captured_stream.XXXXXX";
+#  else
+    char name_template[] = "/tmp/captured_stream.XXXXXX";
+#  endif  // GTEST_OS_LINUX_ANDROID
+    const int captured_fd = mkstemp(name_template);
+    // Fail with a diagnostic, as the Windows branch does, instead of
+    // passing -1 on to dup2() and close() below.
+    GTEST_CHECK_(captured_fd != -1) << "Unable to open temporary file "
+                                    << name_template;
+    filename_ = name_template;
+# endif  // GTEST_OS_WINDOWS
+    // Flush pending output so that it goes to the original destination
+    // before the descriptor is redirected.
+    fflush(NULL);
+    dup2(captured_fd, fd_);
+    close(captured_fd);
+  }
+
+  // Removes the temporary file.
+  ~CapturedStream() {
+    remove(filename_.c_str());
+  }
+
+  // Stops the redirection (the first time it is called) and returns
+  // everything that was written to the temporary file.
+  std::string GetCapturedString() {
+    if (uncaptured_fd_ != -1) {
+      // Restores the original stream.
+      fflush(NULL);
+      dup2(uncaptured_fd_, fd_);
+      close(uncaptured_fd_);
+      uncaptured_fd_ = -1;
+    }
+
+    FILE* const file = posix::FOpen(filename_.c_str(), "r");
+    // ReadEntireFile() uses the handle unconditionally, so a failed
+    // open has to be reported here.
+    GTEST_CHECK_(file != NULL) << "Unable to open temporary file "
+                               << filename_;
+    const std::string content = ReadEntireFile(file);
+    posix::FClose(file);
+    return content;
+  }
+
+ private:
+  // Reads the entire content of a file as an std::string.
+  static std::string ReadEntireFile(FILE* file);
+
+  // Returns the size (in bytes) of a file.
+  static size_t GetFileSize(FILE* file);
+
+  const int fd_;  // A stream to capture.
+  // Duplicate of the original descriptor; -1 once it has been restored.
+  int uncaptured_fd_;
+  // Name of the temporary file holding the captured output.
+  ::std::string filename_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(CapturedStream);
+};
+
+// Returns the size (in bytes) of a file, determined by seeking to its
+// end.  Returns 0 if the seek or the position query fails, so that a
+// failure is not turned into a huge size by the conversion to size_t.
+// The file position is left at the end of the file.
+size_t CapturedStream::GetFileSize(FILE* file) {
+  if (fseek(file, 0, SEEK_END) != 0) {
+    return 0;
+  }
+
+  const long end_offset = ftell(file);  // NOLINT
+
+  if (end_offset < 0) {
+    return 0;
+  }
+
+  return static_cast<size_t>(end_offset);
+}
+
+// Reads the entire content of a file as a string.  The size is measured
+// first with GetFileSize(); reading then restarts from the beginning of
+// the file.
+std::string CapturedStream::ReadEntireFile(FILE* file) {
+  const size_t total = GetFileSize(file);
+  char* const data = new char[total];
+
+  fseek(file, 0, SEEK_SET);
+
+  size_t filled = 0;  // # of bytes read so far
+
+  // Keeps reading the file until we cannot read further or the
+  // pre-determined file size is reached.
+  for (;;) {
+    const size_t got = fread(data + filled, 1, total - filled, file);
+    filled += got;
+
+    if (got == 0 || filled >= total) {
+      break;
+    }
+  }
+
+  const std::string content(data, filled);
+  delete[] data;
+
+  return content;
+}
+
+# ifdef _MSC_VER
+#  pragma warning(pop)
+# endif  // _MSC_VER
+
+// The active capturer for each stream.  CaptureStream() stores a new
+// CapturedStream here and GetCapturedStream() deletes it and resets the
+// pointer to NULL, so a non-NULL value means a capture is in progress.
+static CapturedStream* g_captured_stderr = NULL;
+static CapturedStream* g_captured_stdout = NULL;
+
+// Starts capturing an output stream (stdout/stderr).  fd is the
+// descriptor to redirect, stream_name is used in the error message only,
+// and *stream receives the new capturer.  Logs a FATAL message if *stream
+// already holds a capturer.
+void CaptureStream(int fd, const char* stream_name, CapturedStream** stream) {
+  const bool already_capturing = (*stream != NULL);
+
+  if (already_capturing) {
+    GTEST_LOG_(FATAL) << "Only one " << stream_name
+                      << " capturer can exist at a time.";
+  }
+
+  *stream = new CapturedStream(fd);
+}
+
+// Stops capturing the output stream and returns the captured string.
+// The capturer is destroyed and *captured_stream is reset to NULL.
+std::string GetCapturedStream(CapturedStream** captured_stream) {
+  CapturedStream* const capturer = *captured_stream;
+  const std::string content = capturer->GetCapturedString();
+
+  *captured_stream = NULL;
+  delete capturer;
+
+  return content;
+}
+
+// Starts capturing stdout.  The capturer is kept in g_captured_stdout
+// until GetCapturedStdout() is called.
+void CaptureStdout() {
+  CaptureStream(kStdOutFileno, "stdout", &g_captured_stdout);
+}
+
+// Starts capturing stderr.  The capturer is kept in g_captured_stderr
+// until GetCapturedStderr() is called.
+void CaptureStderr() {
+  CaptureStream(kStdErrFileno, "stderr", &g_captured_stderr);
+}
+
+// Stops capturing stdout and returns the captured string.
+// GetCapturedStream() dereferences the capturer unconditionally, so
+// CaptureStdout() must have been called first.
+std::string GetCapturedStdout() {
+  return GetCapturedStream(&g_captured_stdout);
+}
+
+// Stops capturing stderr and returns the captured string.
+// GetCapturedStream() dereferences the capturer unconditionally, so
+// CaptureStderr() must have been called first.
+std::string GetCapturedStderr() {
+  return GetCapturedStream(&g_captured_stderr);
+}
+
+#endif  // GTEST_HAS_STREAM_REDIRECTION
+
+#if GTEST_HAS_DEATH_TEST
+
+// A copy of all command line arguments.  Set by InitGoogleTest().
+::std::vector<testing::internal::string> g_argvs;
+
+// Optional replacement for g_argvs, installed by SetInjectableArgvs().
+// While it is non-NULL, GetInjectableArgvs() returns it instead of
+// g_argvs.  The vector is owned here: SetInjectableArgvs() deletes the
+// previous one when a different one is installed.
+static const ::std::vector<testing::internal::string>* g_injected_test_argvs =
+  NULL;  // Owned.
+
+// Installs argvs as the injected argument vector and takes ownership of
+// it.  The previously injected vector is deleted unless it is the very
+// same object.
+void SetInjectableArgvs(const ::std::vector<testing::internal::string>* argvs) {
+  const ::std::vector<testing::internal::string>* const previous =
+    g_injected_test_argvs;
+  g_injected_test_argvs = argvs;
+
+  if (previous != argvs) {
+    delete previous;
+  }
+}
+
+// Returns the injected argument vector if one has been installed, and
+// the real command line arguments (g_argvs) otherwise.
+const ::std::vector<testing::internal::string>& GetInjectableArgvs() {
+  if (g_injected_test_argvs == NULL) {
+    return g_argvs;
+  }
+
+  return *g_injected_test_argvs;
+}
+#endif  // GTEST_HAS_DEATH_TEST
+
+#if GTEST_OS_WINDOWS_MOBILE
+namespace posix {
+// Windows Mobile implementation of posix::Abort(): calls DebugBreak()
+// and then terminates the current process with exit code 1.
+void Abort() {
+  DebugBreak();
+  TerminateProcess(GetCurrentProcess(), 1);
+}
+}  // namespace posix
+#endif  // GTEST_OS_WINDOWS_MOBILE
+
+// Returns the name of the environment variable corresponding to the
+// given flag: GTEST_FLAG_PREFIX_ followed by the flag name, with every
+// character passed through ToUpper().  For example, FlagToEnvVar("foo")
+// will return "GTEST_FOO" in the open-source version.
+static std::string FlagToEnvVar(const char* flag) {
+  const std::string prefixed_flag =
+    (Message() << GTEST_FLAG_PREFIX_ << flag).GetString();
+
+  Message upper_cased;
+
+  for (std::string::const_iterator it = prefixed_flag.begin();
+       it != prefixed_flag.end(); ++it) {
+    upper_cased << ToUpper(*it);
+  }
+
+  return upper_cased.GetString();
+}
+
+// Parses 'str' for a 32-bit signed integer.  If successful, writes
+// the result to *value and returns true; otherwise leaves *value
+// unchanged and returns false.
+//
+// On failure a warning that starts with src_text is printed to stdout.
+// The input is rejected when strtol() does not consume the whole string,
+// or when the number does not fit in a long or in an Int32.
+bool ParseInt32(const Message& src_text, const char* str, Int32* value) {
+  // Parses the text as a decimal integer.  errno is cleared first so
+  // that ERANGE afterwards can only come from this strtol() call.
+  char* end = NULL;
+  errno = 0;
+  const long long_value = strtol(str, &end, 10);  // NOLINT
+  // Overflow of long is detected through errno rather than by comparing
+  // against LONG_MAX/LONG_MIN: where long is 32 bits wide those are
+  // valid Int32 values and must be accepted.
+  const bool overflows_long = (errno == ERANGE);
+
+  // Has strtol() consumed all characters in the string?
+  if (*end != '\0') {
+    // No - an invalid character was encountered.
+    Message msg;
+    msg << "WARNING: " << src_text
+        << " is expected to be a 32-bit integer, but actually"
+        << " has value \"" << str << "\".\n";
+    printf("%s", msg.GetString().c_str());
+    fflush(stdout);
+    return false;
+  }
+
+  // Is the parsed value in the range of an Int32?
+  const Int32 result = static_cast<Int32>(long_value);
+
+  if (overflows_long ||
+      // The parsed value overflows as a long.
+      result != long_value
+      // The parsed value overflows as an Int32.
+     ) {
+    Message msg;
+    msg << "WARNING: " << src_text
+        << " is expected to be a 32-bit integer, but actually"
+        << " has value " << str << ", which overflows.\n";
+    printf("%s", msg.GetString().c_str());
+    fflush(stdout);
+    return false;
+  }
+
+  *value = result;
+  return true;
+}
+
+// Reads the Boolean environment variable that corresponds to the given
+// flag.  Returns default_value if the variable is not set; otherwise the
+// result is true iff the variable's text is not exactly "0".
+bool BoolFromGTestEnv(const char* flag, bool default_value) {
+  const std::string env_var = FlagToEnvVar(flag);
+  const char* const raw = posix::GetEnv(env_var.c_str());
+
+  if (raw == NULL) {
+    return default_value;
+  }
+
+  return strcmp(raw, "0") != 0;
+}
+
+// Reads the 32-bit integer stored in the environment variable that
+// corresponds to the given flag.  Returns default_value if the variable
+// is not set.  If it is set but ParseInt32() rejects it, a note about
+// the default being used is printed and default_value is returned.
+Int32 Int32FromGTestEnv(const char* flag, Int32 default_value) {
+  const std::string env_var = FlagToEnvVar(flag);
+  const char* const raw = posix::GetEnv(env_var.c_str());
+
+  if (raw == NULL) {
+    // The environment variable is not set.
+    return default_value;
+  }
+
+  Int32 parsed = default_value;
+
+  if (ParseInt32(Message() << "Environment variable " << env_var,
+                 raw, &parsed)) {
+    return parsed;
+  }
+
+  printf("The default value %s is used.\n",
+         (Message() << default_value).GetString().c_str());
+  fflush(stdout);
+  return default_value;
+}
+
+// Reads the string environment variable that corresponds to the given
+// flag.  Returns the pointer obtained from posix::GetEnv() when the
+// variable is set, and default_value otherwise.
+const char* StringFromGTestEnv(const char* flag, const char* default_value) {
+  const std::string env_var = FlagToEnvVar(flag);
+  const char* const raw = posix::GetEnv(env_var.c_str());
+
+  if (raw == NULL) {
+    return default_value;
+  }
+
+  return raw;
+}
+
+}  // namespace internal
+}  // namespace testing
@@ -0,0 +1,395 @@
+// Copyright 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+
+// Google Test - The Google C++ Testing Framework
+//
+// This file implements a universal value printer that can print a
+// value of any type T:
+//
+//   void ::testing::internal::UniversalPrinter<T>::Print(value, ostream_ptr);
+//
+// It uses the << operator when possible, and prints the bytes in the
+// object otherwise.  A user can override its behavior for a class
+// type Foo by defining either operator<<(::std::ostream&, const Foo&)
+// or void PrintTo(const Foo&, ::std::ostream*) in the namespace that
+// defines Foo.
+
+#include "gtest/gtest-printers.h"
+#include <ctype.h>
+#include <stdio.h>
+#include <ostream>  // NOLINT
+#include <string>
+#include "gtest/internal/gtest-port.h"
+
+namespace testing {
+
+namespace {
+
+using ::std::ostream;
+
+// Writes `count` bytes of the object, beginning at offset `start`, to *os
+// as two-digit upper-case hexadecimal values.
+void PrintByteSegmentInObjectTo(const unsigned char* obj_bytes, size_t start,
+                                size_t count, ostream* os) {
+  char hex[5] = "";
+  const size_t end = start + count;
+
+  for (size_t pos = start; pos != end; ++pos) {
+    // Bytes are grouped in pairs for readability: a ' ' precedes every
+    // even offset and a '-' precedes every odd one.  Nothing is emitted
+    // before the first byte of the segment.
+    if (pos != start) {
+      *os << ((pos % 2) == 0 ? ' ' : '-');
+    }
+
+    GTEST_SNPRINTF_(hex, sizeof(hex), "%02X", obj_bytes[pos]);
+    *os << hex;
+  }
+}
+
+// Prints the size of the object followed by a hex dump of its bytes,
+// enclosed in angle brackets, to the given ostream.
+void PrintBytesInObjectToImpl(const unsigned char* obj_bytes, size_t count,
+                              ostream* os) {
+  const size_t kThreshold = 132;
+  const size_t kChunkSize = 64;
+
+  // Start with the object size.
+  *os << count << "-byte object <";
+
+  // Objects of kThreshold bytes or more are abbreviated: only the first
+  // and (roughly) the last kChunkSize bytes are shown.
+  // TODO(wan): let the user control the threshold using a flag.
+  if (count >= kThreshold) {
+    PrintByteSegmentInObjectTo(obj_bytes, 0, kChunkSize, os);
+    *os << " ... ";
+    // The tail starts on a 2-byte boundary (rounded up).
+    const size_t tail_start = (count - kChunkSize + 1) / 2 * 2;
+    PrintByteSegmentInObjectTo(obj_bytes, tail_start, count - tail_start, os);
+  }
+  else {
+    PrintByteSegmentInObjectTo(obj_bytes, 0, count, os);
+  }
+
+  *os << ">";
+}
+
+}  // namespace
+
+namespace internal2 {
+
+// Prints the `count` bytes starting at `obj_bytes` to `*os` by forwarding
+// to PrintBytesInObjectToImpl().  The real work lives outside of the
+// ::testing::internal namespace because it uses the << operator, and
+// ::testing::internal contains a << operator that sometimes conflicts
+// with the one in STL.
+void PrintBytesInObjectTo(const unsigned char* obj_bytes, size_t count,
+                          ostream* os) {
+  PrintBytesInObjectToImpl(obj_bytes, count, os);
+}
+
+}  // namespace internal2
+
+namespace internal {
+
+// Depending on the value of a char (or wchar_t), we print it in one
+// of three formats:
+//   - as is if it's a printable ASCII (e.g. 'a', '2', ' '),
+//   - as a hexadecimal escape sequence (e.g. '\x7F'), or
+//   - as a special escape sequence (e.g. '\r', '\n').
+enum CharFormat {
+  kAsIs,  // printed verbatim
+  kHexEscape,  // printed as \x followed by hex digits
+  kSpecialEscape  // printed as a named escape such as \n or \0
+};
+
+// Tells whether c lies in the printable ASCII range [0x20, 0x7E].  The
+// range is checked by hand rather than through isprint(), which is buggy
+// on Windows Mobile.
+inline bool IsPrintableAscii(wchar_t c) {
+  const bool at_or_above_space = c >= 0x20;
+  const bool at_or_below_tilde = c <= 0x7E;
+  return at_or_above_space && at_or_below_tilde;
+}
+
+// Writes the wide or narrow char c to *os the way it would appear inside
+// a character literal (without the surrounding quotes), escaping it when
+// necessary, and reports which kind of formatting was applied.
+// The template argument UnsignedChar is the unsigned version of Char,
+// which is the type of c.
+template <typename UnsignedChar, typename Char>
+static CharFormat PrintAsCharLiteralTo(Char c, ostream* os) {
+  // Text of the special escape sequence for c, or NULL if c has none.
+  const char* special = NULL;
+
+  switch (static_cast<wchar_t>(c)) {
+    case L'\0': special = "\\0";  break;
+    case L'\'': special = "\\'";  break;
+    case L'\\': special = "\\\\"; break;
+    case L'\a': special = "\\a";  break;
+    case L'\b': special = "\\b";  break;
+    case L'\f': special = "\\f";  break;
+    case L'\n': special = "\\n";  break;
+    case L'\r': special = "\\r";  break;
+    case L'\t': special = "\\t";  break;
+    case L'\v': special = "\\v";  break;
+    default: break;
+  }
+
+  if (special != NULL) {
+    *os << special;
+    return kSpecialEscape;
+  }
+
+  if (IsPrintableAscii(c)) {
+    *os << static_cast<char>(c);
+    return kAsIs;
+  }
+
+  // Everything else is shown as a hexadecimal escape.
+  *os << "\\x" + String::FormatHexInt(static_cast<UnsignedChar>(c));
+  return kHexEscape;
+}
+
+// Writes the wchar_t c to *os the way it would appear inside a string
+// literal, escaping it when necessary, and reports how it was formatted.
+static CharFormat PrintAsStringLiteralTo(wchar_t c, ostream* os) {
+  // A single quote needs no escaping inside a string literal.
+  if (c == L'\'') {
+    *os << "'";
+    return kAsIs;
+  }
+
+  // A double quote, on the other hand, must be escaped.
+  if (c == L'"') {
+    *os << "\\\"";
+    return kSpecialEscape;
+  }
+
+  // All remaining characters follow the character-literal rules.
+  return PrintAsCharLiteralTo<wchar_t>(c, os);
+}
+
+// Narrow-char flavor of PrintAsStringLiteralTo(): writes c to *os the way
+// it would appear inside a string literal and reports how it was formatted.
+static CharFormat PrintAsStringLiteralTo(char c, ostream* os) {
+  // Go through unsigned char before widening to wchar_t.
+  const wchar_t widened = static_cast<wchar_t>(static_cast<unsigned char>(c));
+  return PrintAsStringLiteralTo(widened, os);
+}
+
+// Writes a wide or narrow character c to *os as a quoted character
+// literal, followed by its numeric code in parentheses.  '\0' is printed
+// as "'\\0'" with no code; other unprintable characters are escaped by
+// PrintAsCharLiteralTo().  The template argument UnsignedChar is the
+// unsigned version of Char, which is the type of c.
+template <typename UnsignedChar, typename Char>
+void PrintCharAndCodeTo(Char c, ostream* os) {
+  // The literal itself, with an L prefix for multi-byte character types.
+  *os << ((sizeof(c) > 1) ? "L'" : "'");
+  const CharFormat format = PrintAsCharLiteralTo<UnsignedChar>(c, os);
+  *os << "'";
+
+  // For a zero code the literal '\\0' already says everything.
+  if (c == 0) {
+    return;
+  }
+
+  // The decimal code is always shown to aid debugging.
+  *os << " (" << static_cast<int>(c);
+
+  // The hexadecimal code is skipped when the literal was already printed
+  // in the form '\x##' or when the code is in [1, 9].
+  const bool skip_hex = format == kHexEscape || (1 <= c && c <= 9);
+
+  if (!skip_hex) {
+    *os << ", 0x" << String::FormatHexInt(static_cast<UnsignedChar>(c));
+  }
+
+  *os << ")";
+}
+
+void PrintTo(unsigned char c, ::std::ostream* os) {  // Prints c as a char literal plus its code.
+  PrintCharAndCodeTo<unsigned char>(c, os);
+}
+void PrintTo(signed char c, ::std::ostream* os) {  // Same, with unsigned char as the UnsignedChar argument.
+  PrintCharAndCodeTo<unsigned char>(c, os);
+}
+
+// Prints the wchar_t wc as a wide character literal followed by its code,
+// by delegating to PrintCharAndCodeTo().  L'\0' is printed as "L'\\0'".
+void PrintTo(wchar_t wc, ostream* os) {
+  PrintCharAndCodeTo<wchar_t>(wc, os);
+}
+
+// Writes `len` characters starting at `begin` to the ostream as a quoted
+// string literal.  CharType must be either char or wchar_t.  The input
+// may contain '\0' characters and need not be NUL-terminated.
+template <typename CharType>
+static void PrintCharsAsStringTo(
+  const CharType* begin, size_t len, ostream* os) {
+  const char* const open_quote = sizeof(CharType) == 1 ? "\"" : "L\"";
+  *os << open_quote;
+
+  bool prev_was_hex = false;
+  size_t pos = 0;
+
+  while (pos < len) {
+    const CharType ch = begin[pos];
+    ++pos;
+
+    // A hex digit directly after a '\x..' escape would be read as part of
+    // that escape, so close the literal and open a new one in between.
+    if (prev_was_hex && IsXDigit(ch)) {
+      *os << "\" " << open_quote;
+    }
+
+    const CharFormat how = PrintAsStringLiteralTo(ch, os);
+    prev_was_hex = (how == kHexEscape);
+  }
+
+  *os << "\"";
+}
+
+// Prints the `len` elements of a (const) char/wchar_t array starting at
+// address `begin`.  CharType must be either char or wchar_t.
+template <typename CharType>
+static void UniversalPrintCharArray(
+  const CharType* begin, size_t len, ostream* os) {
+  const bool ends_with_nul = len > 0 && begin[len - 1] == '\0';
+
+  if (!ends_with_nul) {
+    // An array such as
+    //    const char kFoo[] = { 'f', 'o', 'o' };
+    // has no trailing '\0'.  Print every element and add a note saying
+    // that the array is not NUL-terminated.
+    PrintCharsAsStringTo(begin, len, os);
+    *os << " (no terminating NUL)";
+    return;
+  }
+
+  // The code
+  //   const char kFoo[] = "foo";
+  // generates an array of 4, not 3, elements, with the last one being '\0'.
+  // Leave that last element out so that the output matches the string
+  // literal as it is written in the source code.
+  PrintCharsAsStringTo(begin, len - 1, os);
+}
+
+// Prints a (const) char array of 'len' elements, starting at address 'begin',
+void UniversalPrintArray(const char* begin, size_t len, ostream* os) {  // via UniversalPrintCharArray().
+  UniversalPrintCharArray(begin, len, os);
+}
+
+// Prints a (const) wchar_t array of 'len' elements, starting at address
+// 'begin', by forwarding to UniversalPrintCharArray().
+void UniversalPrintArray(const wchar_t* begin, size_t len, ostream* os) {
+  UniversalPrintCharArray(begin, len, os);
+}
+
+// Prints the given C string to the ostream: "NULL" for a null pointer,
+// otherwise the pointer value followed by the quoted string contents.
+void PrintTo(const char* s, ostream* os) {
+  if (s == NULL) {
+    *os << "NULL";
+    return;
+  }
+
+  *os << ImplicitCast_<const void*>(s) << " pointing to ";
+  PrintCharsAsStringTo(s, strlen(s), os);
+}
+
+// The MSVC compiler can be configured to define wchar_t as a typedef
+// of unsigned short.  Providing an overload for const wchar_t* in that
+// configuration would make pointers to unsigned shorts print as wide
+// strings, possibly reading more memory than intended and causing
+// invalid memory accesses.  MSVC defines the _NATIVE_WCHAR_T_DEFINED
+// symbol when wchar_t is implemented as a native type.
+#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED)
+// Prints the given wide C string to the ostream: "NULL" for a null
+// pointer, otherwise the pointer value followed by the quoted contents.
+void PrintTo(const wchar_t* s, ostream* os) {
+  if (s == NULL) {
+    *os << "NULL";
+    return;
+  }
+
+  *os << ImplicitCast_<const void*>(s) << " pointing to ";
+  PrintCharsAsStringTo(s, wcslen(s), os);
+}
+#endif  // wchar_t is native
+
+// Prints a ::string object as a quoted string literal (all s.size() chars).
+#if GTEST_HAS_GLOBAL_STRING
+void PrintStringTo(const ::string& s, ostream* os) {
+  PrintCharsAsStringTo(s.data(), s.size(), os);
+}
+#endif  // GTEST_HAS_GLOBAL_STRING
+
+void PrintStringTo(const ::std::string& s, ostream* os) {  // Same for ::std::string.
+  PrintCharsAsStringTo(s.data(), s.size(), os);
+}
+
+// Prints a ::wstring object as a quoted wide string literal.
+#if GTEST_HAS_GLOBAL_WSTRING
+void PrintWideStringTo(const ::wstring& s, ostream* os) {
+  PrintCharsAsStringTo(s.data(), s.size(), os);
+}
+#endif  // GTEST_HAS_GLOBAL_WSTRING
+
+#if GTEST_HAS_STD_WSTRING
+void PrintWideStringTo(const ::std::wstring& s, ostream* os) {  // Same for ::std::wstring.
+  PrintCharsAsStringTo(s.data(), s.size(), os);
+}
+#endif  // GTEST_HAS_STD_WSTRING
+
+}  // namespace internal
+
+}  // namespace testing
@@ -0,0 +1,112 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: mheule@google.com (Markus Heule)
+//
+// The Google C++ Testing Framework (Google Test)
+
+#include "gtest/gtest-test-part.h"
+
+// Indicates that this translation unit is part of Google Test's
+// implementation.  It must come before gtest-internal-inl.h is
+// included, or there will be a compiler error.  This trick is to
+// prevent a user from accidentally including gtest-internal-inl.h in
+// his code.
+#define GTEST_IMPLEMENTATION_ 1
+#include "src/gtest-internal-inl.h"
+#undef GTEST_IMPLEMENTATION_
+
+namespace testing {
+
+using internal::GetUnitTestImpl;
+
+// Returns the part of the failure message that precedes the stack trace
+// marker, or the whole message when it contains no such marker.
+std::string TestPartResult::ExtractSummary(const char* message) {
+  const char* const marker_pos = strstr(message, internal::kStackTraceMarker);
+
+  if (marker_pos == NULL) {
+    return message;
+  }
+
+  return std::string(message, marker_pos);
+}
+
+// Prints a TestPartResult object as "<file>:<line>: <kind>:" followed by
+// the message on the next line.
+std::ostream& operator<<(std::ostream& os, const TestPartResult& result) {
+  const char* kind = "Non-fatal failure";
+
+  if (result.type() == TestPartResult::kSuccess) {
+    kind = "Success";
+  }
+  else if (result.type() == TestPartResult::kFatalFailure) {
+    kind = "Fatal failure";
+  }
+
+  os << result.file_name() << ":" << result.line_number() << ": "
+     << kind << ":\n"
+     << result.message() << std::endl;
+  return os;
+}
+
+// Appends a copy of the given TestPartResult to the end of the array.
+void TestPartResultArray::Append(const TestPartResult& result) {
+  array_.push_back(result);
+}
+
+// Returns the TestPartResult at the given index (0-based).  An index
+// outside [0, size()) is treated as a fatal error: a diagnostic is
+// printed to stdout and the program is aborted.
+const TestPartResult& TestPartResultArray::GetTestPartResult(int index) const {
+  if (index < 0 || index >= size()) {
+    printf("\nInvalid index (%d) into TestPartResultArray.\n", index);
+    // Aborting is not guaranteed to flush stdio buffers, so push the
+    // diagnostic out first; otherwise it can be lost when stdout is
+    // redirected to a file or a pipe.
+    fflush(stdout);
+    internal::posix::Abort();
+  }
+
+  return array_[index];
+}
+
+// Returns the number of TestPartResult objects in the array, as an int.
+int TestPartResultArray::size() const {
+  return static_cast<int>(array_.size());
+}
+
+namespace internal {
+
+HasNewFatalFailureHelper::HasNewFatalFailureHelper()
+  : has_new_fatal_failure_(false),
+    original_reporter_(GetUnitTestImpl()->  // remember the current reporter so the destructor can restore it
+                       GetTestPartResultReporterForCurrentThread()) {
+  GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread(this);  // install this helper as the thread's reporter
+}
+
+HasNewFatalFailureHelper::~HasNewFatalFailureHelper() {
+  GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread(  // put the saved reporter back
+    original_reporter_);
+}
+
+// Records whether a fatal failure has been reported, then passes the
+// result on to the reporter that was active before this helper.
+void HasNewFatalFailureHelper::ReportTestPartResult(
+  const TestPartResult& result) {
+  const bool is_fatal = result.fatally_failed();
+
+  // The flag is only ever set here, never cleared.
+  if (is_fatal) {
+    has_new_fatal_failure_ = true;
+  }
+
+  original_reporter_->ReportTestPartResult(result);
+}
+
+}  // namespace internal
+
+}  // namespace testing
@@ -0,0 +1,117 @@
+// Copyright 2008 Google Inc.
+// All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+
+#include "gtest/gtest-typed-test.h"
+#include "gtest/gtest.h"
+
+namespace testing {
+namespace internal {
+
+#if GTEST_HAS_TYPED_TEST_P
+
+// Returns a pointer to the first character of str for which IsSpace() is
+// false.  If str consists only of whitespace, the result points at the
+// terminating NUL, i.e. it is an empty string.
+static const char* SkipSpaces(const char* str) {
+  const char* cursor = str;
+
+  for (; IsSpace(*cursor); ++cursor) {
+  }
+
+  return cursor;
+}
+
+// Checks the comma-separated list registered_tests against the names in
+// defined_test_names_.  On success returns registered_tests (with leading
+// whitespace skipped); otherwise prints every mismatch to stderr,
+// prefixed with the file location, and aborts the program.
+const char* TypedTestCasePState::VerifyRegisteredTestNames(
+  const char* file, int line, const char* registered_tests) {
+  typedef ::std::set<const char*>::const_iterator DefinedTestIter;
+  registered_ = true;
+
+  // Some preprocessors prefix stringized literals with whitespace, so
+  // drop any leading whitespace first.
+  registered_tests = SkipSpaces(registered_tests);
+
+  Message errors;
+  ::std::set<std::string> tests;
+
+  // Pass 1: every registered name must be unique and must be defined.
+  const char* names = registered_tests;
+
+  while (names != NULL) {
+    const std::string name = GetPrefixUntilComma(names);
+    // Advance right away so that the early `continue` below is safe.
+    names = SkipComma(names);
+
+    if (tests.count(name) != 0) {
+      errors << "Test " << name << " is listed more than once.\n";
+      continue;
+    }
+
+    DefinedTestIter match = defined_test_names_.begin();
+
+    while (match != defined_test_names_.end() && !(name == *match)) {
+      ++match;
+    }
+
+    if (match == defined_test_names_.end()) {
+      errors << "No test named " << name
+             << " can be found in this test case.\n";
+    }
+    else {
+      tests.insert(name);
+    }
+  }
+
+  // Pass 2: every defined test must have been registered.
+  for (DefinedTestIter defined = defined_test_names_.begin();
+       defined != defined_test_names_.end();
+       ++defined) {
+    if (tests.count(*defined) == 0) {
+      errors << "You forgot to list test " << *defined << ".\n";
+    }
+  }
+
+  const std::string& errors_str = errors.GetString();
+
+  if (!errors_str.empty()) {
+    fprintf(stderr, "%s %s", FormatFileLocation(file, line).c_str(),
+            errors_str.c_str());
+    fflush(stderr);
+    posix::Abort();
+  }
+
+  return registered_tests;
+}
+
+#endif  // GTEST_HAS_TYPED_TEST_P
+
+}  // namespace internal
+}  // namespace testing
@@ -0,0 +1,50 @@
+// Copyright 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <stdio.h>
+
+int hsa_arg_cnt;
+char* hsa_arg_list[32];
+
+#include "gtest/gtest.h"
+
+// Test program entry point: initializes Google Test, publishes the
+// remaining command line arguments through the globals hsa_arg_cnt and
+// hsa_arg_list, and runs all tests.  Returns the value of RUN_ALL_TESTS().
+GTEST_API_ int main(int argc, char** argv) {
+
+  printf("Running main() from gtest_main.cc\n");
+  testing::InitGoogleTest(&argc, argv);
+
+  // Copy cmdline args for Hsa into a globally visible symbol.
+  // hsa_arg_list has a fixed capacity, so never copy more entries than
+  // it can hold; anything beyond that is dropped with a warning.
+  const int kMaxHsaArgs =
+    static_cast<int>(sizeof(hsa_arg_list) / sizeof(hsa_arg_list[0]));
+
+  if (argc > kMaxHsaArgs) {
+    fprintf(stderr,
+            "WARNING: %d command line arguments given; only the first %d "
+            "are passed on.\n", argc, kMaxHsaArgs);
+  }
+
+  hsa_arg_cnt = (argc < kMaxHsaArgs) ? argc : kMaxHsaArgs;
+
+  for (int jdx = 0; jdx < hsa_arg_cnt; jdx++) {
+    hsa_arg_list[jdx] = argv[jdx];
+  }
+
+  return RUN_ALL_TESTS();
+}
@@ -0,0 +1,254 @@
+#
+# Minimum version of cmake required
+#
+cmake_minimum_required(VERSION 2.8.0)
+
+#
+# GCC 4.8 or higher compiler required.
+#
+
+#
+#   Setup build environment
+# 
+#   1) Set env. variable specifying the location of ROCR header files
+#
+#      export ROCR_DIR="Root for RocR install"
+#
+#   2) Set env. variable ROCRTST_BLD_TYPE to either "Debug" or "Release".
+#      If not set, the default value of "Debug" is bound.
+# 
+#      export ROCRTST_BLD_TYPE=Debug or ROCRTST_BLD_TYPE=Release
+#
+#   3) Set env. variable ROCRTST_BLD_BITS to either "32" or "64"
+#      If not set, the default value of "64" is bound.
+# 
+#       export ROCRTST_BLD_BITS=32 or ROCRTST_BLD_BITS=64
+#
+#   4) Set env. variable TARGET_DEVICE to indicate gpu type (e.g., gfx803,
+#      gfx900, ...)
+#
+#   5) Set env. variables OPENCL_DIR and OPENCL_VER to the OpenCL install
+#      root and OpenCL version, respectively.
+#
+#   Building rocrtst Suite
+# 
+#   1) Create build folder e.g. "rocrtst/build" - any name will do
+#   2) Cd into build folder
+#   3) Run "cmake .."
+#   4) Run "make"
+#
+
+#############################
+# COMMON AREA
+#############################
+#
+# Windows is not supported: stop processing this file early.
+#
+if(WIN32)
+  message("This sample is not supported on Windows platform")
+  return()
+endif()
+
+#
+# Derive build type (Debug/Release) and bitness (32/64) from the environment
+#
+string(TOLOWER "$ENV{ROCRTST_BLD_TYPE}" tmp)  # compare case-insensitively
+if("${tmp}" STREQUAL release)
+  set(BUILD_TYPE "Release")
+  set(ISDEBUG 0)
+else()  # unset or any other value selects Debug
+  set(BUILD_TYPE "Debug")
+  set(ISDEBUG 1)
+endif()
+
+if("$ENV{ROCRTST_BLD_BITS}" STREQUAL 32)
+  set (ONLY64STR "")
+  set (IS64BIT 0)
+else()  # unset or any other value selects 64-bit
+  set (ONLY64STR "64")
+  set (IS64BIT 1)
+endif()
+
+set(ROCR_INC_DIR $ENV{ROCR_DIR}/hsa/include)
+set(ROCR_LIB_DIR $ENV{ROCR_DIR}/lib)
+
+#
+# Verify that the ROCR header files are present. Both ROCR_INC_DIR and
+# ROCR_LIB_DIR are derived from the ROCR_DIR environment variable, so
+# that is the variable the user has to fix when a check fails.
+#
+if(NOT EXISTS ${ROCR_INC_DIR}/hsa/hsa.h)
+  message("ERROR: ROCR headers not found (${ROCR_INC_DIR}/hsa/hsa.h). Is env. variable ROCR_DIR correct?")
+  return()
+endif()
+
+# Verify that the ROCR runtime library matching the requested bitness is
+# present (libhsa-runtime.so for 32-bit, libhsa-runtime64.so for 64-bit).
+#
+set(ROCR_RUNTIME_LIB ${ROCR_LIB_DIR}/libhsa-runtime${ONLY64STR}.so)
+if(NOT EXISTS ${ROCR_RUNTIME_LIB})
+  message("ERROR: ROCR library not found (${ROCR_RUNTIME_LIB}). Is env. variable ROCR_DIR correct?")
+  return()
+endif()
+
+if (DEFINED ENV{OPENCL_DIR})
+  set(CLANG $ENV{OPENCL_DIR}/bin/x86_64/clang)  # clang under the OpenCL install root
+  set(OPENCL_DIR $ENV{OPENCL_DIR})
+  if (NOT EXISTS ${CLANG})
+    message("ERROR: path to clang (${CLANG}) is not valid. Is env. variable OPENCL_DIR correct?")
+    return()
+  endif()
+
+  if (DEFINED ENV{OPENCL_VER})
+    set(OPENCL_VER $ENV{OPENCL_VER})
+  else()
+    message("OPENCL_VER environment variable is not set. Using default")
+    set(OPENCL_VER "2.0")  # default OpenCL version
+  endif()
+else()  # CLANG, OPENCL_DIR and OPENCL_VER stay unset on this path
+    message("WARNING: OPENCL_DIR environment variable is not set. Kernels will not be built.")
+endif()
+
+if (DEFINED ENV{TARGET_DEVICE})
+  set(TARGET_DEVICE $ENV{TARGET_DEVICE})  # later passed to clang as -mcpu
+else()
+  message("ERROR: TARGET_DEVICE environment variable is not defined.")
+  message("Please define a valid clang target (e.g., gfx803, gfx900,...).")
+  return()
+endif() 
+
+#
+# Set the project name: "sample" for 32-bit builds, "sample64" for 64-bit
+#
+
+set(PROJECT_NAME "sample${ONLY64STR}")
+project (${PROJECT_NAME})
+
+#
+# Print out the build configuration being used:
+#
+#   Build Type: Debug Vs Release, 32 Vs 64
+#   Compiler and its Version
+#   Project Source, Binary, Library and Executable directories
+#   Target device, clang path and OpenCL version
+#
+message("")
+message("Build Configuration:")
+message("-------------IS64BIT: " ${IS64BIT})
+message("-----------BuildType: " ${BUILD_TYPE})
+message("------------Compiler: " ${CMAKE_CXX_COMPILER})
+message("-------------Version: " ${CMAKE_CXX_COMPILER_VERSION})
+message("--------Proj Src Dir: " ${PROJECT_SOURCE_DIR})
+message("--------Proj Bld Dir: " ${PROJECT_BINARY_DIR})
+message("--------Proj Lib Dir: " ${PROJECT_BINARY_DIR}/lib)
+message("--------Proj Exe Dir: " ${PROJECT_BINARY_DIR}/bin)
+message("-------Target Device: " ${TARGET_DEVICE})
+message("----------Clang path: " ${CLANG})
+message("-------OpenCL version " ${OPENCL_VER})
+message("")
+
+#
+# Set the build type selected through ROCRTST_BLD_TYPE
+#
+set(CMAKE_BUILD_TYPE ${BUILD_TYPE})
+#
+# Always generate verbose makefiles.
+#
+SET( CMAKE_VERBOSE_MAKEFILE on )
+#
+# Compiler pre-processor definitions.
+#
+# Define MACRO "DEBUG" if build type is "Debug"
+if(${BUILD_TYPE} STREQUAL "Debug")
+add_definitions(-DDEBUG)
+endif()
+
+add_definitions(-D__linux__)
+add_definitions(-DLITTLEENDIAN_CPU=1)
+
+#
+# Linux Compiler options
+#
+set(CMAKE_CXX_FLAGS "-std=c++11 ")  # starts a fresh flag list; any earlier value is replaced
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-math-errno")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-threadsafe-statics")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmerge-all-constants")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fms-extensions")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
+
+#
+# Select -m64 (plus SSE/SSE2) or -m32 according to the requested bitness
+#
+if (IS64BIT) 
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64  -msse -msse2")
+else()
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")
+endif()
+
+#
+# Add compiler flags to include symbol information for debug builds
+#
+if(ISDEBUG)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb")
+endif()
+message("ISDEBUG STEP:Done")
+
+#
+# Linux Linker options
+#
+#set(CMAKE_EXE_LINKER_FLAGS "-Wl,-Bdynamic -Wl,-z,noexecstack -Wl ")
+#set(CMAKE_EXE_LINKER_FLAGS "-Wl,-soname=$(CORE_RUNTIME_NAME).so.1 ")
+
+#
+# Specify the directory containing various libraries of ROCR
+# to be linked against for building ROC Perf applications
+#
+link_directories(${ROCR_LIB_DIR})
+
+#
+# Extend the list of libraries to be used for linking ROC Perf Apps
+#
+set(ROCR_LIBS ${ROCR_LIBS} hsa-runtime${ONLY64STR})
+set(ROCR_LIBS ${ROCR_LIBS} hsa-runtime-tools${ONLY64STR})
+message(${ROCR_LIBS})
+
+#
+# Header search paths. The OpenCL path is built from the CMake variables
+# OPENCL_DIR / OPENCL_VER rather than from the raw environment, so that
+# the default OpenCL version chosen above is honored when the OPENCL_VER
+# environment variable is not set.
+#
+include_directories(${ROCR_INC_DIR})
+include_directories(${OPENCL_DIR}/include/opencl${OPENCL_VER})
+
+#
+# process_sample(S_NAME)
+#
+#   Declares the build targets for the sample located in the directory
+#   ${CMAKE_CURRENT_SOURCE_DIR}/${S_NAME}:
+#
+#     - executable "${S_NAME}_${PROJECT_NAME}", built from all sources in
+#       that directory and linked against the ROCR libraries;
+#     - custom target "sample_kernels", which runs clang to compile the
+#       sample's OpenCL kernels into ${S_NAME}_kernels.hsaco in the build
+#       directory.
+#
+#   Reads BITCODE_LIBS and CL_FILE_LIST from the calling scope.
+#
+function(process_sample S_NAME)
+  set(SNAME_EXE "${S_NAME}_${PROJECT_NAME}")
+  set(SNAME_KERNEL "${S_NAME}_kernels.hsaco")
+
+  # CLANG is only set when OPENCL_DIR was provided. Without it there is
+  # no compiler to run, so the kernel target is not created at all
+  # instead of being created with a broken command line.
+  if(CLANG)
+    # NOTE(review): the triple vendor is spelled "amdh" here; the usual
+    # spelling is "amdgcn-amd-amdhsa" - confirm before changing.
+    separate_arguments(CLANG_ARG_LIST UNIX_COMMAND "-target amdgcn-amdh-amdhsa -mcpu=${TARGET_DEVICE} -include ${OPENCL_DIR}/include/opencl-c.h ${BITCODE_LIBS} -cl-std=CL${OPENCL_VER} ${CL_FILE_LIST} -o ${PROJECT_BINARY_DIR}/${SNAME_KERNEL}")
+    add_custom_target(sample_kernels ${CLANG} ${CLANG_ARG_LIST}
+       COMMENT "BUILDING KERNEL..."
+       VERBATIM)
+  endif()
+
+  aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/${S_NAME} S_NAME_SOURCES)
+  add_executable(${SNAME_EXE} ${S_NAME_SOURCES})
+  target_link_libraries(${SNAME_EXE} ${ROCR_LIBS} c stdc++ dl pthread rt)
+endfunction(process_sample)
+###########################
+# SAMPLE SPECIFIC SECTION 
+###########################
+set(KERN_SUFFIX "kernels.hsaco")
+set(BITCODE_PREF "-Xclang -mlink-bitcode-file -Xclang")
+set(BITCODE_PREF "${BITCODE_PREF} ${OPENCL_DIR}/lib/x86_64/bitcode")
+
+# Binary Search
+set(BITCODE_LIBS "${BITCODE_PREF}/opencl.amdgcn.bc")
+set(BITCODE_LIBS "${BITCODE_LIBS} ${BITCODE_PREF}/ockl.amdgcn.bc")
+set(BITCODE_LIBS "${BITCODE_LIBS} ${BITCODE_PREF}/ocml.amdgcn.bc")
+set(CL_FILE_LIST "${PROJECT_SOURCE_DIR}/binary_search/binary_search_kernels.cl")
+process_sample("binary_search")
+
+# Install the sample executable. The target name is spelled out here
+# because the name computed inside process_sample() lives in that
+# function's scope and is not visible at this level.
+install(TARGETS binary_search_${PROJECT_NAME}
+        ARCHIVE DESTINATION ${PROJECT_BINARY_DIR}/lib
+        LIBRARY DESTINATION ${PROJECT_BINARY_DIR}/lib
+        RUNTIME DESTINATION ${PROJECT_BINARY_DIR}/bin)
@@ -0,0 +1,19 @@
+
+
+To build the sample, first export the following environment variables:
+
+export ROCR_DIR=<root of RocR install; for RocR includes and libraries>
+export OPENCL_DIR=<root of OpenCL install; for required clang and bitcode libs>
+export OPENCL_VER=<OpenCL version; e.g., "2.0">
+export TARGET_DEVICE=<GPU type; e.g., "gfx803" or "gfx900">
+
+Next, do the following:
+mkdir build
+cd build
+cmake ..
+
+Finally, do the following to build the application and respective kernels:
+
+make
+make sample_kernels
+
@@ -0,0 +1,881 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <climits>
+#include <iostream>
+#include <string>
+#include "hsa/hsa.h"
+#include "hsa/hsa_ext_amd.h"
+
+#define RET_IF_HSA_ERR(err) { \
+  if ((err) != HSA_STATUS_SUCCESS) { \
+    std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
+              __FILE__ << ". Call returned " << err << std::endl; \
+    return (err); \
+  } \
+}
+
+// Default configuration values for the binary search sample.
+static constexpr uint32_t kBinarySearchLength = 512;  // input array length
+static constexpr uint32_t kBinarySearchFindMe = 108;  // value searched for
+static constexpr uint32_t kWorkGroupSize = 256;       // requested work-group size
+
+// Holds all the state specific to the binary search sample: the search
+// parameters, the buffers, the kernel description and the HSA/RocR handles.
+//
+// Every member carries a default initializer so that a freshly constructed
+// object never exposes indeterminate values (for example a buffer pointer
+// that is released before it was ever allocated, or a grid size that is read
+// before it was computed).
+typedef struct BinarySearch {
+  // Binary Search parameters
+  uint32_t length = 0;             // Number of elements in the input array
+  uint32_t work_group_size = 0;    // Work-group size (x) used for dispatch
+  uint32_t work_grid_size = 0;     // Grid size (x) used for dispatch
+  uint32_t num_sub_divisions = 0;  // Segments examined per kernel pass
+  uint32_t find_me = 0;            // Value being searched for
+
+  // Buffers needed for this application
+  uint32_t* input = nullptr;            // Sorted array that is searched
+  uint32_t* input_arr = nullptr;        // Per-pass (first, last) value of
+  // each segment, assembled on the host
+  uint32_t* input_arr_local = nullptr;  // Copy of input_arr that is passed
+  // to the kernel
+  uint32_t* output = nullptr;           // Four-word result buffer written by
+  // the kernel
+
+  // Kernel argument buffers and addresses
+  void* kern_arg_buffer = nullptr;  // Begin of allocated memory
+  //  this pointer to be deallocated
+  void* kern_arg_address = nullptr;  // Properly aligned address to be used in
+  // aql packet (don't use for deallocation)
+
+  // Kernel code
+  std::string kernel_file_name;  // Path of the pre-compiled code object
+  std::string kernel_name;       // Name of the kernel symbol to look up
+  uint32_t kernarg_size = 0;     // Kernarg segment size reported for the kernel
+  uint32_t kernarg_align = 0;    // Kernarg segment alignment for the kernel
+
+  // HSA/RocR objects needed for this application
+  hsa_agent_t gpu_dev = {};
+  hsa_agent_t cpu_dev = {};
+  hsa_signal_t signal = {};      // Completion signal for kernel dispatches
+  hsa_queue_t* queue = nullptr;  // Queue the AQL packets are written to
+  hsa_amd_memory_pool_t cpu_pool = {};
+  hsa_amd_memory_pool_t gpu_pool = {};
+  hsa_amd_memory_pool_t kern_arg_pool = {};
+
+  // Other items we need to populate AQL packet
+  uint64_t kernel_object = 0;
+  uint32_t group_segment_size = 0;   ///< Kernel group seg size
+  uint32_t private_segment_size = 0;   ///< Kernel private seg size
+} BinarySearch;
+
+// Fills in the application-specific defaults of the sample: where the
+// pre-compiled kernel lives, which kernel to run, and the search parameters.
+// The parameters come from the named constants at the top of this file
+// instead of repeating the same literals here, so there is a single place to
+// change them.
+//
+// bs must point to a valid BinarySearch object.
+void InitializeBinarySearch(BinarySearch* bs) {
+  bs->kernel_file_name = "./binary_search_kernels.hsaco";
+  bs->kernel_name = "binarySearch";
+  bs->length = kBinarySearchLength;
+  bs->find_me = kBinarySearchFindMe;
+  bs->work_group_size = kWorkGroupSize;
+  // One sub-division per work-group-sized slice of the input.
+  bs->num_sub_divisions = bs->length / bs->work_group_size;
+}
+
+// Shared worker behind the agent-iteration call-backs below. It compares the
+// device type of "agent" with dev_type and, on a match, stores the agent in
+// the hsa_agent_t that "data" points to. Its signature differs from the
+// call-back prototype expected by hsa_iterate_agents(), so it has to be
+// wrapped by a function with the right prototype.
+//
+// Return values:
+//  HSA_STATUS_INFO_BREAK -- "agent" is of the requested type (dev_type)
+//  HSA_STATUS_SUCCESS -- "agent" is of some other type
+//  Other -- an error occurred (including a null "data" pointer)
+static hsa_status_t FindAgent(hsa_agent_t agent, void* data,
+                              hsa_device_type_t dev_type) {
+  assert(data != nullptr);
+
+  // Still guard against a null pointer when asserts are compiled out.
+  if (data == nullptr) {
+    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  // Ask the runtime what kind of device this agent is.
+  hsa_device_type_t agent_type;
+  const hsa_status_t query_status =
+    hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &agent_type);
+  RET_IF_HSA_ERR(query_status);
+
+  if (agent_type != dev_type) {
+    // Not the one we want; tell the iterator to keep going.
+    return HSA_STATUS_SUCCESS;
+  }
+
+  *(reinterpret_cast<hsa_agent_t*>(data)) = agent;
+  return HSA_STATUS_INFO_BREAK;
+}
+
+// Call-back for hsa_iterate_agents() that stops at the first GPU agent and
+// stores it through "data". The prototype is dictated by the HSA
+// specification.
+hsa_status_t FindGPUDevice(hsa_agent_t agent, void* data) {
+  const hsa_device_type_t wanted_type = HSA_DEVICE_TYPE_GPU;
+  return FindAgent(agent, data, wanted_type);
+}
+
+// Call-back for hsa_iterate_agents() that stops at the first CPU agent and
+// stores it through "data". The prototype is dictated by the HSA
+// specification.
+hsa_status_t FindCPUDevice(hsa_agent_t agent, void* data) {
+  const hsa_device_type_t wanted_type = HSA_DEVICE_TYPE_CPU;
+  return FindAgent(agent, data, wanted_type);
+}
+
+// Locates one GPU agent and one CPU agent and records them in the
+// BinarySearch structure for later use.
+//
+// hsa_iterate_agents() keeps calling the call-back for each known agent for
+// as long as the call-back returns HSA_STATUS_SUCCESS, and hands back the
+// first other value it sees. Our call-backs return HSA_STATUS_INFO_BREAK once
+// they have found an agent of the wanted type, so anything else coming back
+// from the iteration means "not found" or "error".
+hsa_status_t FindDevices(BinarySearch* bs) {
+  // GPU first.
+  bs->gpu_dev.handle = 0;
+
+  if (hsa_iterate_agents(FindGPUDevice, &bs->gpu_dev) !=
+      HSA_STATUS_INFO_BREAK) {
+    return HSA_STATUS_ERROR;
+  }
+
+  // Then the CPU.
+  bs->cpu_dev.handle = 0;
+
+  if (hsa_iterate_agents(FindCPUDevice, &bs->cpu_dev) !=
+      HSA_STATUS_INFO_BREAK) {
+    return HSA_STATUS_ERROR;
+  }
+
+  // Sanity-check the handles the call-backs stored.
+  if (bs->gpu_dev.handle == 0) {
+    std::cout << "GPU Device is not Created properly!" << std::endl;
+    RET_IF_HSA_ERR(HSA_STATUS_ERROR);
+  }
+
+  if (bs->cpu_dev.handle == 0) {
+    std::cout << "CPU Device is not Created properly!" << std::endl;
+    RET_IF_HSA_ERR(HSA_STATUS_ERROR);
+  }
+
+  return HSA_STATUS_SUCCESS;
+}
+
+// Tests whether "pool" belongs to the HSA_AMD_SEGMENT_GLOBAL segment and
+// whether its HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT flag matches what
+// the caller asked for: with kern_arg == true the flag must be set, with
+// kern_arg == false it must be clear.
+//
+// Return values:
+//  HSA_STATUS_INFO_BREAK -- the pool qualifies; it was stored through "data"
+//  HSA_STATUS_SUCCESS -- no error, but the pool does not qualify
+//  Other -- the error that was encountered (including a null "data")
+//
+// This function does not have the prototype required of a call-back for
+// hsa_amd_agent_iterate_memory_pools(), so it must be wrapped by a function
+// that does.
+static hsa_status_t
+FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) {
+  if (data == nullptr) {
+    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  // Only pools in the global segment are of interest.
+  hsa_amd_segment_t pool_segment;
+  hsa_status_t status = hsa_amd_memory_pool_get_info(
+                          pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &pool_segment);
+  RET_IF_HSA_ERR(status);
+
+  if (pool_segment != HSA_AMD_SEGMENT_GLOBAL) {
+    return HSA_STATUS_SUCCESS;
+  }
+
+  uint32_t global_flags;
+  status = hsa_amd_memory_pool_get_info(
+             pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_flags);
+  RET_IF_HSA_ERR(status);
+
+  // The pool qualifies only when its kernarg-ness equals the request.
+  const bool is_kern_arg_pool =
+    (global_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT) != 0;
+
+  if (is_kern_arg_pool != kern_arg) {
+    return HSA_STATUS_SUCCESS;
+  }
+
+  *(reinterpret_cast<hsa_amd_memory_pool_t*>(data)) = pool;
+  return HSA_STATUS_INFO_BREAK;
+}
+
+// Call-back for hsa_amd_agent_iterate_memory_pools() that accepts a pool in
+// the HSA_AMD_SEGMENT_GLOBAL segment that does NOT have
+// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT set.
+hsa_status_t FindStandardPool(hsa_amd_memory_pool_t pool, void* data) {
+  const bool want_kern_arg_pool = false;
+  return FindGlobalPool(pool, data, want_kern_arg_pool);
+}
+
+// Call-back for hsa_amd_agent_iterate_memory_pools() that accepts a pool in
+// the HSA_AMD_SEGMENT_GLOBAL segment that DOES have
+// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT set.
+hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) {
+  const bool want_kern_arg_pool = true;
+  return FindGlobalPool(pool, data, want_kern_arg_pool);
+}
+
+// Looks up the three memory pools this sample allocates from and stores them
+// in the BinarySearch structure:
+//   cpu_pool      -- a non-kernarg global pool of the CPU agent
+//   gpu_pool      -- a non-kernarg global pool of the GPU agent
+//   kern_arg_pool -- a kernarg global pool of the CPU agent
+// Each lookup succeeds only if the iteration was stopped by a call-back with
+// HSA_STATUS_INFO_BREAK; any other outcome is reported as HSA_STATUS_ERROR.
+hsa_status_t FindPools(BinarySearch* bs) {
+  hsa_status_t iter_status;
+
+  iter_status = hsa_amd_agent_iterate_memory_pools(
+                  bs->cpu_dev, FindStandardPool, &bs->cpu_pool);
+
+  if (iter_status != HSA_STATUS_INFO_BREAK) {
+    return HSA_STATUS_ERROR;
+  }
+
+  iter_status = hsa_amd_agent_iterate_memory_pools(
+                  bs->gpu_dev, FindStandardPool, &bs->gpu_pool);
+
+  if (iter_status != HSA_STATUS_INFO_BREAK) {
+    return HSA_STATUS_ERROR;
+  }
+
+  iter_status = hsa_amd_agent_iterate_memory_pools(
+                  bs->cpu_dev, FindKernArgPool, &bs->kern_arg_pool);
+
+  return (iter_status == HSA_STATUS_INFO_BREAK) ? HSA_STATUS_SUCCESS
+         : HSA_STATUS_ERROR;
+}
+
+// Once the needed memory pools have been found and the BinarySearch structure
+// has been updated with these handles, this function allocates the sample's
+// buffers from those pools and fills the input array.
+// Devices with which a pool is associated already have access to the pool.
+// However, other devices may also need to read or write to that memory. Below,
+// we see how we can grant access to other devices to address this issue.
+//
+// Buffer sizes:
+//   input            -- bs->length words (the sorted array being searched)
+//   output           -- 4 words
+//   input_arr and
+//   input_arr_local  -- 2 words per sub-division, sized for the largest
+//                       sub-division count Run() can choose
+//
+// Returns HSA_STATUS_SUCCESS, or the status of the first failing HSA call.
+hsa_status_t AllocateAndInitBuffers(BinarySearch* bs) {
+  hsa_status_t err;
+
+  const size_t out_bytes = 4 * sizeof(uint32_t);
+
+  // The input array is indexed from 0 to bs->length - 1 below, so it needs
+  // one word per element (not one pair per sub-division).
+  const size_t input_bytes =
+    static_cast<size_t>(bs->length) * sizeof(uint32_t);
+
+  // Run() recomputes num_sub_divisions before dispatching, and the value it
+  // picks never exceeds the larger of length and work_group_size. Size the
+  // two bounds buffers for that worst case so the per-pass copy always fits.
+  uint32_t max_sub_divisions = bs->num_sub_divisions;
+
+  if (max_sub_divisions < bs->length) {
+    max_sub_divisions = bs->length;
+  }
+
+  if (max_sub_divisions < bs->work_group_size) {
+    max_sub_divisions = bs->work_group_size;
+  }
+
+  const size_t bounds_bytes =
+    static_cast<size_t>(max_sub_divisions) * 2 * sizeof(uint32_t);
+
+  // In all of these examples, we want both the cpu and gpu to have access to
+  // the buffer in question. We use the array of agents below in the subsequent
+  // calls to hsa_amd_agents_allow_access() for this purpose.
+  hsa_agent_t ag_list[2] = {bs->gpu_dev, bs->cpu_dev};
+
+  err = hsa_amd_memory_pool_allocate(bs->cpu_pool, input_bytes, 0,
+                                     reinterpret_cast<void**>(&bs->input));
+  RET_IF_HSA_ERR(err);
+  err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->input);
+  RET_IF_HSA_ERR(err);
+  (void)memset(bs->input, 0, input_bytes);
+
+  err = hsa_amd_memory_pool_allocate(bs->cpu_pool, out_bytes, 0,
+                                     reinterpret_cast<void**>(&bs->output));
+  RET_IF_HSA_ERR(err);
+  err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->output);
+  RET_IF_HSA_ERR(err);
+  (void)memset(bs->output, 0, out_bytes);
+
+  err = hsa_amd_memory_pool_allocate(bs->cpu_pool, bounds_bytes, 0,
+                                     reinterpret_cast<void**>(&bs->input_arr));
+  RET_IF_HSA_ERR(err);
+  err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->input_arr);
+  RET_IF_HSA_ERR(err);
+  (void)memset(bs->input_arr, 0, bounds_bytes);
+
+  // NOTE(review): this buffer is also taken from cpu_pool, and gpu_pool is
+  // not used anywhere in the visible code - confirm which pool is intended.
+  err = hsa_amd_memory_pool_allocate(bs->cpu_pool, bounds_bytes, 0,
+                                     reinterpret_cast<void**>(&bs->input_arr_local));
+  RET_IF_HSA_ERR(err);
+  err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->input_arr_local);
+  RET_IF_HSA_ERR(err);
+
+  // Binary-search application specific code...
+  // Initialize input buffer with random values in an increasing order
+  uint32_t max = bs->length * 20;
+  bs->input[0] = 0;
+
+  uint32_t seed = static_cast<uint32_t>(time(NULL));
+  srand(seed);
+
+  // NOTE(review): "max * rand_r(&seed)" is evaluated in 32-bit unsigned
+  // arithmetic and can wrap before the division, so the increments are much
+  // smaller than "max" suggests. Left unchanged because the sample's default
+  // search value may depend on it - confirm the intended distribution.
+  for (uint32_t i = 1; i < bs->length; ++i) {
+    bs->input[i] = bs->input[i - 1] +
+                   static_cast<uint32_t>(max * rand_r(&seed) / static_cast<float>(RAND_MAX));
+  }
+
+// #define VERBOSE 1
+#ifdef VERBOSE
+  std::cout << "Input array values:" << std::endl;
+
+  for (uint32_t i = 0; i < bs->length; ++i) {
+    std::cout << "input[" << i << "] = " << bs->input[i] << " ";
+
+    if (i % 4 == 0) {
+      std::cout << std::endl;
+    }
+  }
+
+  std::cout << std::endl;
+#endif
+
+  return HSA_STATUS_SUCCESS;
+}
+
+// The code in this function illustrates how to load a kernel from
+// pre-compiled code. The goal is to get a handle that can be later
+// used in an AQL packet and also to extract information about the kernel
+// that we will need. The kernel handle and all of that information are
+// saved to the BinarySearch structure and used when we populate the AQL
+// packet.
+//
+// Reads:  bs->kernel_file_name, bs->kernel_name, bs->gpu_dev
+// Writes: bs->kernel_object, bs->private_segment_size,
+//         bs->group_segment_size, bs->kernarg_size, bs->kernarg_align
+//
+// Returns HSA_STATUS_SUCCESS, HSA_STATUS_ERROR if the file cannot be opened,
+// or the status of the first failing HSA call.
+hsa_status_t LoadKernelFromObjFile(BinarySearch* bs) {
+  hsa_status_t err;
+  hsa_code_object_reader_t code_obj_rdr = {0};
+  hsa_executable_t executable = {0};
+
+  hsa_file_t file_handle = open(bs->kernel_file_name.c_str(), O_RDONLY);
+
+  if (file_handle == -1) {
+    std::cout << "failed to open " << bs->kernel_file_name.c_str() <<
+              " at line " << __LINE__ << ", errno: " << errno << std::endl;
+    return HSA_STATUS_ERROR;
+  }
+
+  err = hsa_code_object_reader_create_from_file(file_handle, &code_obj_rdr);
+  // Close the descriptor before checking the status so that it is not leaked
+  // when creating the reader fails.
+  // NOTE(review): the descriptor is closed while the reader is still in use
+  // below (as the code did before); confirm against the HSA specification
+  // whether the file has to stay open for the lifetime of the reader.
+  close(file_handle);
+  RET_IF_HSA_ERR(err);
+
+  err = hsa_executable_create_alt(HSA_PROFILE_FULL,
+                                  HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL, &executable);
+  RET_IF_HSA_ERR(err);
+
+  err = hsa_executable_load_agent_code_object(executable, bs->gpu_dev,
+        code_obj_rdr, NULL, NULL);
+  RET_IF_HSA_ERR(err);
+
+  err = hsa_executable_freeze(executable, NULL);
+  RET_IF_HSA_ERR(err);
+
+  // Look up the kernel by name and query everything the AQL packet needs.
+  hsa_executable_symbol_t kern_sym;
+  err = hsa_executable_get_symbol(executable, NULL, bs->kernel_name.c_str(),
+                                  bs->gpu_dev, 0, &kern_sym);
+  RET_IF_HSA_ERR(err);
+
+  err = hsa_executable_symbol_get_info(kern_sym,
+                                       HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
+                                       &bs->kernel_object);
+  RET_IF_HSA_ERR(err);
+
+  err = hsa_executable_symbol_get_info(kern_sym,
+                                       HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
+                                       &bs->private_segment_size);
+  RET_IF_HSA_ERR(err);
+
+  err = hsa_executable_symbol_get_info(kern_sym,
+                                       HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
+                                       &bs->group_segment_size);
+  RET_IF_HSA_ERR(err);
+
+  err = hsa_executable_symbol_get_info(kern_sym,
+                                       HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
+                                       &bs->kernarg_size);
+  RET_IF_HSA_ERR(err);
+
+  err = hsa_executable_symbol_get_info(kern_sym,
+                                       HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT,
+                                       &bs->kernarg_align);
+  RET_IF_HSA_ERR(err);
+
+  return HSA_STATUS_SUCCESS;
+}
+
+// This function shows how to do an asynchronous copy. We have to create a
+// signal and use the signal to notify us when the copy has completed.
+//
+// Copies "size" bytes from src (owned by src_ag) to dst (owned by dst_ag) and
+// blocks until the copy has finished. The temporary signal is destroyed on
+// every path, including when the copy or the wait fails.
+//
+// Returns HSA_STATUS_SUCCESS, the status of the failing HSA call, or
+// HSA_STATUS_ERROR if the wait returns with a non-zero signal value.
+hsa_status_t AgentMemcpy(void* dst, const void* src,
+                         size_t size, hsa_agent_t dst_ag, hsa_agent_t src_ag) {
+  hsa_signal_t s;
+  hsa_status_t err;
+
+  // The signal starts at 1; the wait below returns once it drops below 1.
+  err = hsa_signal_create(1, 0, NULL, &s);
+  RET_IF_HSA_ERR(err);
+
+  err = hsa_amd_memory_async_copy(dst, dst_ag, src, src_ag, size, 0, NULL, s);
+
+  if (err == HSA_STATUS_SUCCESS &&
+      hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1,
+                                UINT64_MAX, HSA_WAIT_STATE_BLOCKED) != 0) {
+    err = HSA_STATUS_ERROR;
+    std::cout << "Async copy signal error" << std::endl;
+  }
+
+  // Release the signal before reporting any failure so it is never leaked.
+  const hsa_status_t destroy_err = hsa_signal_destroy(s);
+
+  RET_IF_HSA_ERR(err);
+  RET_IF_HSA_ERR(destroy_err);
+
+  return HSA_STATUS_SUCCESS;
+}
+
+// AlignDown and AlignUp are two small helpers that move a value (address) to
+// an aligned boundary at or below it, respectively at or above it. The mask
+// arithmetic assumes "alignment" is a power of two.
+static intptr_t
+AlignDown(intptr_t value, size_t alignment) {
+  // Clearing the low bits rounds down to a multiple of alignment.
+  const size_t low_bits = alignment - 1;
+  return (intptr_t) (value & ~low_bits);
+}
+static void*
+AlignUp(void* value, size_t alignment) {
+  // Step just past the next boundary, then round back down onto it.
+  const uintptr_t bumped =
+    (uintptr_t)(reinterpret_cast<uintptr_t>(value) + alignment - 1);
+  return reinterpret_cast<void*>(AlignDown(bumped, alignment));
+}
+
+// Fills the caller's AQL packet with the dispatch information that has been
+// collected in the BinarySearch structure so far. The header is only given a
+// placeholder value here; the real header is written right before the
+// doorbell is rung.
+void PopulateAQLPacket(BinarySearch const* bs,
+                       hsa_kernel_dispatch_packet_t* aql) {
+  // Placeholder header plus the setup word.
+  aql->header = 0;
+  aql->setup = 1;
+
+  // Work-group shape: only the x dimension comes from the sample's settings.
+  aql->workgroup_size_x = bs->work_group_size;
+  aql->workgroup_size_y = 1;
+  aql->workgroup_size_z = 1;
+
+  // Grid shape: likewise only x is configurable.
+  aql->grid_size_x = bs->work_grid_size;
+  aql->grid_size_y = 1;
+  aql->grid_size_z = 1;
+
+  // Kernel description gathered when the code object was loaded.
+  aql->private_segment_size = bs->private_segment_size;
+  aql->group_segment_size = bs->group_segment_size;
+  aql->kernel_object = bs->kernel_object;
+
+  // Kernel arguments and the signal that reports completion.
+  aql->kernarg_address = bs->kern_arg_address;
+  aql->completion_signal = bs->signal;
+}
+/*
+ * Reserves the next slot of the queue (advancing the queue's write index by
+ * one) and copies every field of the provided AQL packet into it except the
+ * first 32 bits, i.e. the header and setup fields. Those must be written
+ * last, by the caller.
+ */
+void WriteAQLToQueue(hsa_kernel_dispatch_packet_t const* in_aql,
+                     hsa_queue_t* q) {
+  // Claim a slot; the returned index is the one we may write to.
+  const uint64_t slot_idx = hsa_queue_add_write_index_relaxed(q, 1);
+
+  // The index is wrapped onto the ring buffer by masking with (size - 1).
+  const uint32_t wrap_mask = q->size - 1;
+  void* ring_base = q->base_address;
+  hsa_kernel_dispatch_packet_t* dst =
+    &(reinterpret_cast<hsa_kernel_dispatch_packet_t*>(ring_base))
+    [slot_idx & wrap_mask];
+
+  dst->workgroup_size_x = in_aql->workgroup_size_x;
+  dst->workgroup_size_y = in_aql->workgroup_size_y;
+  dst->workgroup_size_z = in_aql->workgroup_size_z;
+
+  dst->grid_size_x = in_aql->grid_size_x;
+  dst->grid_size_y = in_aql->grid_size_y;
+  dst->grid_size_z = in_aql->grid_size_z;
+
+  dst->private_segment_size = in_aql->private_segment_size;
+  dst->group_segment_size = in_aql->group_segment_size;
+
+  dst->kernel_object = in_aql->kernel_object;
+  dst->kernarg_address = in_aql->kernarg_address;
+  dst->completion_signal = in_aql->completion_signal;
+}
+
+// This function allocates memory from the kern_arg pool we already found, and
+// then copies the argument values needed by the kernel code into it.
+//
+// Parameters:
+//   bs          -- supplies kern_arg_pool, kernarg_align, kernarg_size and
+//                  the two agents; receives kern_arg_buffer (the pointer to
+//                  free later) and kern_arg_address (the aligned pointer)
+//   args        -- source of the argument bytes
+//   arg_size    -- number of bytes to copy from args
+//   aql_buf_ptr -- if non-null, receives the aligned kernarg address
+//
+// Returns HSA_STATUS_SUCCESS, or the status of the failing HSA call. On
+// failure nothing is stored in bs and no memory stays allocated.
+hsa_status_t AllocAndSetKernArgs(BinarySearch* bs, void* args,
+                                 size_t arg_size, void** aql_buf_ptr) {
+  void* kern_arg_buf = nullptr;
+  hsa_status_t err;
+
+  // The kernel arguments must be written to memory at the correct alignment.
+  // We already queried the executable to get that alignment, which is stored
+  // in bs->kernarg_align. In case the memory returned from the pool is not
+  // aligned that way, we request a little more than we need so that we can
+  // adjust the start address.
+  const size_t req_align = bs->kernarg_align;
+  const size_t buf_size = arg_size + (req_align << 1);
+
+  err = hsa_amd_memory_pool_allocate(bs->kern_arg_pool, buf_size, 0,
+                                     &kern_arg_buf);
+  RET_IF_HSA_ERR(err);
+
+  // Make sure both the CPU and GPU can access the kernel arguments. If that
+  // fails, give the buffer back instead of leaking it.
+  hsa_agent_t ag_list[2] = {bs->gpu_dev, bs->cpu_dev};
+  err = hsa_amd_agents_allow_access(2, ag_list, NULL, kern_arg_buf);
+
+  if (err != HSA_STATUS_SUCCESS) {
+    (void)hsa_amd_memory_pool_free(kern_arg_buf);
+  }
+
+  RET_IF_HSA_ERR(err);
+
+  // Address of the allocated buffer (this is the one to deallocate)
+  bs->kern_arg_buffer = kern_arg_buf;
+
+  // Addr. of kern arg start.
+  bs->kern_arg_address = AlignUp(kern_arg_buf, req_align);
+
+  assert(arg_size >= bs->kernarg_size);
+  assert(((uintptr_t)bs->kern_arg_address + arg_size) <
+         ((uintptr_t)bs->kern_arg_buffer + buf_size));
+
+  (void)memcpy(bs->kern_arg_address, args, arg_size);
+
+  // Hand the aligned address back to the caller as well, if requested.
+  if (aql_buf_ptr != nullptr) {
+    *aql_buf_ptr = bs->kern_arg_address;
+  }
+
+  return HSA_STATUS_SUCCESS;
+}
+
+// This wrapper atomically writes the provided header and setup to the
+// provided AQL packet, as one 32-bit store with release ordering: header in
+// the low 16 bits, setup in the high 16 bits. The provided AQL packet address
+// should be in the queue memory space.
+inline void AtomicSetPacketHeader(uint16_t header, uint16_t setup,
+                                  hsa_kernel_dispatch_packet_t* queue_packet) {
+  // Widen to an unsigned 32-bit type before shifting; shifting the promoted
+  // (signed) int would overflow it for setup values with the top bit set.
+  const uint32_t header_and_setup =
+    static_cast<uint32_t>(header) | (static_cast<uint32_t>(setup) << 16);
+  __atomic_store_n(reinterpret_cast<uint32_t*>(queue_packet),
+                   header_and_setup, __ATOMIC_RELEASE);
+}
+
+// Once all the required data for kernel execution is collected (in this
+// application it is stored in the BinarySearch structure) we can put it in
+// an AQL packet and ring the queue doorbell to tell the command processor to
+// execute it.
+//
+// The function prints the final bounds and whether the element was found.
+// It returns HSA_STATUS_SUCCESS when the search ran to completion (found or
+// not), HSA_STATUS_ERROR_INVALID_ARGUMENT for an empty problem, or the status
+// of whatever step failed.
+hsa_status_t Run(BinarySearch* bs) {
+  hsa_status_t err;
+
+  std::cout << "Executing kernel " << bs->kernel_name << std::endl;
+
+  // No kernel-argument buffer exists until AllocAndSetKernArgs() succeeds
+  // below. Record that explicitly so the pointers are well defined on the
+  // early-return paths too.
+  bs->kern_arg_buffer = nullptr;
+  bs->kern_arg_address = nullptr;
+
+  // Adjust the size of workgroup
+  // This is mostly application specific.
+  if (bs->work_group_size > 64) {
+    bs->work_group_size = 64;
+    bs->num_sub_divisions = bs->length / bs->work_group_size;
+
+    if (bs->num_sub_divisions < bs->work_group_size) {
+      bs->num_sub_divisions = bs->work_group_size;
+    }
+  }
+
+  // One work-item per sub-division. This is set on every path so that the
+  // AQL packet never picks up an unset grid size.
+  bs->work_grid_size = bs->num_sub_divisions;
+
+  // Nothing to search, or nothing to divide by below.
+  if (bs->length == 0 || bs->num_sub_divisions == 0) {
+    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  // Explanation of BinarySearch algorithm.
+  /*
+   * Since a plain binary search on the GPU would not achieve much benefit
+   * over the CPU we are doing an N'ary search. We split the array into N
+   * segments every pass and therefore get log (base N) passes instead of log
+   * (base 2) passes.
+   *
+   * In every pass, only the thread that can potentially have the element we
+   * are looking for writes to the output array. For ex: if we are looking to
+   * find 4567 in the array and every thread is searching over a segment of
+   * 1000 values and the input array is 1, 2, 3, 4,... then the first thread
+   * is searching in 1 to 1000, the second one from 1001 to 2000, etc. The
+   * first one does not write to the output. The second one doesn't either.
+   * The fifth one however is from 4001 to 5000. So it can potentially have
+   * the element 4567 which lies between them.
+   *
+   * This particular thread writes to the output the lower bound, upper bound
+   * and whether the element equals the lower bound element. So, it would be
+   * 4001, 5000, 0
+   *
+   * The next pass would subdivide 4001 to 5000 into smaller segments and
+   * continue the same process from there.
+   *
+   * When a pass returns 1 in the third element, it means the element has been
+   * found and we can stop executing the kernel. If the element is not found,
+   * then the execution stops after looking at segment of size 1.
+   */
+
+  uint32_t global_lower_bound = 0;
+  uint32_t global_upper_bound = bs->length - 1;
+  uint32_t sub_div_size = (global_upper_bound - global_lower_bound + 1) /
+                          bs->num_sub_divisions;
+
+  // The array is sorted, so a value outside [first, last] cannot be present.
+  if ((bs->input[0] > bs->find_me) ||
+      (bs->input[bs->length - 1] < bs->find_me)) {
+    bs->output[0] = 0;
+    bs->output[1] = bs->length - 1;
+    bs->output[2] = 0;
+    std::cout << "Returning too early" << std::endl;
+    return HSA_STATUS_SUCCESS;
+  }
+
+  // Non-zero so that the loop below is entered at least once.
+  bs->output[3] = 1;
+
+  // Setup the kernel args
+  // See the meta-data for the compiled OpenCL kernel code to ascertain
+  // the sizes, padding and alignment required for kernel arguments.
+  // This can be seen by executing
+  // $ amdgcn-amd-amdhsa-readelf -aw ./binary_search_kernels.hsaco
+  // The kernel code will expect the following arguments aligned as shown.
+  typedef uint32_t uint2[2];
+  typedef uint32_t uint4[4];
+  struct __attribute__((aligned(16))) local_args_t {
+    uint4* outputArray;
+    uint2*  sortedArray;
+    uint32_t findMe;
+    uint32_t pad;
+    uint64_t global_offset_x;
+    uint64_t global_offset_y;
+    uint64_t global_offset_z;
+  } local_args;
+
+  local_args.outputArray = reinterpret_cast<uint4*>(bs->output);
+  local_args.sortedArray = reinterpret_cast<uint2*>(bs->input_arr_local);
+  local_args.findMe = bs->find_me;
+  // The padding is copied to the kernarg buffer along with the rest of the
+  // structure, so give it a defined value.
+  local_args.pad = 0;
+  local_args.global_offset_x = 0;
+  local_args.global_offset_y = 0;
+  local_args.global_offset_z = 0;
+
+  // Copy the kernel args structure into kernel arg memory
+  err = AllocAndSetKernArgs(bs, &local_args, sizeof(local_args),
+                            &bs->kern_arg_address);
+  RET_IF_HSA_ERR(err);
+
+  // Populate an AQL packet with the info we've gathered
+  hsa_kernel_dispatch_packet_t aql;
+  PopulateAQLPacket(bs, &aql);
+
+  // Bytes of segment bounds handed to the kernel per pass: two words for
+  // each sub-division.
+  uint32_t in_length = bs->num_sub_divisions * 2 * sizeof(uint32_t);
+
+  while ((sub_div_size > 1) && (bs->output[3] != 0)) {
+    // Record the first and last value of every segment.
+    // NOTE(review): the segments are always taken from the start of the
+    // input (global_lower_bound is not added to the indices) and the bound
+    // update after the dispatch is not relative to the previous bound either.
+    // With the default sizes only one pass runs, so this cannot show up;
+    // confirm the intended behavior for inputs that need several passes.
+    for (uint32_t i = 0 ; i < bs->num_sub_divisions; i++) {
+      int idx1 = i * sub_div_size;
+      int idx2 = ((i + 1) * sub_div_size) - 1;
+      bs->input_arr[2 * i] = bs->input[idx1];
+      bs->input_arr[2 * i + 1] = bs->input[idx2];
+    }
+
+    // Copy kernel parameter from system memory to local memory
+    err = AgentMemcpy(reinterpret_cast<uint8_t*>(bs->input_arr_local),
+                      reinterpret_cast<uint8_t*>(bs->input_arr),  in_length, bs->gpu_dev,
+                      bs->cpu_dev);
+
+    RET_IF_HSA_ERR(err);
+
+    // Reset output buffer to zero
+    bs->output[3] = 0;
+
+    // Dispatch kernel with global work size, work group size with ONE
+    // dimension and wait for kernel to complete
+
+    // Compute the write index of queue and copy Aql packet into it
+    uint64_t que_idx = hsa_queue_load_write_index_relaxed(bs->queue);
+
+    const uint32_t mask = bs->queue->size - 1;
+
+    // This function simply copies the data we've collected so far from our
+    // local AQL packet into the queue, except the setup and header fields.
+    WriteAQLToQueue(&aql, bs->queue);
+
+    uint32_t aql_header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
+    aql_header |= HSA_FENCE_SCOPE_SYSTEM <<
+                  HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
+    aql_header |= HSA_FENCE_SCOPE_SYSTEM <<
+                  HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+
+    // Set the packet's type, acquire and release fences. This should be done
+    // atomically after all the other fields have been set, using release
+    // memory ordering to ensure all the fields are set when the door bell
+    // signal is activated.
+    void* q_base = bs->queue->base_address;
+
+    AtomicSetPacketHeader(aql_header, aql.setup,
+                          &(reinterpret_cast<hsa_kernel_dispatch_packet_t*>(q_base))[que_idx & mask]);
+
+    // Increment the write index and ring the doorbell to dispatch kernel.
+    hsa_queue_store_write_index_relaxed(bs->queue, (que_idx + 1));
+    hsa_signal_store_relaxed(bs->queue->doorbell_signal, que_idx);
+
+    // Wait on the dispatch signal until the kernel is finished.
+    // Modify the wait condition to HSA_WAIT_STATE_ACTIVE (instead of
+    // HSA_WAIT_STATE_BLOCKED) if polling is needed instead of blocking, as we
+    // have below.
+    // The call below will block until the condition is met. Below we have said
+    // the condition is that the signal value (initialized to 1) associated with
+    // the queue is less than 1. When the kernel associated with the queued AQL
+    // packet has completed execution, the signal value is automatically
+    // decremented by the packet processor.
+    hsa_signal_value_t value = hsa_signal_wait_scacquire(bs->signal,
+                               HSA_SIGNAL_CONDITION_LT, 1,
+                               UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
+
+    // value should be 0, or we timed-out
+    if (value) {
+      std::cout << "Timed out waiting for kernel to complete?" << std::endl;
+      RET_IF_HSA_ERR(HSA_STATUS_ERROR);
+    }
+
+    // Reset the signal to its initial value for the next iteration
+    hsa_signal_store_screlease(bs->signal, 1);
+
+    // Binary search algorithm stuff...
+    // Narrow the bounds to the segment reported in output[0] and split that
+    // segment again for the next pass.
+    global_lower_bound = bs->output[0] * sub_div_size;
+    global_upper_bound = global_lower_bound + sub_div_size - 1;
+    sub_div_size = (global_upper_bound - global_lower_bound + 1) /
+                   bs->num_sub_divisions;
+  }
+
+  // Finish with a linear scan of the remaining range on the host.
+  uint32_t element_index = UINT_MAX;
+
+  for (uint32_t i = global_lower_bound; i <= global_upper_bound; i++) {
+    if (bs->input[i] == bs->find_me) {
+      element_index = i;
+      bs->output[0] = i;
+      bs->output[1] = i + 1;
+      bs->output[2] = 1;
+      break;
+    }
+
+    // Element is not found in region specified
+    // by global lower bound to global upper bound
+    bs->output[2] = 0;
+  }
+
+  uint32_t is_elem_found = bs->output[2];
+
+  std::cout << "Lower bound = " << global_lower_bound << std::endl;
+  std::cout << "Upper bound = " << global_upper_bound << std::endl;
+  std::cout << "Element search for = " << bs->find_me << std::endl;
+
+
+  if (is_elem_found == 1) {
+    std::cout << "Element found at index " << element_index << std::endl;
+  }
+  else {
+    std::cout << "Element value " << bs->find_me << " not found" << std::endl;
+  }
+
+  return HSA_STATUS_SUCCESS;
+}
+
+// Release all the RocR resources we have acquired in this application: the
+// four data buffers, the kernel-argument buffer, the queue and the
+// completion signal, and finally shut the runtime down.
+//
+// Returns HSA_STATUS_SUCCESS, or the status of the first call that failed
+// (in which case the remaining resources are not released).
+hsa_status_t CleanUp(BinarySearch* bs) {
+  hsa_status_t err;
+
+  err = hsa_amd_memory_pool_free(bs->input);
+  RET_IF_HSA_ERR(err);
+  bs->input = nullptr;
+
+  err = hsa_amd_memory_pool_free(bs->output);
+  RET_IF_HSA_ERR(err);
+  bs->output = nullptr;
+
+  err = hsa_amd_memory_pool_free(bs->input_arr);
+  RET_IF_HSA_ERR(err);
+  bs->input_arr = nullptr;
+
+  // This buffer is allocated together with the three above and has to be
+  // released with them.
+  err = hsa_amd_memory_pool_free(bs->input_arr_local);
+  RET_IF_HSA_ERR(err);
+  bs->input_arr_local = nullptr;
+
+  // The kernel-argument buffer is allocated while running the search; skip
+  // it if it is known not to exist. kern_arg_address points into this buffer
+  // and must not be freed separately.
+  if (bs->kern_arg_buffer != nullptr) {
+    err = hsa_amd_memory_pool_free(bs->kern_arg_buffer);
+    RET_IF_HSA_ERR(err);
+    bs->kern_arg_buffer = nullptr;
+    bs->kern_arg_address = nullptr;
+  }
+
+  err = hsa_queue_destroy(bs->queue);
+  RET_IF_HSA_ERR(err);
+
+  err = hsa_signal_destroy(bs->signal);
+  RET_IF_HSA_ERR(err);
+
+  err = hsa_shut_down();
+  RET_IF_HSA_ERR(err);
+
+  return HSA_STATUS_SUCCESS;
+}
+
+int main(int argc, char* argv[]) {
+  // This BinarySearch structure (bs) below holds all of the appl. specific
+  // info we need to run the sample. This includes algorithm specific
+  // information as well as handles to RocR/HSA objects.
+
+  // The basic structure of this sample is to fill in this structure with the
+  // required RocR/HSA handles to RocR resources (e.g., agents, memory pools,
+  // queues, etc.) and then dispatch the packets to the queue, and examine the
+  // output.
+
+  BinarySearch bs;
+  hsa_status_t err;
+
+  // Set some working values specific to this application
+  InitializeBinarySearch(&bs);
+
+  // hsa_init() initializes internal data structures and causes devices (agents),
+  // memory pools and other resources to be discovered.
+  err = hsa_init();
+  RET_IF_HSA_ERR(err);
+
+  // Find the agents needed for the sample
+  err = FindDevices(&bs);
+  RET_IF_HSA_ERR(err);
+
+  // Create the completion signal used when dispatching a packet
+  err = hsa_signal_create(1, 0, NULL, &bs.signal);
+  RET_IF_HSA_ERR(err);
+
+  // Create a queue to submit our binary search AQL packets
+  err = hsa_queue_create(bs.gpu_dev, 128, HSA_QUEUE_TYPE_MULTI, NULL, NULL,
+                         UINT32_MAX, UINT32_MAX, &bs.queue);
+  RET_IF_HSA_ERR(err);
+
+  // Find the HSA memory pools we need to run this sample
+  err = FindPools(&bs);
+  RET_IF_HSA_ERR(err);
+
+  // Allocate memory from the correct memory pool, and initialize them as
+  // neeeded for the algorihm.
+  err = AllocateAndInitBuffers(&bs);
+  RET_IF_HSA_ERR(err);
+
+  // Create a kernel object from the pre-compiled kernel, and read some
+  // attributes associated with the kernel that we will need.
+  err = LoadKernelFromObjFile(&bs);
+  RET_IF_HSA_ERR(err);
+
+  // Fill in the AQL packet, assign the kernel arguments, enqueue the packet,
+  // "ring" the doorbell, and wait for completion.
+  err = Run(&bs);
+  RET_IF_HSA_ERR(err);
+
+  // Release all the RocR resources we've acquired and shutdown HSA.
+  err = CleanUp(&bs);
+
+  return 0;
+}
+
+
+#undef RET_IF_HSA_ERR
@@ -0,0 +1,127 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+/**
+ * One instance of this kernel call is a thread.
+ * Each thread finds out the segment in which it should look for the element.
+ * After that, it checks if the element is between the lower bound and upper
+ * bound of its segment. If yes, then this segment becomes the total
+ * searchspace for the next pass.
+ *
+ * To achieve this, it writes the lower bound and upper bound to the output
+ * array. In case the element at the left end (lower bound) matches the element
+ * we are looking for, that is marked in the output and we no longer need to
+ * look any further.
+ */
+ 
+__kernel void
+binarySearch(__global uint4 * outputArray,
+             __const __global uint2  * sortedArray,
+             const   unsigned int findMe) {
+  // Each work-item examines exactly one (lower, upper) pair of sortedArray,
+  // selected by its global id.
+  const unsigned int workItem = get_global_id(0);
+  const uint2 bounds = sortedArray[workItem];
+
+  // Inclusive range test: bounds.x is the lower bound and bounds.y the upper
+  // bound of this work-item's segment.
+  const bool inRange = (bounds.x <= findMe) && (findMe <= bounds.y);
+
+  // Only the work-item whose segment contains findMe writes anything: it
+  // records its id in outputArray[0].x and sets outputArray[0].w to 1 so the
+  // segment can be used as the search space for the next pass. All other
+  // work-items leave the output untouched.
+  if (inRange) {
+    outputArray[0].x = workItem;
+    outputArray[0].w = 1;
+  }
+}
+
+
+__kernel void
+binarySearch_mulkeys(__global int *keys,
+                     __global uint *input,
+                     const unsigned int numKeys,
+                     __global int *output) {
+
+  // Every work-item owns a fixed window of 256 consecutive input elements:
+  // [first, last], both ends inclusive.
+  const int first = (int)get_global_id(0) * 256;
+  const int last = first + 255;
+
+  // For each key, record the start of this window in output[k] when the key
+  // lies between the window's first and last element (inclusive). Keys that
+  // fall outside the window leave output[k] untouched.
+  for (unsigned int k = 0; k < numKeys; ++k) {
+    if (keys[k] < input[first]) {
+      continue;
+    }
+    if (keys[k] > input[last]) {
+      continue;
+    }
+    output[k] = first;
+  }
+
+}
+
+
+/**
+ * Searches several keys concurrently. The input array is split into
+ * numSubdivisions contiguous segments; work-item i binary-searches segment
+ * (i % numSubdivisions) for key number (i / numSubdivisions).
+ *
+ * keys            - keys to look for
+ * input           - array to search; the binary search assumes it is sorted
+ *                   in ascending order
+ * inputSize       - number of elements in input
+ * numSubdivisions - number of segments input is split into (must be non-zero)
+ * output          - output[k] receives the index in input at which key k was
+ *                   found; it is left untouched when the key is not found
+ */
+__kernel void
+binarySearch_mulkeysConcurrent(__global uint *keys,
+                               __global uint *input,
+                               const unsigned int inputSize, // num. of inputs
+                               const unsigned int numSubdivisions,
+                               __global int *output) {
+
+  const size_t gid = get_global_id(0);
+  const size_t segment = gid % numSubdivisions;
+  const size_t keyIdx = gid / numSubdivisions;
+  const unsigned int segmentLen = inputSize / numSubdivisions;
+
+  // Both bounds are inclusive. The upper bound must be the LAST element of
+  // the segment (start + length - 1): using start + length would let the last
+  // segment read input[inputSize], one element past the end of the array.
+  // The last segment also absorbs the remainder when inputSize is not an
+  // exact multiple of numSubdivisions, so no trailing element is skipped.
+  int lBound = (int)(segment * segmentLen);
+  int uBound = (segment == numSubdivisions - 1)
+                   ? (int)inputSize - 1
+                   : lBound + (int)segmentLen - 1;
+  const int myKey = keys[keyIdx];
+
+  while(uBound >= lBound) {
+    // Midpoint computed without forming lBound + uBound, which could
+    // overflow for very large inputs.
+    const int mid = lBound + (uBound - lBound) / 2;
+    if(input[mid] == myKey) {
+      output[keyIdx] = mid;
+      return;
+    } else if(input[mid] > myKey) {
+      uBound = mid - 1;
+    } else {
+      lBound = mid + 1;
+    }
+  }
+}
@@ -0,0 +1,224 @@
+#
+# Minimum version of cmake required
+#
+cmake_minimum_required(VERSION 2.8.0)
+
+#
+# GCC 4.8 or higher compiler required.
+#
+#   Setup build environment
+#
+#   1) Set env. variable specifying the location of ROCR header files
+#
+#      export ROCR_DIR="Root for RocR install"
+#
+#   2) Set env. variable ROCRTST_BLD_TYPE to either "Debug" or "Release".
+#      If not set, the default value of "Release" is bound.
+# 
+#      export ROCRTST_BLD_TYPE=Debug or ROCRTST_BLD_TYPE=Release
+#
+#   3) Set env. variable ROCRTST_BLD_BITS to either "32" or "64"
+#      If not set, the default value of "64" is bound.
+# 
+#       export ROCRTST_BLD_BITS=32 or ROCRTST_BLD_BITS=64
+#
+#   4) Set env. variable TARGET_DEVICE to indicate gpu type (e.g., gfx803,
+#      gfx900, ...)
+#
+#   5) Set env. variables AMDHSAFIN_DIR and AMDHSAFIN_TARGET to the
+#      directory containing the amd finalizer executable and version
+#      (e.g., 8:0:3) respectively.
+#
+#   Building rocrtst Suite
+# 
+#   1) Create build folder e.g. "rocrtst/build" - any name will do
+#   2) Cd into build folder
+#   3) Run "cmake .."
+#   4) Run "make"
+#
+
+#
+# Currently support for Windows platform is not present
+#
+if(WIN32)
+  MESSAGE("rocrtst Suite is not supported on Windows platform")
+  RETURN()
+endif()
+
+#
+# Process environment variables relating to Build type and size.
+#
+# ROCRTST_BLD_TYPE is compared case-insensitively: only "debug" selects a
+# Debug build. Any other value, including an unset variable, selects Release.
+#
+string(TOLOWER "$ENV{ROCRTST_BLD_TYPE}" tmp)
+if("${tmp}" STREQUAL debug)
+  set(BUILD_TYPE "Debug")
+  set(ISDEBUG 1)
+else()
+  set(BUILD_TYPE "Release")
+  set(ISDEBUG 0)
+endif()
+
+# ROCRTST_BLD_BITS: only "32" selects a 32-bit build; anything else (or unset)
+# selects 64-bit. ONLY64STR is the suffix appended to target and library names
+# ("" for 32-bit, "64" for 64-bit).
+if("$ENV{ROCRTST_BLD_BITS}" STREQUAL 32)
+  set (ONLY64STR "")
+  set (IS64BIT 0)
+else()
+  set (ONLY64STR "64")
+  set (IS64BIT 1)
+endif()
+
+set(ROCR_INC_DIR $ENV{ROCR_DIR}/hsa/include) 
+set(ROCR_LIB_DIR $ENV{ROCR_DIR}/lib)
+ 
+#
+# Determine ROCR Header files are present
+#
+if(NOT EXISTS ${ROCR_INC_DIR}/hsa/hsa.h)
+  MESSAGE("ERROR: ${ROCR_INC_DIR}/hsa/hsa.h does not exist. Check ROCR_DIR env. variable.")
+  RETURN()
+endif()
+
+
+#
+# Determine whether the ROCR runtime library is present. ONLY64STR is "" for
+# 32-bit builds and "64" for 64-bit builds, so this checks libhsa-runtime.so
+# or libhsa-runtime64.so respectively. ROCR_LIB_DIR is derived from the
+# ROCR_DIR environment variable, so that is the variable the user must fix.
+#
+if(NOT EXISTS ${ROCR_LIB_DIR}/libhsa-runtime${ONLY64STR}.so)
+  MESSAGE("ERROR: ${ROCR_LIB_DIR}/libhsa-runtime${ONLY64STR}.so does not exist. Check ROCR_DIR env. variable.")
+  RETURN()
+endif()
+
+#
+# Set Name for rocrtst Suite Project
+#
+set(ROCRTST_SUITE_NAME "rocrtst${ONLY64STR}")
+project (${ROCRTST_SUITE_NAME})
+
+#
+# Print out the build configuration being used:
+#
+#   Build Src directory
+#   Build Binary directory
+#   Build Type: Debug Vs Release, 32 Vs 64
+#   Compiler Version, etc
+#
+MESSAGE("")
+MESSAGE("-------------IS64BIT: " ${IS64BIT})
+MESSAGE("-----------BuildType: " ${BUILD_TYPE})
+MESSAGE("------------Compiler: " ${CMAKE_CXX_COMPILER})
+MESSAGE("-------------Version: " ${CMAKE_CXX_COMPILER_VERSION})
+MESSAGE("--------Proj Src Dir: " ${PROJECT_SOURCE_DIR})
+MESSAGE("--------Proj Bld Dir: " ${PROJECT_BINARY_DIR})
+MESSAGE("--------Proj Lib Dir: " ${PROJECT_BINARY_DIR}/lib)
+MESSAGE("--------Proj Exe Dir: " ${PROJECT_BINARY_DIR}/bin)
+MESSAGE("")
+
+#
+# Set the build type based on user input
+#
+set(CMAKE_BUILD_TYPE ${BUILD_TYPE})
+#
+# Flag to enable / disable verbose output.
+#
+SET( CMAKE_VERBOSE_MAKEFILE on )
+#
+# Compiler pre-processor definitions.
+#
+# Define MACRO "DEBUG" if build type is "Debug"
+if(${BUILD_TYPE} STREQUAL "Debug")
+add_definitions(-DDEBUG)
+endif()
+
+add_definitions(-D__linux__)
+add_definitions(-DLITTLEENDIAN_CPU=1)
+
+#
+# Linux Compiler options
+#
+set(CMAKE_CXX_FLAGS "-std=c++11 ")
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-math-errno")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-threadsafe-statics")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmerge-all-constants")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fms-extensions")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic")
+
+
+#
+# Extend the compiler flags for 64-bit builds
+#
+if (IS64BIT) 
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64  -msse -msse2")
+else()
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")
+endif()
+
+#
+# Add compiler flags to include symbol information for debug builds
+#
+if(ISDEBUG)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb")
+endif()
+MESSAGE("ISDEBUG STEP:Done")
+
+
+set(ROCRTST_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
+
+# Set Name for Google Test Framework and build it as a
+# static library to be linked with user test programs
+#
+set(GOOGLE_TEST_FRWK_NAME "google-test-frwk${ONLY64STR}")
+add_subdirectory(${ROCRTST_ROOT}/gtest "${PROJECT_BINARY_DIR}/gtest")
+set (ROCRTST_LIBS ${ROCRTST_LIBS} ${GOOGLE_TEST_FRWK_NAME}
+					hsa-runtime-tools${ONLY64STR})
+MESSAGE("ROCRTST_LIBS SET STEP:Done")
+#
+#
+# Other source directories
+aux_source_directory(${ROCRTST_ROOT}/common common_srcs)
+
+#
+# Specify the directory containing various libraries of ROCR
+# to be linked against for building ROC Perf applications
+#
+LINK_DIRECTORIES(${ROCR_LIB_DIR})
+
+#
+# Extend the list of libraries to be used for linking ROC Perf Apps
+#
+set(ROCRTST_LIBS ${ROCRTST_LIBS} hsa-runtime${ONLY64STR})
+
+
+# Set Name for rocrtst 
+MESSAGE(${ROCRTST_LIBS})
+set(ROCRTST "rocrtst${ONLY64STR}")
+
+#
+# Source files for building rocrtst
+#
+aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} performanceSources)
+
+
+# Header file include path
+
+include_directories(${ROCR_INC_DIR})
+include_directories(${ROCRTST_ROOT})
+include_directories(${ROCRTST_ROOT}/gtest/include)
+
+# Build rules
+
+add_executable(${ROCRTST} ${performanceSources} ${common_srcs})
+target_link_libraries(${ROCRTST} ${ROCRTST_LIBS} c stdc++ dl pthread rt)
+
+INSTALL(TARGETS ${ROCRTST}
+        ARCHIVE DESTINATION ${PROJECT_BINARY_DIR}/lib
+        LIBRARY DESTINATION ${PROJECT_BINARY_DIR}/lib
+        RUNTIME DESTINATION ${PROJECT_BINARY_DIR}/bin)
+
@@ -0,0 +1,258 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include "cp_process_time.h"
+#include "common/base_rocr_utils.h"
+#include "common/common.h"
+#include "common/helper_funcs.h"
+#include "common/hsatimer.h"
+#include "common/os.h"
+#include "gtest/gtest.h"
+#include "hsa/hsa.h"
+#include "hsa/hsa_ext_amd.h"
+#include "hsa/hsa_ext_finalize.h"
+#include <algorithm>
+
+// Value passed to the kernel through *iter in Run(); Run() also expects the
+// kernel to leave this same value in *result after every dispatch.
+static const uint64_t kKernelIterations = 10000;
+// Sentinel stored in *result before the first dispatch so that a kernel which
+// never wrote its result is detected by the EXPECT_EQ in Run().
+static const uint64_t kTestBadValue = 1234567891234567891;
+// Set up some expectations for reasonable processing times (microseconds).
+// For gfx803, Overhead time had a max of 18.208uS and a min of 7.82uS
+// NOTE(review): the constants below (7.78 / 21.064) do not match the min/max
+// quoted above -- confirm which figures are current.
+static const double kGfx803MinOverhead = 7.78;
+static const double kGfx803MaxOverhead = 21.064;
+// Fractional slack applied to both bounds: Run() accepts overheads within
+// [min * (1 - factor), max * (1 + factor)].
+static const double kOverheadToleranceFactor = 0.25;
+
+// Constructs the test with every member in a known state. The kernel file
+// and kernel name are configured later, in SetUp().
+// queue_size_ was previously left uninitialized; it is now zeroed so that any
+// read of it is well-defined. The initializer order matches the declaration
+// order in the class (queue_size_, then mean_).
+CpProcessTime::CpProcessTime() :
+  BaseRocR(), queue_size_(0), mean_(0.0) {
+}
+
+// Nothing to release here; runtime resources are torn down in Close().
+CpProcessTime::~CpProcessTime() {
+}
+
+// Prepares everything Run() needs: kernel identification, HSA runtime, a
+// profiling-enabled queue, the loaded kernel, and a dispatch packet sized for
+// a single work-item.
+void CpProcessTime::SetUp() {
+  // Identify the kernel object file and the kernel symbol inside it.
+  set_kernel_file_name("simple_kernel.o");
+  set_kernel_name("&__simple_kernel");
+
+  // Without a working runtime there is nothing further to configure.
+  if (rocrtst::InitAndSetupHSA(this) != HSA_STATUS_SUCCESS) {
+    return;
+  }
+
+  // Create a queue on the agent returned by gpu_device1() and make it the
+  // main queue of this test.
+  hsa_queue_t* q = nullptr;
+  rocrtst::CreateQueue(*gpu_device1(), &q);
+  ASSERT_NE(q, nullptr);
+  set_main_queue(q);
+
+  // Run() queries per-dispatch timestamps, so profiling is switched on for
+  // the queue before anything is dispatched.
+  hsa_status_t err = hsa_amd_profiling_set_profiler_enabled(q, 1);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  // Load and finalize the kernel
+  err = rocrtst::LoadKernelFromObjFile(this);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  // Dispatch geometry: one work-group containing one work-item.
+  rocrtst::InitializeAQLPacket(this, &aql());
+  aql().grid_size_x = 1;
+  aql().workgroup_size_x = 1;
+}
+
+// Number of dispatches Run() actually performs: 20% more than the requested
+// num_iteration(), plus one. Run() discards the first sample and trims the
+// sorted samples back down to num_iteration() entries.
+size_t CpProcessTime::RealIterationNum() {
+  const double padded = num_iteration() * 1.2 + 1;
+  return static_cast<size_t>(padded);
+}
+
+// Measures the per-dispatch overhead of the command processor.
+//
+// The kernel is dispatched RealIterationNum() times. For every dispatch the
+// host measures the time from writing the packet header / ringing the
+// doorbell until the completion signal fires, subtracts the kernel execution
+// time reported by the profiling timestamps, and records the difference as
+// the overhead. After the loop the first sample is dropped, the rest are
+// sorted, everything beyond num_iteration() samples is trimmed, and the mean
+// of what remains is stored in mean_.
+//
+// Returns early (leaving mean_ unchanged) if CheckProfile() fails or any
+// ASSERT_* fails.
+void CpProcessTime::Run() {
+  hsa_status_t err;
+  // One overhead sample (microseconds) per dispatch.
+  std::vector<double> timer;
+
+  if (!rocrtst::CheckProfile(this)) {
+    return;
+  }
+
+  hsa_agent_t* gpu_dev = gpu_device1();
+  hsa_agent_t* cpu_dev = cpu_device();
+
+  ASSERT_NE(gpu_dev, nullptr);
+  ASSERT_NE(cpu_dev, nullptr);
+  uint32_t it = RealIterationNum();
+
+  // Kernel argument block: two pointers to 64-bit values.
+  typedef struct args_t {
+    uint64_t* iteration;
+    uint64_t* result;
+  } args;
+
+  err = rocrtst::SetPoolsTypical(this);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  // Allocate the two 64-bit values from cpu_pool().
+  uint64_t* iter = NULL;
+  uint64_t* result = NULL;
+  err = rocrtst::AllocAndAllowAccess(this, sizeof(uint64_t), cpu_pool(),
+                                                               (void**)&iter);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  err = rocrtst::AllocAndAllowAccess(this, sizeof(uint64_t), cpu_pool(),
+                                                             (void**)&result);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+  
+  // Seed the result with a sentinel so a kernel that never ran is detected
+  // by the EXPECT_EQ inside the loop.
+  *iter = kKernelIterations;
+  *result = kTestBadValue;
+
+  args  k_args;
+
+  k_args.iteration = (uint64_t*)iter;
+  k_args.result = (uint64_t*)result;
+
+  err = rocrtst::AllocAndSetKernArgs(this, &k_args, sizeof(args));
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  rocrtst::WriteAQLToQueue(this);
+
+  void * q_base_addr = main_queue()->base_address;
+  // Used below to wrap the write index into a slot of the queue's packet
+  // array.
+  const uint32_t queue_mask = main_queue()->size - 1;
+  uint32_t aql_header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
+//  aql_header |= HSA_FENCE_SCOPE_SYSTEM <<
+//                                    HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
+//  aql_header |= HSA_FENCE_SCOPE_SYSTEM <<
+//                                    HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+
+  for (uint32_t i = 0; i < it; i++) {
+    // uint64_t que_idx = hsa_queue_load_write_index_relaxed(main_queue());
+    // Reserve the next packet slot; que_idx is the index before the add.
+    uint64_t que_idx = hsa_queue_add_write_index_relaxed(main_queue(), 1);
+
+    // Get a time stamp and ring the doorbell to dispatch the kernel.
+    rocrtst::PerfTimer p_timer;
+    int id = p_timer.CreateTimer();
+    p_timer.StartTimer(id);
+
+    // Writing the header/setup words of the reserved slot is what hands the
+    // packet to the packet processor.
+    rocrtst::AtomicSetPacketHeader(aql_header, aql().setup,
+             &((hsa_kernel_dispatch_packet_t*)(q_base_addr))[que_idx & queue_mask]);
+
+    hsa_queue_store_write_index_relaxed(main_queue(), (que_idx + 1));
+    hsa_signal_store_relaxed(main_queue()->doorbell_signal, que_idx);
+
+    // Actively wait until the completion signal value drops below 1.
+    while (hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_LT, 1,
+                                     (uint64_t) - 1, HSA_WAIT_STATE_ACTIVE))
+      ;
+//    hsa_signal_value_t value = hsa_signal_wait_scacquire(signal(),
+//                HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
+    // value should be 0, or we timed-out
+    //ASSERT_EQ(value, 0);
+
+    p_timer.StopTimer(id);
+
+    // Device-side start/end timestamps of the dispatch that just completed.
+    hsa_amd_profiling_dispatch_time_t dispatch_time;
+    err = hsa_amd_profiling_get_dispatch_time(*gpu_dev, signal(),
+          &dispatch_time);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    uint64_t ticks = dispatch_time.end - dispatch_time.start;
+    uint64_t freq;
+
+    err = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &freq);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    // Re-arm the completion signal for the next dispatch.
+    hsa_signal_store_screlease(signal(), 1);
+
+    double execution_time = (double) ticks / freq * 1e6; //convert to us
+    // presumably ReadTimer() returns seconds, making temp microseconds --
+    // TODO confirm against PerfTimer
+    double temp = p_timer.ReadTimer(id) * 1e6;
+    // Overhead = host-observed time minus device execution time.
+    double cp_time = temp - execution_time;
+
+#ifdef DEBUG
+    std::cout << "Total:" << temp << "uS ";
+    std::cout << "Execution:" << execution_time << "uS ";
+    std::cout << "Overhead:" << cp_time << "uS ";
+    std::cout << "Overhead %:" << cp_time / execution_time * 100 << std::endl;
+#endif
+
+    // The kernel is expected to have written kKernelIterations to *result.
+    EXPECT_EQ(kKernelIterations, *result);
+    timer.push_back(cp_time);
+
+    // Assume overhead will not deviate too much from previously recorded
+    // values. If this does happen and there is not a performance bug,
+    // modify these constants
+
+    // This may need to be made specific to the gpu being used
+    EXPECT_GT(cp_time, kGfx803MinOverhead * (1 - kOverheadToleranceFactor));
+    EXPECT_LT(cp_time, kGfx803MaxOverhead * (1 + kOverheadToleranceFactor));
+
+    // Clear the result so a stale value cannot satisfy the next iteration's
+    // check.
+    *result = 0;
+  }
+
+  // Abandon the first result, sort ascending, then keep only the
+  // num_iteration() smallest samples (the extra samples requested by
+  // RealIterationNum() are the largest ones and are dropped here).
+  timer.erase(timer.begin());
+  std::sort(timer.begin(), timer.end());
+
+  timer.erase(timer.begin() + num_iteration(), timer.end());
+  mean_ = rocrtst::CalcMean(timer);
+
+  return;
+}
+
+// Prints the mean overhead computed by Run(), framed by two rule lines.
+// Prints nothing when CheckProfile() fails or when no mean was recorded
+// (mean_ still holds its initial 0.0).
+void CpProcessTime::DisplayResults() const {
+  if (!rocrtst::CheckProfile(this) || mean_ == 0.0) {
+    return;
+  }
+
+  std::cout << "===================================================="
+            << std::endl
+            << "The average Command Processor processing time is:  " << mean_
+            << "us" << std::endl
+            << "===================================================="
+            << std::endl;
+}
+
+// Tears the test down through the shared cleanup helper and fails the test
+// if that helper reports an error.
+void CpProcessTime::Close() {
+  const hsa_status_t err = rocrtst::CommonCleanUp(this);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+}
@@ -0,0 +1,91 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#ifndef __ROCRTST_SRC_CP_PROCESS_TIME_H__
+#define __ROCRTST_SRC_CP_PROCESS_TIME_H__
+#include "perf_common/perf_base.h"
+#include "common/base_rocr.h"
+#include "common/common.h"
+#include "hsa/hsa.h"
+#include "hsa/hsa_ext_amd.h"
+#include <vector>
+
+//@Brief: This class measures the mean command-processor overhead of
+//dispatching a kernel: for each dispatch, the host-observed dispatch time
+//minus the device-reported kernel execution time.
+
+class CpProcessTime: public rocrtst::BaseRocR, public PerfBase {
+ public:
+  //@Brief: Constructor
+  CpProcessTime();
+
+  //@Brief: Destructor
+  virtual ~CpProcessTime();
+
+  //@Brief: Set up the environment for the test: initializes HSA, creates a
+  //profiling-enabled queue, loads the kernel and prepares the AQL packet
+  virtual void SetUp();
+
+  //@Brief: Run the test case: dispatches the kernel repeatedly and stores
+  //the mean overhead in mean_
+  virtual void Run();
+
+  //@Brief: Display the mean overhead; prints nothing if no mean was recorded
+  virtual void DisplayResults() const;
+
+  //@Brief: Clean up and close the runtime
+  virtual void Close();
+
+ private:
+  //@Brief: Get actual iteration number (num_iteration() * 1.2 + 1), i.e. the
+  //number of dispatches Run() performs before trimming samples
+  virtual size_t RealIterationNum();
+
+  //@Brief: Store the size of queue
+  // NOTE(review): no code visible in cp_process_time.cc reads this member --
+  // confirm it is still needed.
+  uint32_t queue_size_;
+
+  //@Brief: The mean time of CP Processing, in microseconds (0.0 until Run()
+  //has produced a value)
+  double mean_;
+
+};
+
+#endif
+
@@ -0,0 +1,220 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include "cu_masking.h"
+#include "common/base_rocr_utils.h"
+#include "gtest/gtest.h"
+
+// Constructs the test with a zeroed dispatch packet and cleared members.
+// SetUp() later initializes the packet and sets its dispatch dimensions.
+CuMasking::CuMasking() :
+  BaseRocR() {
+  // Start from an all-zero AQL dispatch packet.
+  memset(&aql(), 0, sizeof(hsa_kernel_dispatch_packet_t));
+
+  // Members of this class start out cleared.
+  cu_ = NULL;
+  group_region_.handle = 0;
+  mean_ = 0.0;
+}
+
+// Nothing to release here; runtime resources are torn down in Close().
+CuMasking::~CuMasking() {
+}
+
+// Prepares everything Run() needs: kernel identification, HSA runtime, a
+// queue, the loaded kernel, the dispatch packet geometry and the CPU memory
+// pool used for kernel arguments.
+//
+// Returns silently if the HSA runtime cannot be initialized; any later
+// failure is reported through a gtest ASSERT_* and aborts the set-up.
+void CuMasking::SetUp() {
+  hsa_status_t err;
+
+  set_kernel_file_name("cu_masking.o");
+  set_kernel_name("&main");
+
+  if (HSA_STATUS_SUCCESS != rocrtst::InitAndSetupHSA(this)) {
+    return;
+  }
+
+  // Fetch the agent handles only after the runtime has been initialized,
+  // matching CpProcessTime::SetUp().
+  hsa_agent_t* gpu_dev = gpu_device1();
+  hsa_agent_t* cpu_dev = cpu_device();
+
+  // Create a queue. A failed creation leaves q null, which would otherwise
+  // only surface later as a crash in Run().
+  hsa_queue_t* q = nullptr;
+  rocrtst::CreateQueue(*gpu_dev, &q);
+  ASSERT_NE(q, nullptr);
+  set_main_queue(q);
+
+  // Load the kernel; dispatching without a loaded kernel is meaningless, so
+  // a failure here fails the test instead of being ignored.
+  err = rocrtst::LoadKernelFromObjFile(this);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  // Fill up the kernel packet except header
+  // aql().completion_signal=signal();
+  // TODO: Will delete manual_input later
+  uint32_t cu_count = 0;
+  err = hsa_agent_get_info(*gpu_dev,
+          (hsa_agent_info_t) HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &cu_count);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+  std::cout << "CU# is: " << cu_count << std::endl;
+
+  rocrtst::InitializeAQLPacket(this, &aql());
+  aql().workgroup_size_x = 1024;
+
+  //manual_input * group_input;  // workgroup_max_size;
+  aql().grid_size_x = (long long) 1024 * 640 * 640;
+
+  // TODO:Manully set the max cu number to 8, the api return 10
+  std::cout << "Grid size is: " << aql().grid_size_x << std::endl;
+
+  // FindGlobalPool stops the iteration once it has found a pool, so the
+  // expected status is HSA_STATUS_INFO_BREAK rather than success.
+  err = hsa_amd_agent_iterate_memory_pools(*cpu_dev,
+                                        rocrtst::FindGlobalPool, &cpu_pool());
+  ASSERT_EQ(err, HSA_STATUS_INFO_BREAK);
+}
+
+// Padded iteration count: 20% more than the requested num_iteration(), plus
+// one, truncated to an integer.
+size_t CuMasking::RealIterationNum() {
+  const double padded = num_iteration() * 1.2 + 1;
+  return static_cast<size_t>(padded);
+}
+
+// Dispatches the kernel once on a queue restricted by a compute-unit mask and
+// prints how long the dispatch took.
+//
+// Steps: allocate the two kernel arguments and the kernarg block from
+// cpu_pool() and make them accessible to the GPU agent, apply the CU mask to
+// the main queue, write the packet, ring the doorbell, and wait actively on
+// the completion signal.
+//
+// Returns early if CheckProfile() fails or any ASSERT_* fails.
+void CuMasking::Run() {
+  hsa_status_t err;
+
+  if (!rocrtst::CheckProfile(this)) {
+    return;
+  }
+
+  // Kernel argument block: two pointers to 32-bit values.
+  typedef struct args_t {
+    uint32_t* iteration;
+    uint32_t* result;
+  } local_args;
+
+  uint32_t* iter = NULL;
+  uint32_t* result = NULL;
+  err = hsa_amd_memory_pool_allocate(cpu_pool(), sizeof(uint32_t), 0,
+                                     (void**) &iter);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  err = hsa_amd_memory_pool_allocate(cpu_pool(), sizeof(uint32_t), 0,
+                                     (void**) &result);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  *iter = 0xff;
+  *result = 0;
+
+  // Grant the GPU agent access to both host allocations.
+  err = hsa_amd_agents_allow_access(1, gpu_device1(), NULL, iter);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+  err = hsa_amd_agents_allow_access(1, gpu_device1(), NULL, result);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  local_args* kernarg = NULL;
+  err = hsa_amd_memory_pool_allocate(cpu_pool(), kernarg_size(), 0,
+                                     (void**) &kernarg);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  err = hsa_amd_agents_allow_access(1, gpu_device1(), NULL, kernarg);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  kernarg->iteration = iter;
+  kernarg->result = result;
+
+  aql().kernarg_address = kernarg;
+
+  // Reserve the next packet slot; index is the write index before the add.
+  uint64_t index = hsa_queue_add_write_index_relaxed(main_queue(), 1);
+
+  // Used to wrap the write index into a slot of the queue's packet array.
+  const uint32_t queue_mask = main_queue()->size - 1;
+
+  // Set CU mask
+  uint32_t cu_mask = 0;
+#if 0
+  std::cout << "Enter cu mask value:" << std::endl;
+  ASSERT_NE(scanf("%d", &cu_mask), EOF);
+#else
+  cu_mask = 0xAAAAAAAA;
+#endif
+
+  // std::hex is sticky: without restoring the stream flags, every integer
+  // printed to std::cout afterwards (by this or any other test) would come
+  // out in hexadecimal.
+  const std::ios_base::fmtflags saved_flags = std::cout.flags();
+  std::cout << "Value of bit array is: 0x" << std::hex << cu_mask << std::endl;
+  std::cout.flags(saved_flags);
+
+  err = hsa_amd_queue_cu_set_mask(main_queue(), 32, &cu_mask);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  void *q_base_addr = main_queue()->base_address;
+  // Write the aql packet at the calculated queue index address.
+  aql().completion_signal = signal();
+  ((hsa_kernel_dispatch_packet_t*)(q_base_addr))[index & queue_mask] = aql();
+
+  // Get a time stamp and ring the doorbell to dispatch the kernel.
+  rocrtst::PerfTimer p_timer;
+  int id = p_timer.CreateTimer();
+  p_timer.StartTimer(id);
+  ((hsa_kernel_dispatch_packet_t*)q_base_addr)[index & queue_mask].header |=
+                     HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+  hsa_signal_store_screlease(main_queue()->doorbell_signal, index);
+
+  // Wait on the dispatch signal until the kernel is finished.
+  while (hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_LT, 1,
+                                   (uint64_t) - 1, HSA_WAIT_STATE_ACTIVE))
+    ;
+
+  p_timer.StopTimer(id);
+
+  // Re-arm the completion signal.
+  hsa_signal_store_screlease(signal(), 1);
+
+  double t1 = p_timer.ReadTimer(id) * 1e6;
+  std::cout << "Execution time after setting cu masking: " << t1 << std::endl;
+
+  return;
+}
+
+// Prints two rule lines when CheckProfile() succeeds; otherwise prints
+// nothing. No measurement is reported here -- Run() prints the timing itself.
+void CuMasking::DisplayResults() const {
+  if (rocrtst::CheckProfile(this)) {
+    std::cout << "===================================================="
+              << std::endl
+              << "====================================================="
+              << std::endl;
+  }
+}
+
+// Tears the test down through the shared cleanup helper and fails the test
+// if that helper reports an error.
+void CuMasking::Close() {
+  const hsa_status_t err = rocrtst::CommonCleanUp(this);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+}
@@ -0,0 +1,103 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#ifndef __ROCRTST_SRC_CU_MASKING_TIME_H__
+#define __ROCRTST_SRC_CU_MASKING_TIME_H__
+
+#include "perf_common/perf_base.h"
+#include "common/base_rocr.h"
+#include "common/common.h"
+#include "common/hsatimer.h"
+#include "hsa/hsa.h"
+#include "hsa/hsa_ext_amd.h"
+#include "hsa/hsa_ext_finalize.h"
+#include <algorithm>
+#include <vector>
+
+//@Brief: Test class for queue CU masking. Run() applies a CU mask to the
+//main queue with hsa_amd_queue_cu_set_mask(), dispatches the kernel and
+//prints the measured execution time.
+//NOTE(review): the previous brief described an empty-kernel launch latency
+//test and looks copied from another test; confirm the intended description.
+
+class CuMasking: public rocrtst::BaseRocR, public PerfBase {
+ public:
+  //@Brief: Constructor
+  CuMasking();
+
+  //@Brief: Destructor
+  virtual ~CuMasking();
+
+  //@Brief: Set up the environment for the test
+  virtual void SetUp();
+
+  //@Brief: Run the test case
+  virtual void Run();
+
+  //@Brief: Display the results; returns without printing when the profile
+  //check fails
+  virtual void DisplayResults() const;
+
+  //@Brief: Clean up and close the runtime
+  virtual void Close();
+
+ private:
+  //@Brief: Get actual iteration number
+  virtual size_t RealIterationNum();
+
+  //@Brief: Store the size of queue
+  uint32_t queue_size_;
+
+  //@Brief: The mean time of CP Processing
+  double mean_;
+
+  //@Brief: The group memory region
+  hsa_region_t group_region_;
+
+  //@Brief: Pointer to cu_id array
+  uint32_t* cu_;
+
+  // NOTE(review): the purpose of the two fields below is not visible in this
+  // header; presumably they hold user-supplied CU mask settings - confirm
+  // against the implementation. They also lack the trailing underscore used
+  // by the other members.
+  uint32_t manual_input;
+  uint32_t group_input;
+};
+
+#endif
+
@@ -0,0 +1,293 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include "device_load_bandwidth.h"
+#include "common/base_rocr_utils.h"
+#include "common/common.h"
+#include "common/helper_funcs.h"
+#include "common/hsatimer.h"
+#include "common/os.h"
+#include "gtest/gtest.h"
+#include <algorithm>
+
+// TODO: The validation code below is disabled (#if 0) because it still has problems that need debugging.
+#if 0
+// Fill the input buffer so that, for every operation slot, thread `t` reads
+// the value `t << 2`. The buffer holds num_ops * num_thrds entries; each of
+// the num_loops passes rewrites that same range from the start.
+static void initGlobalReadBuffer(uint32_t* in_data, uint32_t num_thrds,
+                                 uint32_t num_ops, uint32_t num_loops) {
+  for (uint32_t pass = 0; pass < num_loops; ++pass) {
+    // Every pass starts writing again at the beginning of the buffer.
+    uint32_t slot = 0;
+    for (uint32_t op = 0; op < num_ops; ++op) {
+      for (uint32_t tid = 0; tid < num_thrds; ++tid) {
+        in_data[slot] = tid << 2;
+        ++slot;
+      }
+    }
+  }
+}
+
+// Check the kernel output: entry `t` must equal (t << 2) * scale. The first
+// mismatch is reported on stdout and ends the check with `false`; `true`
+// means all num_thrds entries matched.
+static bool verifyGlobalLoadKernel(uint32_t* data, uint32_t num_thrds,
+                                   uint32_t scale, const char* kernel_name) {
+  for (uint32_t tid = 0; tid < num_thrds; ++tid) {
+    const uint32_t expected = (tid << 2) * scale;
+    const uint32_t actual = data[tid];
+
+    if (actual == expected) {
+      continue;
+    }
+
+    // Mismatch: dump both values and the offending index, then give up.
+    std::cout << "Value expected = " << expected << std::endl;
+    std::cout << "Value of data = " << actual << std::endl;
+
+    std::cout << kernel_name << ": VALIDATION FAILED ! Bad index: " << tid
+              << std::endl;
+    std::cout << kernel_name << ": VALUE @ Bad index: " << actual
+              << std::endl;
+    std::cout << std::endl;
+    return false;
+  }
+
+#ifdef DEBUG
+  std::cout << kernel_name << ": Passed validation" << std::endl;
+  std::cout << std::endl;
+#endif
+
+  return true;
+}
+#endif
+
+// Constructor
+//
+// Starts from a zeroed work-item configuration; SetUp() replaces it through
+// SetWorkItemNum(). Both buffer pointers start out null so that Close() never
+// hands an indeterminate pointer to hsa_amd_memory_pool_free() when Run()
+// returns before allocating them.
+DeviceLoadBandwidth::DeviceLoadBandwidth() :
+  BaseRocR() {
+
+  set_group_size(0);
+  set_enable_interrupt(false);
+
+  num_group_ = 0;
+  num_cus_ = 0;
+
+  kernel_loop_count_ = 0;
+  mean_ = 0.0;
+  data_size_ = 0;
+
+  set_requires_profile(HSA_PROFILE_BASE);
+
+  in_data_ = nullptr;
+  out_data_ = nullptr;
+}
+
+// Destructor: nothing to release here; buffers are freed in Close().
+DeviceLoadBandwidth::~DeviceLoadBandwidth() = default;
+
+// Set up the test environment: choose the work-item configuration, name the
+// kernel, initialize HSA, create the dispatch queue, load the kernel and
+// pre-fill the AQL packet.
+void DeviceLoadBandwidth::SetUp() {
+  SetWorkItemNum();
+
+  set_kernel_file_name("sysMemRead.o");
+  set_kernel_name("&__SysMemLoad");
+
+  // Without a working HSA setup there is nothing more to prepare.
+  if (rocrtst::InitAndSetupHSA(this) != HSA_STATUS_SUCCESS) {
+    return;
+  }
+
+  // Create the queue on the agent returned by gpu_device1().
+  hsa_queue_t* q = nullptr;
+  rocrtst::CreateQueue(*gpu_device1(), &q);
+  ASSERT_NE(q, nullptr);
+  set_main_queue(q);
+
+  rocrtst::LoadKernelFromObjFile(this);
+
+  // One work-item per (CU, group, lane) combination.
+  const uint32_t grid_items = num_cus_ * num_group_ * group_size();
+
+  // Fill in the parts of the AQL packet that are known at this point.
+  rocrtst::InitializeAQLPacket(this, &aql());
+  aql().workgroup_size_x = group_size();
+  aql().grid_size_x = grid_items;
+}
+
+// Run the test
+//
+// Allocates the input and output buffers from the device pool, publishes the
+// kernel arguments, then dispatches the kernel num_iteration() times, timing
+// each dispatch from just before the packet header is set until the
+// completion signal wait returns. The mean of the retained samples is stored
+// in mean_ and the input buffer size in data_size_ for DisplayResults().
+// Returns early, without allocating anything, when the profile check fails.
+void DeviceLoadBandwidth::Run() {
+  hsa_status_t err;
+
+  if (!rocrtst::CheckProfile(this)) {
+    return;
+  }
+
+  uint32_t total_workitems = num_cus_ * num_group_ * group_size();
+
+  uint32_t ops_thrd = 32;
+  uint64_t addr_step = (uint64_t) total_workitems * sizeof(uint64_t);
+  uint64_t total_ops = (uint64_t) total_workitems * ops_thrd;
+  uint64_t in_data_size = (uint64_t) total_ops * sizeof(uint64_t);
+
+  data_size_ = in_data_size;
+
+  err = rocrtst::SetPoolsTypical(this);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  err = rocrtst::AllocAndAllowAccess(this, in_data_size, device_pool(),
+                                                  (void**)&in_data_);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  // The output buffer is sized like the input buffer. Keep the size 64-bit so
+  // it is not truncated on the way to the allocator.
+  //uint32_t out_data_size = total_workitems * sizeof(uint64_t);
+  uint64_t out_data_size = in_data_size;
+
+  err = rocrtst::AllocAndAllowAccess(this, out_data_size, device_pool(),
+                                                          (void**)&out_data_);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+#if 0
+  initGlobalReadBuffer(in_data_, total_workitems, ops_thrd, kernel_loop_count_);
+#endif
+
+  struct local_args_t {
+    void* arg0;
+    void* arg1;
+    uint64_t arg2;
+    void* arg3;
+  } local_args;
+
+  local_args.arg0 = in_data_;
+  // NOTE(review): in_data_ is a uint32_t*, so this points total_ops * 4 bytes
+  // into a buffer of total_ops * sizeof(uint64_t) bytes, i.e. to its middle.
+  // Confirm against the kernel whether arg1 is meant to be the buffer end.
+  local_args.arg1 = in_data_ + total_ops;
+  local_args.arg2 = addr_step;
+  local_args.arg3 = out_data_;
+
+  // Copy the kernel args structure into a registered memory block
+  err = rocrtst::AllocAndSetKernArgs(this, &local_args, sizeof(local_args));
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  std::vector<double> time;
+
+  rocrtst::WriteAQLToQueue(this);
+  // Write the aql packet at the calculated queue index address.
+  const uint32_t queue_mask = main_queue()->size - 1;
+  void * q_base = main_queue()->base_address;
+
+  for (uint32_t i = 0; i < num_iteration(); i++) {
+    uint64_t que_idx = hsa_queue_load_write_index_relaxed(main_queue());
+
+    rocrtst::PerfTimer p_timer;
+    int id = p_timer.CreateTimer();
+    p_timer.StartTimer(id);
+
+    // Setting the header makes the packet valid; the doorbell then tells the
+    // packet processor to pick it up.
+    uint32_t aql_header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
+    rocrtst::AtomicSetPacketHeader(aql_header, aql().setup,
+             &((hsa_kernel_dispatch_packet_t*)(q_base))[que_idx & queue_mask]);
+    hsa_signal_store_screlease(main_queue()->doorbell_signal, que_idx);
+
+    // Wait on the dispatch signal until the kernel is finished.
+    while (hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_LT, 1,
+                                     (uint64_t) - 1, HSA_WAIT_STATE_ACTIVE))
+      ;
+
+    p_timer.StopTimer(id);
+
+#ifdef DEBUG
+    std::cout << "." << std::flush;
+#endif
+
+#if 0
+    // Verify the results
+   uint32_t scale = kernel_loop_count_ * ops_thrd;
+   verifyGlobalLoadKernel(out_data_, total_workitems, scale,
+                                                     kernel_name().c_str());
+#endif
+    time.push_back(p_timer.ReadTimer(id));
+
+    // Re-arm the completion signal for the next dispatch.
+    hsa_signal_store_screlease(signal(), 1);
+  }
+
+#ifdef DEBUG
+  std::cout << std::endl;
+#endif
+
+  // Drop the first sample (presumably a warm-up dispatch), but only when
+  // another sample remains; erasing from an empty vector is undefined.
+  if (time.size() > 1) {
+    time.erase(time.begin());
+  }
+
+  std::sort(time.begin(), time.end());
+
+  // Keep at most the num_iteration() fastest samples. The loop above collects
+  // exactly num_iteration() samples, so after dropping one there is nothing
+  // to trim; the bound check keeps the erase range inside the vector.
+  if (time.size() > num_iteration()) {
+    time.erase(time.begin() + num_iteration(), time.end());
+  }
+
+  mean_ = rocrtst::CalcMean(time);
+
+  return;
+}
+
+// Free both test buffers, then run the shared clean-up. A failed free is
+// reported as a non-fatal expectation so the remaining steps still run; a
+// failed shared clean-up is a fatal assertion.
+void DeviceLoadBandwidth::Close() {
+  hsa_status_t err = hsa_amd_memory_pool_free(in_data_);
+  EXPECT_EQ(err, HSA_STATUS_SUCCESS);
+
+  err = hsa_amd_memory_pool_free(out_data_);
+  EXPECT_EQ(err, HSA_STATUS_SUCCESS);
+
+  err = rocrtst::CommonCleanUp(this);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+}
+
+// Print the measured bandwidth: data_size_ divided by mean_, scaled down by
+// 1024 three times. Nothing is printed when the profile check fails.
+void DeviceLoadBandwidth::DisplayResults() const {
+  if (rocrtst::CheckProfile(this)) {
+    const char* const rule = "=======================================";
+    const double gb_per_sec = data_size_ / mean_ / 1024 / 1024 / 1024;
+
+    std::cout << rule << std::endl;
+    std::cout << "Device Load Bandwidth:     " << gb_per_sec << "(GB/S)"
+              << std::endl;
+    std::cout << rule << std::endl;
+  }
+}
@@ -0,0 +1,119 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#ifndef __ROCRTST_SRC_INC_DEVICE_LOAD_BANDWIDTH_H__
+#define __ROCRTST_SRC_INC_DEVICE_LOAD_BANDWIDTH_H__
+
+#include "perf_common/perf_base.h"
+#include "common/base_rocr.h"
+#include "hsa/hsa.h"
+#include <stdio.h>
+
+//@Brief: Performance test that times the "&__SysMemLoad" kernel over buffers
+//allocated from the device pool and reports the result as
+//"Device Load Bandwidth"
+class DeviceLoadBandwidth: public rocrtst::BaseRocR, public PerfBase {
+ public:
+  //@Brief: Constructor
+  DeviceLoadBandwidth();
+
+  //@Brief: Destructor
+  ~DeviceLoadBandwidth();
+
+  //@Brief: Set up the testing environment
+  virtual void SetUp();
+
+  //@Brief: Run the test case
+  virtual void Run();
+
+  //@Brief: Close and clean up the test environment
+  virtual void Close();
+
+  //@Brief: Display load bandwidth
+  virtual void DisplayResults() const;
+
+  //@Brief: Set work-item configuration. With INTERACTIVE defined the values
+  //are read from stdin, otherwise fixed defaults are used.
+  void SetWorkItemNum() {
+#ifdef INTERACTIVE
+    // All targets are unsigned 32-bit values, so they are read with %u
+    // (uint32_t is unsigned int on the supported platforms). tmp starts at 0
+    // so a failed read does not pass an indeterminate group size on.
+    uint32_t tmp = 0;
+    printf("Please input the number of CUs you want to try:\n");
+    scanf("%u", &num_cus_);
+
+    printf("Please input the number of groups you want to try:\n");
+    scanf("%u", &num_group_);
+
+    printf("Please input the size of each group:\n");
+    scanf("%u", &tmp);
+    set_group_size(tmp);
+
+    printf("Please input the number of kernel loop you want to try:\n");
+    scanf("%u", &kernel_loop_count_);
+#else
+    num_cus_ = 16;
+    num_group_ = 128;
+    set_group_size(64);
+    kernel_loop_count_ = 16;
+#endif
+    return;
+  }
+
+ private:
+  //@Brief: number of group
+  uint32_t num_group_;
+
+  //@Brief: number of CUs
+  uint32_t num_cus_;
+
+  //@Brief: number of kernel loop
+  uint32_t kernel_loop_count_;
+
+  //@Brief: Mean execution time
+  double mean_;
+
+  //@Brief: data size for test
+  uint64_t data_size_;
+
+  //@Brief: Input and output buffers; allocated in Run(), freed in Close()
+  uint32_t* in_data_;
+  uint32_t* out_data_;
+};
+
+#endif
+
@@ -0,0 +1,219 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include "device_store_bandwidth.h"
+#include "common/base_rocr_utils.h"
+#include "common/common.h"
+#include "common/helper_funcs.h"
+#include "common/hsatimer.h"
+#include "gtest/gtest.h"
+
+// Constructor: start from a zeroed configuration with no buffers allocated.
+// SetUp() installs the real work-item configuration via SetWorkItemNum().
+DeviceStoreBandwidth::DeviceStoreBandwidth()
+    : BaseRocR(),
+      num_group_(0),
+      num_cus_(0),
+      kernel_loop_count_(0),
+      mean_(0.0),
+      data_size_(0),
+      in_data_(nullptr),
+      out_data_(nullptr) {
+  set_group_size(0);
+  set_requires_profile(HSA_PROFILE_BASE);
+}
+
+// Destructor: nothing to release here; buffers are freed in Close().
+DeviceStoreBandwidth::~DeviceStoreBandwidth() = default;
+
+// Set up the test environment: choose the work-item configuration, name the
+// kernel, initialize HSA, create the dispatch queue, load the kernel and
+// pre-fill the AQL packet.
+void DeviceStoreBandwidth::SetUp() {
+  SetWorkItemNum();
+
+  set_kernel_file_name("sysMemWrite.o");
+  set_kernel_name("&__SysMemStore");
+
+  // Without a working HSA setup there is nothing more to prepare.
+  if (rocrtst::InitAndSetupHSA(this) != HSA_STATUS_SUCCESS) {
+    return;
+  }
+
+  // Create the queue on the agent returned by gpu_device1().
+  hsa_queue_t* q = nullptr;
+  rocrtst::CreateQueue(*gpu_device1(), &q);
+  ASSERT_NE(q, nullptr);
+  set_main_queue(q);
+
+  rocrtst::LoadKernelFromObjFile(this);
+
+  // One work-item per (CU, group, lane) combination.
+  const uint32_t grid_items = num_cus_ * num_group_ * group_size();
+
+  // Fill in the parts of the AQL packet that are known at this point.
+  rocrtst::InitializeAQLPacket(this, &aql());
+  aql().workgroup_size_x = group_size();
+  aql().grid_size_x = grid_items;
+}
+
+// Run the test
+//
+// Allocates the input and output buffers from the device pool, publishes the
+// kernel arguments, then dispatches the kernel num_iteration() times, timing
+// each dispatch from just before the packet header is set until the
+// completion signal wait returns. The mean of the retained samples is stored
+// in mean_ and the input buffer size in data_size_ for DisplayResults().
+// Returns early, without allocating anything, when the profile check fails.
+void DeviceStoreBandwidth::Run() {
+  hsa_status_t err;
+
+  if (!rocrtst::CheckProfile(this)) {
+    return;
+  }
+
+  uint32_t total_workitems = num_cus_ * num_group_ * group_size();
+
+  uint32_t ops_thrd = 16;
+  uint64_t addr_step = (uint64_t) total_workitems * sizeof(uint32_t);
+  uint64_t total_ops = (uint64_t) total_workitems * kernel_loop_count_
+                       * ops_thrd;
+  uint64_t in_data_size = (uint64_t) total_ops * sizeof(uint32_t);
+
+  data_size_ = in_data_size;
+
+  err = rocrtst::SetPoolsTypical(this);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  err = rocrtst::AllocAndAllowAccess(this, in_data_size, device_pool(),
+                                                  (void**)&in_data_);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  // The output buffer holds one 32-bit value per work-item.
+  uint32_t out_data_size = total_workitems * sizeof(uint32_t);
+
+  err = rocrtst::AllocAndAllowAccess(this, out_data_size, device_pool(),
+                                                          (void**)&out_data_);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  struct local_args_t {
+    void* arg0;
+    void* arg1;
+    uint64_t arg2;
+    void* arg3;
+  } local_args;
+
+  // in_data_ is a uint32_t*, so arg1 points one past the last of the
+  // total_ops 32-bit elements of the input buffer.
+  local_args.arg0 = in_data_;
+  local_args.arg1 = in_data_ + total_ops;
+  local_args.arg2 = addr_step;
+  local_args.arg3 = out_data_;
+
+  // Copy the kernel args structure into a registered memory block
+  err = rocrtst::AllocAndSetKernArgs(this, &local_args, sizeof(local_args));
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  std::vector<double> time;
+
+  rocrtst::WriteAQLToQueue(this);
+
+  for (uint32_t i = 0; i < num_iteration(); i++) {
+    uint64_t que_idx = hsa_queue_load_write_index_relaxed(main_queue());
+
+    // Write the aql packet at the calculated queue index address.
+    const uint32_t queue_mask = main_queue()->size - 1;
+
+    rocrtst::PerfTimer p_timer;
+    int id = p_timer.CreateTimer();
+    p_timer.StartTimer(id);
+
+    // Setting the header makes the packet valid; the doorbell then tells the
+    // packet processor to pick it up.
+    void * q_base = main_queue()->base_address;
+    uint32_t aql_header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
+    rocrtst::AtomicSetPacketHeader(aql_header, aql().setup,
+             &((hsa_kernel_dispatch_packet_t*)(q_base))[que_idx & queue_mask]);
+    hsa_signal_store_screlease(main_queue()->doorbell_signal, que_idx);
+
+    // Wait on the dispatch signal until the kernel is finished.
+    while (hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_LT, 1,
+                                     (uint64_t) - 1, HSA_WAIT_STATE_ACTIVE))
+      ;
+
+    p_timer.StopTimer(id);
+
+#ifdef DEBUG
+    std::cout << "." << std::flush;
+#endif
+
+    time.push_back(p_timer.ReadTimer(id));
+
+    // Re-arm the completion signal for the next dispatch.
+    hsa_signal_store_screlease(signal(), 1);
+  }
+
+#ifdef DEBUG
+  std::cout << std::endl;
+#endif
+
+  // Drop the first sample (presumably a warm-up dispatch), but only when
+  // another sample remains; erasing from an empty vector is undefined and a
+  // single sample is better than none for the mean.
+  if (time.size() > 1) {
+    time.erase(time.begin());
+  }
+  mean_ = rocrtst::CalcMean(time);
+
+  return;
+}
+
+// Free both test buffers, then run the shared clean-up. A failed free is
+// reported as a non-fatal expectation so the remaining steps still run; a
+// failed shared clean-up is a fatal assertion.
+void DeviceStoreBandwidth::Close() {
+  hsa_status_t err = hsa_amd_memory_pool_free(in_data_);
+  EXPECT_EQ(err, HSA_STATUS_SUCCESS);
+
+  err = hsa_amd_memory_pool_free(out_data_);
+  EXPECT_EQ(err, HSA_STATUS_SUCCESS);
+
+  err = rocrtst::CommonCleanUp(this);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+}
+
+// Print the measured bandwidth: data_size_ divided by mean_, scaled down by
+// 1024 three times. Nothing is printed when the profile check fails.
+void DeviceStoreBandwidth::DisplayResults() const {
+  if (rocrtst::CheckProfile(this)) {
+    const char* const rule = "=======================================";
+    const double gb_per_sec = data_size_ / mean_ / 1024 / 1024 / 1024;
+
+    std::cout << rule << std::endl;
+    std::cout << "Device Store Bandwidth:     " << gb_per_sec << "(GB/S)"
+              << std::endl;
+    std::cout << rule << std::endl;
+  }
+}
@@ -0,0 +1,119 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#ifndef __ROCRTST_SRC_INC_DEVICE_STORE_BANDWIDTH_H__
+#define __ROCRTST_SRC_INC_DEVICE_STORE_BANDWIDTH_H__
+
+#include "perf_common/perf_base.h"
+#include "common/base_rocr.h"
+#include "hsa/hsa.h"
+#include <stdio.h>
+
+//@Brief: Performance test that times the "&__SysMemStore" kernel over buffers
+//allocated from the device pool and reports the result as
+//"Device Store Bandwidth"
+class DeviceStoreBandwidth: public rocrtst::BaseRocR, public PerfBase {
+ public:
+  //@Brief: Constructor
+  DeviceStoreBandwidth();
+
+  //@Brief: Destructor
+  ~DeviceStoreBandwidth();
+
+  //@Brief: Set up the testing environment
+  virtual void SetUp();
+
+  //@Brief: Run the test case
+  virtual void Run();
+
+  //@Brief: Close and clean up the test environment
+  virtual void Close();
+
+  //@Brief: Display store bandwidth
+  virtual void DisplayResults() const;
+
+  //@Brief: Set work-item configuration. With INTERACTIVE defined the values
+  //are read from stdin, otherwise fixed defaults are used.
+  void SetWorkItemNum() {
+#ifdef INTERACTIVE
+    // All targets are unsigned 32-bit values, so they are read with %u
+    // (uint32_t is unsigned int on the supported platforms). tmp starts at 0
+    // so a failed read does not pass an indeterminate group size on.
+    uint32_t tmp = 0;
+    printf("Please input the number of CUs you want to try:\n");
+    scanf("%u", &num_cus_);
+
+    printf("Please input the number of groups you want to try:\n");
+    scanf("%u", &num_group_);
+
+    printf("Please input the size of each group:\n");
+    scanf("%u", &tmp);
+    set_group_size(tmp);
+
+    printf("Please input the number of kernel loop you want to try:\n");
+    scanf("%u", &kernel_loop_count_);
+#else
+    num_cus_ = 32;
+    num_group_ = 128;
+    set_group_size(64);
+    kernel_loop_count_ = 16;
+#endif
+    return;
+  }
+
+ private:
+  //@Brief: number of group
+  uint32_t num_group_;
+
+  //@Brief: number of CUs
+  uint32_t num_cus_;
+
+  //@Brief: number of kernel loop
+  uint32_t kernel_loop_count_;
+
+  //@Brief: Mean execution time
+  double mean_;
+
+  //@Brief: data size for test
+  uint64_t data_size_;
+
+  //@Brief: Input and output buffers; allocated in Run(), freed in Close()
+  uint32_t* in_data_;
+  uint32_t* out_data_;
+};
+
+#endif
+
@@ -0,0 +1,331 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include "dispatch_time.h"
+#include "common/base_rocr_utils.h"
+#include "common/common.h"
+#include "common/os.h"
+#include "common/helper_funcs.h"
+#include "common/hsatimer.h"
+#include "gtest/gtest.h"
+#include "hsa/hsa.h"
+#include "hsa/hsa_ext_finalize.h"
+#include <algorithm>
+
+// Constructor: both mode flags start out false, the batch size defaults to
+// 100000 packets, all recorded means are zero and the AQL dispatch packet is
+// cleared. orig_iterrupt_env_ is set to nullptr so that the pointer never
+// holds an indeterminate value.
+DispatchTime::DispatchTime() :
+  BaseRocR(),
+  use_default_(false),
+  launch_single_(false),
+  queue_size_(0),
+  num_batch_(100000),
+  single_default_mean_(0.0),
+  single_interrupt_mean_(0.0),
+  multi_default_mean_(0.0),
+  multi_interrupt_mean_(0.0),
+  orig_iterrupt_env_(nullptr) {
+  // The packet lives in the base class, so it is cleared here rather than in
+  // the initializer list.
+  memset(&aql(), 0, sizeof(hsa_kernel_dispatch_packet_t));
+}
+
+// Destructor: nothing to release here; runtime clean-up is done by Close().
+DispatchTime::~DispatchTime() = default;
+
+// Prepares the test: selects the signal type, names the kernel to load,
+// initializes HSA, creates the dispatch queue, clamps the batch size for
+// multi-packet runs and pre-fills the AQL packet.
+void DispatchTime::SetUp() {
+  // Interrupts are enabled exactly when the default signal was NOT requested
+  set_enable_interrupt(!use_default_);
+
+  set_kernel_file_name("empty_kernel.o");
+  set_kernel_name("&__Empty_kernel");
+
+  if (rocrtst::InitAndSetupHSA(this) != HSA_STATUS_SUCCESS) {
+    return;
+  }
+
+  hsa_agent_t* const agent = gpu_device1();
+
+  // Create the queue and make it the main queue of this test
+  hsa_queue_t* queue = nullptr;
+  rocrtst::CreateQueue(*agent, &queue);
+  ASSERT_NE(queue, nullptr);
+  set_main_queue(queue);
+
+  // A batch may not hold more packets than the agent's maximum queue size
+  if (!launch_single_) {
+    uint32_t max_queue_size = 0;
+    hsa_status_t status = hsa_agent_get_info(*agent,
+                                             HSA_AGENT_INFO_QUEUE_MAX_SIZE,
+                                             &max_queue_size);
+    ASSERT_EQ(status, HSA_STATUS_SUCCESS);
+
+    num_batch_ = std::min(num_batch_, max_queue_size);
+  }
+
+  rocrtst::LoadKernelFromObjFile(this);
+
+  // Fill in the kernel packet (the header is set at dispatch time) and
+  // request a single work-item in a single work-group
+  rocrtst::InitializeAQLPacket(this, &aql());
+  aql().workgroup_size_x = 1;
+  aql().grid_size_x = 1;
+}
+
+// Runs the measurement in the configured mode; does nothing when
+// CheckProfile() rejects the current setup.
+void DispatchTime::Run() {
+  if (!rocrtst::CheckProfile(this)) {
+    return;
+  }
+
+  if (launch_single_) {
+    RunSingle();
+    return;
+  }
+
+  RunMulti();
+}
+
+// Number of dispatches actually performed: 20% more than num_iteration()
+// plus one. RunSingle()/RunMulti() drop the first sample and the largest
+// ones again so that num_iteration() samples remain.
+size_t DispatchTime::RealIterationNum() {
+  const double padded_count = num_iteration() * 1.2 + 1;
+  return static_cast<size_t>(padded_count);
+}
+
+// Dispatches one kernel packet at a time and times each dispatch from just
+// before the packet header is written until the completion signal drops
+// below 1. The first sample is discarded, the rest are sorted and only the
+// num_iteration() smallest are averaged into the single-dispatch mean that
+// matches the selected signal type.
+void DispatchTime::RunSingle() {
+  std::vector<double> samples;
+
+  const int total_runs = RealIterationNum();
+  const uint32_t slot_mask = main_queue()->size - 1;
+
+  // Nothing may be pending in the queue when the measurement starts
+  ASSERT_EQ(hsa_queue_load_read_index_scacquire(main_queue()),
+            hsa_queue_load_write_index_scacquire(main_queue()));
+
+  hsa_kernel_dispatch_packet_t* const ring =
+      (hsa_kernel_dispatch_packet_t*) main_queue()->base_address;
+
+  for (int run = 0; run < total_runs; ++run) {
+    // Reserve the next packet slot
+    const uint64_t write_idx =
+        hsa_queue_add_write_index_relaxed(main_queue(), 1);
+
+    // NOTE(review): this compares the index with itself plus the queue size,
+    // so it can only fail for a zero-sized queue or on wrap-around -- confirm
+    // what was meant to be checked.
+    ASSERT_LT(write_idx, main_queue()->size + write_idx);
+
+    // Copy the prepared packet into its slot
+    hsa_kernel_dispatch_packet_t& slot = ring[write_idx & slot_mask];
+    slot = aql();
+
+    // Start timing, then publish the packet type and ring the doorbell
+    rocrtst::PerfTimer stopwatch;
+    const int timer_id = stopwatch.CreateTimer();
+    stopwatch.StartTimer(timer_id);
+    slot.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+    hsa_signal_store_screlease(main_queue()->doorbell_signal, write_idx);
+
+    // Spin until the completion signal reports that the kernel finished
+    while (hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_LT, 1,
+                                     (uint64_t) - 1, HSA_WAIT_STATE_ACTIVE))
+      ;
+
+    stopwatch.StopTimer(timer_id);
+
+    samples.push_back(stopwatch.ReadTimer(timer_id));
+
+    // Re-arm the completion signal for the next dispatch
+    hsa_signal_store_screlease(signal(), 1);
+
+#ifdef DEBUG
+    std::cout << ".";
+    fflush(stdout);
+#endif
+  }
+
+  std::cout << std::endl;
+
+  // Drop the first sample, then keep only the num_iteration() smallest ones
+  samples.erase(samples.begin());
+  std::sort(samples.begin(), samples.end());
+  samples.erase(samples.begin() + num_iteration(), samples.end());
+
+  const double mean = rocrtst::CalcMean(samples);
+
+  if (use_default_) {
+    single_default_mean_ = mean;
+  }
+  else {
+    single_interrupt_mean_ = mean;
+  }
+}
+
+// Dispatches num_batch_ kernel packets per iteration and times each batch
+// from just before the header of the first packet is written until the
+// completion signal reaches 0. The first sample is discarded, the rest are
+// sorted and only the num_iteration() smallest are averaged into the
+// multi-dispatch mean that matches the selected signal type.
+void DispatchTime::RunMulti() {
+  std::vector<double> timer;
+  int it = RealIterationNum();
+  const uint32_t queue_mask = main_queue()->size - 1;
+
+  // An empty batch would make "num_batch_ - 1" wrap around below and index
+  // an empty buffer, so refuse to run in that case.
+  ASSERT_GT(num_batch_, 0u);
+
+  //queue should be empty
+  ASSERT_EQ(hsa_queue_load_read_index_scacquire(main_queue()),
+            hsa_queue_load_write_index_scacquire(main_queue()));
+
+  // Write indices of the packets of the current batch. The buffer is
+  // allocated once and reused by every iteration; it is released
+  // automatically on every exit path.
+  std::vector<uint64_t> index(num_batch_);
+
+  hsa_kernel_dispatch_packet_t* const packets =
+      (hsa_kernel_dispatch_packet_t*) (main_queue()->base_address);
+
+  for (int i = 0; i < it; i++) {
+    // The wait below expects the signal to reach 0 once the batch is done
+    hsa_signal_store_screlease(signal(), num_batch_);
+
+    for (uint32_t j = 0; j < num_batch_; j++) {
+      index[j] = hsa_queue_add_write_index_relaxed(main_queue(), 1);
+
+      //Write the aql packet at the calculated queue index address.
+      hsa_kernel_dispatch_packet_t& packet = packets[index[j] & queue_mask];
+      packet = aql();
+
+      if (j == num_batch_ - 1) {
+        // The last packet of the batch carries the barrier bit
+        packet.header |= 1 << HSA_PACKET_HEADER_BARRIER;
+
+        //TODO: verify if the below is needed. I don't think it is. It should
+        // already be initialized to signal().
+        packet.completion_signal = signal();
+      }
+    }
+
+    // Set packet header reversly; set all headers except the very first
+    // one, for now.
+    for (uint32_t j = num_batch_ - 1; j > 0; j--) {
+      packets[index[j] & queue_mask].header |= HSA_PACKET_TYPE_KERNEL_DISPATCH
+                                               << HSA_PACKET_HEADER_TYPE;
+    }
+
+    //Get timing stamp and ring the doorbell to dispatch the kernel.
+    rocrtst::PerfTimer p_timer;
+    int id = p_timer.CreateTimer();
+    p_timer.StartTimer(id);
+    //Set the very first header...
+    packets[index[0] & queue_mask].header |= HSA_PACKET_TYPE_KERNEL_DISPATCH
+                                             << HSA_PACKET_HEADER_TYPE;
+
+    for (uint32_t j = 0; j < num_batch_; j++) {
+      hsa_signal_store_screlease(main_queue()->doorbell_signal, index[j]);
+    }
+
+    //Wait on the dispatch signal until the kernel is finished.
+    while (hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_EQ, 0,
+                                     UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0)
+      ;
+
+    p_timer.StopTimer(id);
+
+    timer.push_back(p_timer.ReadTimer(id));
+    hsa_signal_store_screlease(signal(), 1);
+
+#ifdef DEBUG
+    std::cout << ".";
+    fflush(stdout);
+#endif
+  }
+
+  std::cout << std::endl;
+
+  // Drop the first sample, then keep only the num_iteration() smallest ones
+  timer.erase(timer.begin());
+  std::sort(timer.begin(), timer.end());
+
+  timer.erase(timer.begin() + num_iteration(), timer.end());
+
+  if (use_default_) {
+    multi_default_mean_ = rocrtst::CalcMean(timer);
+  }
+  else {
+    multi_interrupt_mean_ = rocrtst::CalcMean(timer);
+  }
+
+  return;
+}
+
+// Prints the mean recorded for the configured mode, scaled by 1e6; for
+// multi-packet runs the value is additionally divided by the batch size.
+// Prints nothing when CheckProfile() rejects the current setup.
+void DispatchTime::DisplayResults() const {
+  if (!rocrtst::CheckProfile(this)) {
+    return;
+  }
+
+  // Select the caption and the value that belong to the configured mode
+  const char* caption;
+  double scaled_mean;
+
+  if (launch_single_) {
+    if (use_default_) {
+      caption = "Single_Default:       ";
+      scaled_mean = single_default_mean_ * 1e6;
+    }
+    else {
+      caption = "Single_Interrupt:       ";
+      scaled_mean = single_interrupt_mean_ * 1e6;
+    }
+  }
+  else {
+    if (use_default_) {
+      caption = "Multi_Default:         ";
+      scaled_mean = multi_default_mean_ * 1e6 / num_batch_;
+    }
+    else {
+      caption = "Multi_Interrupt:         ";
+      scaled_mean = multi_interrupt_mean_ * 1e6 / num_batch_;
+    }
+  }
+
+  std::cout << "===================================================="
+            << std::endl;
+
+  std::cout << caption << scaled_mean << std::endl;
+
+  std::cout << "====================================================="
+            << std::endl;
+}
+
+// Runs the common clean-up and asserts that it succeeded.
+void DispatchTime::Close() {
+  const hsa_status_t status = rocrtst::CommonCleanUp(this);
+  ASSERT_EQ(status, HSA_STATUS_SUCCESS);
+}
@@ -0,0 +1,125 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#ifndef __ROCRTST_SRC_DISPATCH_TIME_H__
+#define __ROCRTST_SRC_DISPATCH_TIME_H__
+#include "perf_common/perf_base.h"
+#include "common/base_rocr.h"
+#include "common/common.h"
+#include "hsa/hsa.h"
+#include <vector>
+
+//@Brief: Performance test that measures the mean latency of dispatching an
+//empty kernel, one packet at a time or in batches (see LaunchSingleKernel)
+
+class DispatchTime: public rocrtst::BaseRocR, public PerfBase {
+ public:
+  //@Brief: Constructor
+  DispatchTime();
+
+  //@Brief: Destructor
+  virtual ~DispatchTime();
+
+  //@Brief: Set up the environment for the test
+  virtual void SetUp();
+
+  //@Brief: Run the test case in the mode chosen with LaunchSingleKernel()
+  virtual void Run();
+
+  //@Brief: Display the mean that was recorded for the chosen mode
+  virtual void DisplayResults() const;
+
+  //@Brief: Clean up and close the runtime
+  virtual void Close();
+
+  //@Brief: Choose whether to use the default signal (true when omitted)
+  void UseDefaultSignal(bool use_default = true) {
+    use_default_ = use_default;
+  }
+
+  //@Brief: Choose whether to launch a single kernel (true when omitted)
+  void LaunchSingleKernel(bool launch_single = true) {
+    launch_single_ = launch_single;
+  }
+
+ private:
+  //@Brief: Get the number of iterations that are actually executed
+  virtual size_t RealIterationNum();
+
+  //@Brief: Launch a single packet each time
+  virtual void RunSingle();
+
+  //@Brief: Launch multiple packets each time
+  virtual void RunMulti();
+
+  //@Brief: Indicates whether the default signal is used
+  bool use_default_;
+
+  //@Brief: Indicates whether a single kernel is launched per measurement
+  bool launch_single_;
+
+  //@Brief: Size of the queue; NOTE(review): only zeroed by the constructor
+  uint32_t queue_size_;
+
+  //@Brief: Number of packets in a batch
+  uint32_t num_batch_;
+
+  //@Brief: Mean dispatch time, single packet, default signal
+  double single_default_mean_;
+
+  //@Brief: Mean dispatch time, single packet, interrupt signal
+  double single_interrupt_mean_;
+
+  //@Brief: Mean time of one batch, default signal
+  double multi_default_mean_;
+
+  //@Brief: Mean time of one batch, interrupt signal
+  double multi_interrupt_mean_;
+
+  char* orig_iterrupt_env_;  // NOTE(review): purpose not visible in this header -- confirm
+};
+
+#endif
+
@@ -0,0 +1,351 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include "flush_latency.h"
+#include "common/base_rocr_utils.h"
+#include "common/common.h"
+#include "common/helper_funcs.h"
+#include "common/hsatimer.h"
+#include "common/os.h"
+#include "gtest/gtest.h"
+#include <algorithm>
+
+static const int kWorkItem = 1024 * 1204;  // SetUp() uses 30% of this as grid size. NOTE(review): 1204 may be a typo for 1024 -- confirm
+// Constructor: zeroes every setting and result of the test, including the
+// class-local group_size_ that was previously left uninitialized, resets the
+// group size kept by the base class and requires the HSA base profile.
+FlushLatency::FlushLatency() :
+  BaseRocR(),
+  group_size_(0),
+  num_group_(0),
+  num_cus_(0),
+  kernel_loop_count_(0),
+  mean_(0.0),
+  data_size_(0) {
+  set_group_size(0);
+
+  set_requires_profile(HSA_PROFILE_BASE);
+}
+
+// Destructor: nothing to release here; runtime clean-up is done by Close().
+FlushLatency::~FlushLatency() = default;
+
+// Set up the test environment: applies the work-item configuration, names
+// the kernel to load, initializes HSA, creates the dispatch queue with
+// profiling enabled, loads the kernel and pre-fills the AQL packet.
+void FlushLatency::SetUp() {
+  hsa_status_t err;
+
+  SetWorkItemNum();
+
+  set_kernel_file_name("flush_latency.o");
+  set_kernel_name("&main");
+
+  if (HSA_STATUS_SUCCESS != rocrtst::InitAndSetupHSA(this)) {
+    return;
+  }
+
+  hsa_agent_t* gpu_dev = gpu_device1();
+
+  // Create the dispatch queue. The pointer starts out as nullptr and is
+  // checked afterwards so that a failed creation stops the test instead of
+  // handing an indeterminate pointer to the calls below.
+  hsa_queue_t* q = nullptr;
+  rocrtst::CreateQueue(*gpu_dev, &q);
+  ASSERT_NE(q, nullptr);
+  set_main_queue(q);
+
+  //Enable profiling
+  err = hsa_amd_profiling_set_profiler_enabled(main_queue(), 1);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  rocrtst::LoadKernelFromObjFile(this);
+
+  // The grid covers 30% of kWorkItem work-items (fraction truncated)
+  uint32_t total_work_items = static_cast<uint32_t>(kWorkItem * 0.3);
+
+  //Fill up part of aql
+  rocrtst::InitializeAQLPacket(this, &aql());
+  aql().workgroup_size_x = group_size();
+  aql().grid_size_x = total_work_items;
+
+  return;
+}
+
+// Run the test: after one warm-up dispatch, the kernel is dispatched 1000
+// times with an agent-scope release fence and 1000 times with a system-scope
+// release fence. The profiled duration of every dispatch is recorded, the 50
+// smallest and 50 largest samples of each set are dropped, and mean_ is set
+// to the value CalcMean() computes from the two sample sets.
+//
+// All memory allocated here is released at the end of a successful run. The
+// allocation pattern inside the measured loops is unchanged: every dispatch
+// still gets a fresh output buffer and a fresh kernarg buffer.
+void FlushLatency::Run() {
+  hsa_status_t err;
+  hsa_amd_memory_pool_t cpu_pool;
+
+  if (!rocrtst::CheckProfile(this)) {
+    return;
+  }
+
+  hsa_agent_t* gpu_dev = gpu_device1();
+  hsa_agent_t* cpu_dev = cpu_device();
+
+  err = hsa_amd_agent_iterate_memory_pools(*gpu_dev, rocrtst::FindStandardPool,
+                                                                &device_pool());
+  ASSERT_EQ(err, HSA_STATUS_INFO_BREAK);
+
+  ASSERT_NE(device_pool().handle, 0);
+
+  cpu_pool.handle = 0;
+  err = hsa_amd_agent_iterate_memory_pools(*cpu_dev, rocrtst::FindGlobalPool,
+        &cpu_pool);
+  ASSERT_EQ(err, HSA_STATUS_INFO_BREAK);
+
+  ASSERT_NE(cpu_pool.handle, 0);
+
+#if DEBUG
+  std::cout << "Device Pool Properties:" << std::endl;
+  err = rocrtst::DumpMemoryPoolInfo(device_pool());
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+  std::cout << "Global Pool Properties:" << std::endl;
+  err = rocrtst::DumpMemoryPoolInfo(cpu_pool);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+#endif
+  uint32_t out_data_size = 1024 * 1024 * sizeof(uint32_t);
+
+  std::vector<double> time_none;
+  std::vector<double> time_release;
+
+  // Every kernarg buffer allocated below; all of them are freed at the end
+  std::vector<void*> kernarg_buffers;
+
+  //Query system timestamp frequency
+  uint64_t freq;
+  err = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &freq);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  void* out = NULL;
+  uint32_t* out_data;
+  const uint32_t queue_mask = main_queue()->size - 1;
+  typedef struct local_args_t {
+    void* arg0;
+  } args;
+
+  // Warm up
+  uint16_t header = 0;
+  header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+  header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
+  header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+  aql().header = header;
+
+  err = hsa_amd_memory_pool_allocate(device_pool(), out_data_size, 0,
+                                     (void**) &out_data);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  // out_data is overwritten by the first measured iteration, so keep the
+  // warm-up buffer separately to be able to free it
+  void* warmup_out = out_data;
+
+  args* kern_ptr = NULL;
+  err = hsa_amd_memory_pool_allocate(cpu_pool, sizeof(args), 0,
+                                     (void**) &kern_ptr);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+  kernarg_buffers.push_back(kern_ptr);
+
+  kern_ptr->arg0 = out_data;
+
+  aql().kernarg_address = kern_ptr;
+
+  // Obtain the current queue write index
+  uint64_t index = hsa_queue_add_write_index_relaxed(main_queue(), 1);
+
+  void *q_base_addr = main_queue()->base_address;
+  // Write the aql packet at the calculated queue index address.
+  ((hsa_kernel_dispatch_packet_t*)q_base_addr)[index & queue_mask] = aql();
+
+  hsa_signal_store_screlease(main_queue()->doorbell_signal, index);
+
+  // Wait on the dispatch signal until the kernel is finished.
+  while (hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_LT, 1,
+                                   (uint64_t) - 1, HSA_WAIT_STATE_ACTIVE))
+    ;
+
+  hsa_signal_store_screlease(signal(), 1);
+
+  // First measured set: agent-scope release fence (header set above)
+  for (int i = 0; i < 1000; i++) {
+    err = hsa_amd_memory_pool_allocate(device_pool(), out_data_size, 0,
+                                       (void**) &out_data);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    args* loop_args = NULL;
+    err = hsa_amd_memory_pool_allocate(cpu_pool, sizeof(args), 0,
+                                       (void**) &loop_args);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+    kernarg_buffers.push_back(loop_args);
+
+    loop_args->arg0 = out_data;
+
+    aql().kernarg_address = loop_args;
+
+    // Obtain the current queue write index
+    uint64_t loop_index = hsa_queue_add_write_index_relaxed(main_queue(), 1);
+
+    // Write the aql packet at the calculated queue index address.
+    ((hsa_kernel_dispatch_packet_t*)q_base_addr)[loop_index & queue_mask] =
+        aql();
+
+    hsa_signal_store_screlease(main_queue()->doorbell_signal, loop_index);
+
+    // Wait on the dispatch signal until the kernel is finished.
+    while (hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_LT, 1,
+                                     (uint64_t) - 1, HSA_WAIT_STATE_ACTIVE))
+      ;
+
+    hsa_amd_profiling_dispatch_time_t dispatch_time;
+    err = hsa_amd_profiling_get_dispatch_time(*gpu_dev, signal(),
+          &dispatch_time);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    // NOTE(review): the converted system-domain ticks are only checked for
+    // success; the duration below is computed from the raw dispatch ticks
+    // and the system timestamp frequency -- confirm that this is intended.
+    uint64_t sys_start = 0;
+    uint64_t sys_end = 0;
+    err = hsa_amd_profiling_convert_tick_to_system_domain(*gpu_dev,
+          dispatch_time.start, &sys_start);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+    err = hsa_amd_profiling_convert_tick_to_system_domain(*gpu_dev,
+          dispatch_time.end, &sys_end);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    uint64_t stamp = dispatch_time.end - dispatch_time.start;
+    double execution_time = (double) stamp / freq * 1e6; // convert to us.
+
+    time_none.push_back(execution_time);
+
+    hsa_signal_store_screlease(signal(), 1);
+
+    // The output buffer of the previous iteration is released only now,
+    // after the buffer of this iteration has been allocated and used
+    if (out != NULL) {
+      err = hsa_memory_free(out);
+      ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+    }
+
+    out = out_data;
+    out_data = NULL;
+  }
+
+  // Second measured set: system-scope release fence
+  header = 0;
+  header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+  header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
+  header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+  aql().header = header;
+
+  for (int i = 0; i < 1000; i++) {
+    err = hsa_amd_memory_pool_allocate(device_pool(), out_data_size, 0,
+                                       (void**) &out_data);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    args* loop_args = NULL;
+    err = hsa_amd_memory_pool_allocate(cpu_pool, sizeof(args), 0,
+                                       (void**) &loop_args);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+    kernarg_buffers.push_back(loop_args);
+
+    loop_args->arg0 = out_data;
+
+    aql().kernarg_address = loop_args;
+
+    // Obtain the current queue write index
+    uint64_t loop_index = hsa_queue_add_write_index_relaxed(main_queue(), 1);
+
+    // Write the aql packet at the calculated queue index address.
+    ((hsa_kernel_dispatch_packet_t*)q_base_addr)[loop_index & queue_mask] =
+        aql();
+
+    hsa_signal_store_screlease(main_queue()->doorbell_signal, loop_index);
+
+    // Wait on the dispatch signal until the kernel is finished.
+    while (hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_LT, 1,
+                                     (uint64_t) - 1, HSA_WAIT_STATE_ACTIVE))
+      ;
+
+    // NOTE(review): unlike the first loop, the signal is re-armed before the
+    // dispatch time is queried -- confirm that the order does not matter.
+    hsa_signal_store_screlease(signal(), 1);
+
+    hsa_amd_profiling_dispatch_time_t dispatch_time;
+    err = hsa_amd_profiling_get_dispatch_time(*gpu_dev, signal(),
+          &dispatch_time);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    uint64_t sys_start = 0;
+    uint64_t sys_end = 0;
+    err = hsa_amd_profiling_convert_tick_to_system_domain(*gpu_dev,
+          dispatch_time.start, &sys_start);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+    err = hsa_amd_profiling_convert_tick_to_system_domain(*gpu_dev,
+          dispatch_time.end, &sys_end);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    uint64_t stamp = dispatch_time.end - dispatch_time.start;
+    double execution_time = (double) stamp / freq * 1e6; // convert to us.
+    time_release.push_back(execution_time);
+
+    if (out != NULL) {
+      err = hsa_memory_free(out);
+      ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+    }
+
+    out = out_data;
+    out_data = NULL;
+  }
+
+  // Drop the 50 smallest and the 50 largest samples of each set
+  std::sort(time_none.begin(), time_none.end());
+  std::sort(time_release.begin(), time_release.end());
+
+  time_none.erase(time_none.begin(), time_none.begin() + 50);
+  time_none.erase(time_none.end() - 50, time_none.end());
+  time_release.erase(time_release.begin(), time_release.begin() + 50);
+  time_release.erase(time_release.end() - 50, time_release.end());
+
+  mean_ = rocrtst::CalcMean(time_none, time_release);
+
+  // Release what is still allocated: the output buffer of the very last
+  // dispatch, the warm-up output buffer and every kernarg buffer. This is
+  // done after mean_ is stored so that a failing free cannot lose the result.
+  if (out != NULL) {
+    err = hsa_memory_free(out);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+  }
+
+  err = hsa_memory_free(warmup_out);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  for (size_t k = 0; k < kernarg_buffers.size(); k++) {
+    err = hsa_memory_free(kernarg_buffers[k]);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+  }
+
+  return;
+}
+
+// Runs the common clean-up and asserts that it succeeded.
+void FlushLatency::Close() {
+  const hsa_status_t status = rocrtst::CommonCleanUp(this);
+  ASSERT_EQ(status, HSA_STATUS_SUCCESS);
+}
+
+// Prints the value stored in mean_ between two separator lines; prints
+// nothing when CheckProfile() rejects the current setup.
+void FlushLatency::DisplayResults() const {
+  const bool profile_ok = rocrtst::CheckProfile(this);
+
+  if (profile_ok) {
+    std::cout << std::endl
+              << "=======================================" << std::endl
+              << "Average cache flush overhead:     " << mean_ << "uS"
+              << std::endl
+              << "=======================================" << std::endl;
+  }
+}
@@ -0,0 +1,122 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#ifndef __ROCRTST_SRC_INC_FLUSH_LATENCY_H__
+#define __ROCRTST_SRC_INC_FLUSH_LATENCY_H__
+
+#include "perf_common/perf_base.h"
+#include "common/base_rocr.h"
+#include "hsa/hsa.h"
+#include <stdio.h>
+
+//@Brief: Performance test class for the flush latency measurement. Only
+//SetWorkItemNum() is implemented in this header; the remaining methods are
+//defined in the matching source file.
+class FlushLatency: public rocrtst::BaseRocR, public PerfBase {
+ public:
+  //@Brief: Constructor
+  FlushLatency();
+
+  //@Brief: Destructor
+  ~FlushLatency();
+
+  //@Brief: Set up the testing environment
+  virtual void SetUp();
+
+  //@Brief: Run the test case
+  virtual void Run();
+
+  //@Brief: Close and clean up the test environment
+  virtual void Close();
+
+  //@Brief: Display the measured cache flush overhead
+  virtual void DisplayResults() const;
+
+  //@Brief: Set work-item configuration.
+  // Starts from the built-in defaults (32 CUs, 128 groups, group size 256,
+  // 16 kernel loops). When INTERACTIVE is defined, each value is then read
+  // from stdin; a value that cannot be parsed keeps its default.
+  //
+  // The group size is always handed to set_group_size(), in the interactive
+  // and the non-interactive case alike. Previously the non-interactive path
+  // only wrote the class-local group_size_, which presumably left the value
+  // returned by group_size() at the 0 set by the constructor -- confirm
+  // against BaseRocR.
+  void SetWorkItemNum() {
+    num_cus_ = 32;
+    num_group_ = 128;
+    uint32_t group_size = 256;
+    kernel_loop_count_ = 16;
+
+#ifdef INTERACTIVE
+    // "%u" needs an unsigned int destination, so parse into a scratch
+    // variable of exactly that type and copy on success only.
+    unsigned int input = 0;
+
+    printf("Please input the number of CUs you want to try:\n");
+    if (scanf("%u", &input) == 1) {
+      num_cus_ = input;
+    }
+
+    printf("Please input the number of groups you want to try:\n");
+    if (scanf("%u", &input) == 1) {
+      num_group_ = input;
+    }
+
+    printf("Please input the size of each group:\n");
+    if (scanf("%u", &input) == 1) {
+      group_size = input;
+    }
+
+    printf("Please input the number of kernel loop you want to try:\n");
+    if (scanf("%u", &input) == 1) {
+      kernel_loop_count_ = input;
+    }
+#endif
+
+    // Keep the class-local copy and the base-class setting in agreement
+    group_size_ = group_size;
+    set_group_size(group_size);
+    return;
+  }
+
+ private:
+  //@Brief: number of work item in one group (class-local copy of the value
+  // passed to set_group_size())
+  uint32_t group_size_;
+
+  //@Brief: number of group
+  uint32_t num_group_;
+
+  //@Brief: number of CUs
+  uint32_t num_cus_;
+
+  //@Brief: number of kernel loop
+  uint32_t kernel_loop_count_;
+
+  //@Brief: Mean execution time
+  double mean_;
+
+  //@Brief: data size for test
+  uint64_t data_size_;
+
+};
+
+#endif
+
@@ -0,0 +1,502 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include "common/base_rocr_utils.h"
+#include "common/common.h"
+#include "gtest/gtest.h"
+#include "hsa_info.h"
+
+// Callback passed to hsa_iterate_agents(): prints the properties of one agent.
+static hsa_status_t get_agent_info(hsa_agent_t, void*);
+
+// Callback passed to hsa_amd_agent_iterate_memory_pools(): prints one pool.
+static hsa_status_t get_pool_info(hsa_amd_memory_pool_t, void*);
+
+// Number of agents visited so far; incremented by get_agent_info().
+static int agent_number = 0;
+// Set in HsaInfo::SetUp() when the HSA_VENDOR_AMD environment variable begins
+// with '1'; enables the peak-GFLOPS line in the per-agent output.
+static bool output_amd = false;
+
+//@Brief: Map to store the peak FLOPS for different agent
+// Keys must match the entries of agent_names exactly, because the table is
+// indexed with a name that was first matched against agent_names.
+std::map<std::string, double> flops_table = {
+  {"Kaveri CPU", 118.4},
+  {"Spectre", 737.0},
+  {"Carrizo CPU", 67.2},
+  {"Carrizo GPU", 819.2}
+};
+
+//@Brief: Vector to store the agent_names
+std::vector<std::string> agent_names = {
+  "Kaveri CPU", "Spectre", "Carrizo CPU", "Carrizo GPU"
+};
+
+// Default-constructs the BaseRocR base; no further initialization is done here.
+HsaInfo::HsaInfo() : BaseRocR() {}
+
+// Nothing to release here.
+HsaInfo::~HsaInfo() {}
+
+// Reads the HSA_VENDOR_AMD environment variable, then initializes HSA for
+// this test through rocrtst::InitAndSetupHSA().
+void HsaInfo::SetUp() {
+  // AMD specific output is enabled only when the variable begins with '1'
+  const char* vendor_flag = rocrtst::GetEnv("HSA_VENDOR_AMD");
+
+  if (vendor_flag != NULL) {
+    output_amd = (*vendor_flag == '1');
+  }
+
+  // Nothing follows initialization, so a failure simply ends the set up
+  if (rocrtst::InitAndSetupHSA(this) != HSA_STATUS_SUCCESS) {
+    return;
+  }
+}
+
+// Queries the system-wide HSA properties, prints them, and then prints the
+// properties of every agent through the get_agent_info() callback.
+void HsaInfo::Run() {
+  if (!rocrtst::CheckProfile(this)) {
+    return;
+  }
+
+  hsa_status_t err;
+
+  // Runtime version
+  uint16_t version_major, version_minor;
+  err = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR, &version_major);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+  err = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &version_minor);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  // Timestamp frequency
+  uint64_t timestamp_frequency = 0;
+  err = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY,
+                            &timestamp_frequency);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  // Maximum duration of a signal wait operation
+  uint64_t max_wait = 0;
+  err = hsa_system_get_info(HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT, &max_wait);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  // Endianness of the system
+  hsa_endianness_t endianness;
+  err = hsa_system_get_info(HSA_SYSTEM_INFO_ENDIANNESS, &endianness);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  // Machine model
+  hsa_machine_model_t machine_model;
+  err = hsa_system_get_info(HSA_SYSTEM_INFO_MACHINE_MODEL, &machine_model);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  // Report everything that was collected
+  const double timestamp_mhz = timestamp_frequency / 1e6;
+
+  std::cout << "HSA System Info:" << std::endl;
+  std::cout << "Runtime Version:				" << version_major << "."
+            << version_minor << std::endl;
+  std::cout << "System Timestamp Frequency: 			" << timestamp_mhz
+            << "MHz" << std::endl;
+  std::cout << "Signal Max Wait Duration:                        " << max_wait
+            << "(number of timestamp)" << std::endl;
+
+  std::cout << "Machine Model:					";
+
+  switch (machine_model) {
+    case HSA_MACHINE_MODEL_SMALL:
+      std::cout << "SMALL" << std::endl;
+      break;
+
+    case HSA_MACHINE_MODEL_LARGE:
+      std::cout << "LARGE" << std::endl;
+      break;
+
+    default:
+      // Any other value leaves the label without a value or line break
+      break;
+  }
+
+  std::cout << "System Endianness:				";
+
+  switch (endianness) {
+    case HSA_ENDIANNESS_LITTLE:
+      std::cout << "LITTLE" << std::endl;
+      break;
+
+    case HSA_ENDIANNESS_BIG:
+      std::cout << "BIG" << std::endl;
+      break;
+
+    default:
+      // Any other value leaves the label without a value or line break
+      break;
+  }
+
+  std::cout << std::endl;
+
+  // Visit every agent; the callback prints the per-agent details
+  err = hsa_iterate_agents(get_agent_info, NULL);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+}
+
+// When `err` is not HSA_STATUS_SUCCESS, logs the line and file of the call
+// site and returns `err` from the enclosing function. The expansion is a
+// braced block, so it can be used without a trailing semicolon.
+#define RET_IF_HSA_INFO_ERR(err) { \
+  if (HSA_STATUS_SUCCESS != (err)) { \
+    std::cout << "hsa api call failure at line " << __LINE__ \
+              << ", file: " << __FILE__ << std::endl; \
+    return (err); \
+  } \
+}
+
+// Agent iteration callback (passed to hsa_iterate_agents() by HsaInfo::Run).
+//
+// Increments the file-level agent counter, queries the agent's attributes,
+// prints them to std::cout, and finally iterates the agent's memory pools
+// with get_pool_info().
+//
+// @param agent  Agent to describe.
+// @param data   Not used by this callback.
+// @return HSA_STATUS_SUCCESS when every query succeeded; otherwise the status
+//         of the first failing call (returned through RET_IF_HSA_INFO_ERR).
+static hsa_status_t get_agent_info(hsa_agent_t agent, void* data) {
+  // Pool counter for this agent; get_pool_info() increments it per pool
+  int pool_number = 0;
+  hsa_status_t err;
+  {
+    // Increase the number of agent
+    agent_number++;
+
+    // Get agent name and vendor
+    char name[64];
+    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, name);
+    RET_IF_HSA_INFO_ERR(err)
+    char vendor_name[64];
+    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_VENDOR_NAME, &vendor_name);
+    RET_IF_HSA_INFO_ERR(err)
+
+    // Get agent feature
+    hsa_agent_feature_t agent_feature;
+    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_FEATURE, &agent_feature);
+    RET_IF_HSA_INFO_ERR(err)
+
+    // Get profile supported by the agent
+    hsa_profile_t agent_profile;
+    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_profile);
+    RET_IF_HSA_INFO_ERR(err)
+
+    // Get floating-point rounding mode
+    hsa_default_float_rounding_mode_t float_rounding_mode;
+    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE,
+                             &float_rounding_mode);
+    RET_IF_HSA_INFO_ERR(err)
+
+    // Get max number of queue
+    uint32_t max_queue = 0;
+    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUES_MAX, &max_queue);
+    RET_IF_HSA_INFO_ERR(err)
+
+    // Get queue min size
+    uint32_t queue_min_size = 0;
+    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MIN_SIZE,
+                             &queue_min_size);
+    RET_IF_HSA_INFO_ERR(err)
+
+    // Get queue max size
+    uint32_t queue_max_size = 0;
+    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE,
+                             &queue_max_size);
+    RET_IF_HSA_INFO_ERR(err)
+
+    // Get queue type
+    hsa_queue_type_t queue_type;
+    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_TYPE, &queue_type);
+    RET_IF_HSA_INFO_ERR(err)
+
+    // Get agent node
+    uint32_t node;
+    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NODE, &node);
+    RET_IF_HSA_INFO_ERR(err)
+
+    // Get device type
+    hsa_device_type_t device_type;
+    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
+    RET_IF_HSA_INFO_ERR(err)
+
+    // Get cache size
+    uint32_t cache_size[4];
+    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_CACHE_SIZE, cache_size);
+    RET_IF_HSA_INFO_ERR(err)
+
+    // The attributes below are AMD extensions, so their enumerators are cast
+    // to hsa_agent_info_t before being passed to hsa_agent_get_info()
+
+    // Get chip id
+    uint32_t chip_id = 0;
+    err = hsa_agent_get_info(agent,
+                             (hsa_agent_info_t) HSA_AMD_AGENT_INFO_CHIP_ID,
+                                                                     &chip_id);
+    RET_IF_HSA_INFO_ERR(err)
+
+    // Get cacheline size
+    uint32_t cacheline_size = 0;
+    err = hsa_agent_get_info(agent,
+                         (hsa_agent_info_t) HSA_AMD_AGENT_INFO_CACHELINE_SIZE,
+                                                              &cacheline_size);
+    RET_IF_HSA_INFO_ERR(err)
+
+    // Get Max clock frequency
+    uint32_t max_clock_freq = 0;
+    err = hsa_agent_get_info(agent,
+                    (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY,
+                                                              &max_clock_freq);
+    RET_IF_HSA_INFO_ERR(err)
+
+    // Get Agent BDFID
+    uint16_t bdf_id = 1;
+    err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_BDFID,
+                             &bdf_id);
+    RET_IF_HSA_INFO_ERR(err)
+
+    // Get number of Compute Unit
+    uint32_t compute_unit = 0;
+    err = hsa_agent_get_info(agent,
+                     (hsa_agent_info_t) HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT,
+                                                                &compute_unit);
+    RET_IF_HSA_INFO_ERR(err)
+
+    // Print out the common results
+    std::cout << std::endl;
+    std::cout << "Agent #" << agent_number << ":" << std::endl;
+    std::cout << "Agent Name:					" << name <<
+                                                                     std::endl;
+    std::cout << "Agent Vendor Name:				" <<
+                                                      vendor_name << std::endl;
+
+    // agent_feature is a bit mask; report which dispatch kinds are set
+    if (agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH
+        && agent_feature & HSA_AGENT_FEATURE_AGENT_DISPATCH)
+      std::cout << "Agent Feature:					KERNEL_DISPATCH & AGENT_DISPATCH"
+                << std::endl;
+    else if (agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH) {
+      std::cout << "Agent Feature:					KERNEL_DISPATCH" << std::endl;
+    }
+    else if (agent_feature & HSA_AGENT_FEATURE_AGENT_DISPATCH) {
+      std::cout << "Agent Feature:					AGENT_DISPATCH" << std::endl;
+    }
+    else {
+      std::cout << "Agent Feature:					Not Supported" << std::endl;
+    }
+
+    if (HSA_PROFILE_BASE == agent_profile) {
+      std::cout << "Agent Profile:					BASE_PROFILE" << std::endl;
+    }
+    else if (HSA_PROFILE_FULL == agent_profile) {
+      std::cout << "Agent Profile:					FULL_PROFILE" << std::endl;
+    }
+    else {
+      std::cout << "Agent Profile:					Not Supported" << std::endl;
+    }
+
+    if (HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO == float_rounding_mode) {
+      std::cout << "Agent Floating Rounding Mode:			ZERO" << std::endl;
+    }
+    else if (HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR == float_rounding_mode) {
+      std::cout << "Agent Floating Rounding Mode:			NEAR" << std::endl;
+    }
+    else {
+      std::cout << "Agent Floating Rounding Mode:			Not Supported" << std::endl;
+    }
+
+    std::cout << "Agent Max Queue Number:				" << max_queue << std::endl;
+    std::cout << "Agent Queue Min Size:				" << queue_min_size << std::endl;
+    std::cout << "Agent Queue Max Size:				" << queue_max_size << std::endl;
+
+    if (HSA_QUEUE_TYPE_MULTI == queue_type) {
+      std::cout << "Agent Queue Type:				MULTI" << std::endl;
+    }
+    else if (HSA_QUEUE_TYPE_SINGLE == queue_type) {
+      std::cout << "Agent Queue Type:				SINGLE" << std::endl;
+    }
+    else {
+      std::cout << "Agent Queue Type:				Not Supported" << std::endl;
+    }
+
+    std::cout << "Agent Node:					" << node << std::endl;
+
+    if (HSA_DEVICE_TYPE_CPU == device_type) {
+      std::cout << "Agent Device Type:				CPU" << std::endl;
+    }
+    else if (HSA_DEVICE_TYPE_GPU == device_type) {
+      std::cout << "Agent Device Type:				GPU" << std::endl;
+      // Get ISA info
+      // NOTE: the ISA handle is only queried (and the status checked); it is
+      // not printed or used afterwards.
+      hsa_isa_t agent_isa;
+      err = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &agent_isa);
+      RET_IF_HSA_INFO_ERR(err)
+    }
+    else {
+      // Every device type other than CPU and GPU is reported as DSP
+      std::cout << "Agent Device Type:				DSP" << std::endl;
+    }
+
+    std::cout << "Agent Cache Info:" << std::endl;
+
+    // Only non-zero cache levels are listed; sizes are divided by 1024 and
+    // labelled KB
+    for (int i = 0; i < 4; i++) {
+      if (cache_size[i]) {
+        std::cout << "  $L" << i + 1 << ":						" << cache_size[i] / 1024
+                  << "KB" << std::endl;
+      }
+    }
+
+    std::cout << "Agent Chip ID:					" << chip_id << std::endl;
+    std::cout << "Agent Cacheline Size:				" << cacheline_size << std::endl;
+    std::cout << "Agent Max Clock Frequency:			" << max_clock_freq << "MHz"
+              << std::endl;
+    std::cout << "Agent BDFID:					" << bdf_id << std::endl;
+    std::cout << "Agent Compute Unit:				" << compute_unit << std::endl;
+
+    // Output Peak FLOPS and Peak Bandwidth if Env var is set
+    // TODO: Fan, need to add BW
+    if (output_amd) {
+      std::string agent_name = name;
+
+      // The peak value is printed only when the agent name matches an entry
+      // of agent_names; flops_table is then indexed with that same name
+      for (size_t i = 0; i < agent_names.size(); i++) {
+        if (agent_name.compare(agent_names[i]) == 0)
+          std::cout << "Agent Peak GFLOPS:				" << flops_table[agent_name]
+                    << std::endl;
+      }
+    }
+
+    // Check if the agent is kernel agent
+    if (agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH) {
+
+      // Get flag of fast_f16 operation
+      bool fast_f16;
+      err = hsa_agent_get_info(agent, HSA_AGENT_INFO_FAST_F16_OPERATION,
+                               &fast_f16);
+      RET_IF_HSA_INFO_ERR(err)
+
+      // Get wavefront size
+      uint32_t wavefront_size = 0;
+      err = hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE,
+                               &wavefront_size);
+      RET_IF_HSA_INFO_ERR(err)
+
+      // Get max total number of work-items in a workgroup
+      uint32_t workgroup_max_size = 0;
+      err = hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_SIZE,
+                               &workgroup_max_size);
+      RET_IF_HSA_INFO_ERR(err)
+
+      // Get max number of work-items of each dimension of a work-group
+      uint16_t workgroup_max_dim[3];
+      err = hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_DIM,
+                               &workgroup_max_dim);
+      RET_IF_HSA_INFO_ERR(err)
+
+      // Get max number of a grid per dimension
+      hsa_dim3_t grid_max_dim;
+      err = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_DIM,
+                               &grid_max_dim);
+      RET_IF_HSA_INFO_ERR(err)
+
+      // Get max total number of work-items in a grid
+      uint32_t grid_max_size = 0;
+      err = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_SIZE,
+                               &grid_max_size);
+      RET_IF_HSA_INFO_ERR(err)
+
+      // Get max number of fbarriers per work group
+      uint32_t fbarrier_max_size = 0;
+      err = hsa_agent_get_info(agent, HSA_AGENT_INFO_FBARRIER_MAX_SIZE,
+                               &fbarrier_max_size);
+      RET_IF_HSA_INFO_ERR(err)
+
+      // Print info for kernel agent
+      // The fast F16 line is printed only when the flag is true
+      if (true == fast_f16) {
+        std::cout << "Agent Fast F16 Operation:			TRUE" <<
+                                                                    std::endl;
+      }
+
+      std::cout << "Agent Wavefront Size:				" <<
+                                                  wavefront_size << std::endl;
+      std::cout << "Agent Workgroup Max Size:			" <<
+                                              workgroup_max_size << std::endl;
+      std::cout <<
+               "Agent Workgroup Max Size Per Dimension:			" <<
+                                                                    std::endl;
+
+      for (int i = 0; i < 3; i++) {
+        std::cout << "  Dim[" << i <<
+            "]:					" << workgroup_max_dim[i] <<
+                                                                    std::endl;
+      }
+
+      std::cout << "Agent Grid Max Size:				" <<
+                                                   grid_max_size << std::endl;
+
+      // Stop using the above kmt functions as per SWDEV-97044
+      //
+      uint32_t waves_per_cu = 0;
+      err = hsa_agent_get_info(agent,
+                        (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU,
+                                                                &waves_per_cu);
+      RET_IF_HSA_INFO_ERR(err)
+      std::cout << "Agent Waves Per CU:				" <<
+                                                     waves_per_cu << std::endl;
+      // Derived value: wavefront size multiplied by waves per CU
+      std::cout << "Agent Max Work-item Per CU:			"
+                << wavefront_size* waves_per_cu << std::endl;
+
+      std::cout << "Agent Grid Max Size per Dimension:" << std::endl;
+
+      // The hsa_dim3_t is read as an array of three uint32_t values
+      for (int i = 0; i < 3; i++) {
+        std::cout << "  Dim[" << i <<
+                                     "]					"
+                 << reinterpret_cast<uint32_t*>(&grid_max_dim)[i] << std::endl;
+      }
+
+      std::cout << "Agent Max number Of fbarriers Per Workgroup:	"
+                << fbarrier_max_size << std::endl;
+    }
+  }
+
+  // Get pool info
+  std::cout << "Agent Pool Info:" << std::endl;
+  err = hsa_amd_agent_iterate_memory_pools(agent, get_pool_info, &pool_number);
+  RET_IF_HSA_INFO_ERR(err)
+
+  return HSA_STATUS_SUCCESS;
+}
+
+// Implement region iteration function
+// Memory pool iteration callback: numbers the pool and dumps its properties
+// through rocrtst::DumpMemoryPoolInfo().
+// `data` must point to an int counter; it is incremented on every call and the
+// new value is printed as the pool number.
+hsa_status_t get_pool_info(hsa_amd_memory_pool_t pool, void* data) {
+  int& pool_count = *reinterpret_cast<int*>(data);
+  ++pool_count;
+
+  std::cout << "  Pool #" << pool_count << ":" << std::endl;
+
+  hsa_status_t dump_status = rocrtst::DumpMemoryPoolInfo(pool, 4);
+  RET_IF_HSA_INFO_ERR(dump_status)
+
+  return dump_status;
+}
+
+#undef RET_IF_HSA_INFO_ERR
+
+// Prints nothing: Run() already writes all information to std::cout. The
+// profile check is still performed, but both outcomes end the function.
+void HsaInfo::DisplayResults() const {
+  if (rocrtst::CheckProfile(this)) {
+    return;
+  }
+}
+
+// Runs the common clean up for this test and asserts that it succeeded.
+void HsaInfo::Close() {
+  const hsa_status_t err = rocrtst::CommonCleanUp(this);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+}
+
@@ -0,0 +1,85 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#ifndef __ROCRTST_SRC_HSA_INFO_H__
+#define __ROCRTST_SRC_HSA_INFO_H__
+
+#include "perf_common/perf_base.h"
+#include "common/base_rocr.h"
+#include "common/common.h"
+#include "common/os.h"
+#include "hsa/hsa.h"
+#include "hsa/hsa_ext_amd.h"
+#include <iostream>
+#include <map>
+#include <string>
+#include <vector>
+
+//@Brief: This is trying to replicate clinfo
+
+// Test case that prints HSA system, agent and memory pool information.
+// It derives from rocrtst::BaseRocR and PerfBase; the methods below are
+// defined in the accompanying source file.
+class HsaInfo: public rocrtst::BaseRocR, public PerfBase {
+ public:
+  //@Brief: Constructor
+  HsaInfo();
+
+  //@Brief: Destructor
+  virtual ~HsaInfo();
+
+  //@Brief: Set up the environment for the test
+  virtual void SetUp();
+
+  //@Brief: Run the test case
+  virtual void Run();
+
+  //@Brief: Display the results we got
+  virtual void DisplayResults() const;
+
+  //@Brief: Clean up and close the runtime
+  virtual void Close();
+
+};
+
+#endif
+
@@ -0,0 +1,328 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include "image_bandwidth.h"
+#include "common/base_rocr_utils.h"
+#include "common/common.h"
+#include "common/hsatimer.h"
+#include "gtest/gtest.h"
+#include "hsa/hsa.h"
+#include "hsa/hsa_ext_image.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <algorithm>
+
+// Zero-initializes the three bandwidth result arrays and selects the image
+// under test: 2D geometry with RGBA channel order and unsigned 8-bit channels.
+// The test requires the full HSA profile. The `num` argument is not used.
+ImageBandwidth::ImageBandwidth(size_t num)
+    : BaseRocR(),
+      import_bandwidth_ {0.0},
+      export_bandwidth_ {0.0},
+      copy_bandwidth_ {0.0} {
+  geometry_ = HSA_EXT_IMAGE_GEOMETRY_2D;
+  format_.channel_type = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8;
+  format_.channel_order = HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA;
+
+  set_requires_profile(HSA_PROFILE_FULL);
+}
+
+// Nothing to release here.
+ImageBandwidth::~ImageBandwidth() {}
+
+// Edge length, in pixels, of each square test image.
+const size_t ImageBandwidth::Size[10] = {
+  32, 64, 128, 256, 512,
+  1024, 2048, 4096, 8192, 16384
+};
+
+// Label printed for each entry of Size[]: the image's data size at 4 bytes
+// per pixel (e.g. 32 * 32 * 4 bytes = 4K).
+const char* const ImageBandwidth::Str[10] = {
+  "4K", "16K", "64K", "256K", "1M",
+  "4M", "16M", "64M", "256M", "1G"
+};
+
+// Initializes HSA for this test, then iterates the memory pools of the agent
+// returned by gpu_device1() with rocrtst::FindGlobalPool, passing the address
+// of cpu_pool() as the callback's output argument.
+void ImageBandwidth::SetUp() {
+  if (rocrtst::InitAndSetupHSA(this) != HSA_STATUS_SUCCESS) {
+    return;
+  }
+
+  hsa_agent_t* gpu_dev = gpu_device1();
+
+  // Find the global pool
+  hsa_status_t err = hsa_amd_agent_iterate_memory_pools(
+                         *gpu_dev, rocrtst::FindGlobalPool, &cpu_pool());
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+}
+
+// Measures image import, export and copy bandwidth on the agent returned by
+// gpu_device1() for each of the 10 square image sizes in Size[], and stores
+// the results in import_bandwidth_, export_bandwidth_ and copy_bandwidth_.
+//
+// For every size the test imports a host buffer into an image, exports it
+// back and verifies the contents, then copies the image into a second image
+// and verifies that one as well. Every buffer and image handle created for a
+// size is released before the next size is processed.
+void ImageBandwidth::Run() {
+  hsa_status_t err;
+
+  if (!rocrtst::CheckProfile(this)) {
+    return;
+  }
+
+  hsa_agent_t* gpu_dev = gpu_device1();
+
+  // Pattern written to every pixel of the host buffer and expected back
+  const uint32_t kPixelPattern = 0x10101010;
+
+  for (int i = 0; i < 10; i++) {
+    // Number of pixels (one uint32_t each) in the Size[i] x Size[i] image
+    const size_t pixel_count = Size[i] * Size[i];
+
+    // Create timer for import, export and copy tests
+    rocrtst::PerfTimer import_timer;
+    rocrtst::PerfTimer export_timer;
+    rocrtst::PerfTimer copy_timer;
+    std::vector<double> import_image;
+    std::vector<double> export_image;
+    std::vector<double> copy_image;
+
+    // Allocate image buffer in host memory
+    uint32_t* image_buffer = NULL;
+    err = hsa_amd_memory_pool_allocate(cpu_pool(),
+                                       pixel_count * sizeof(uint32_t), 0,
+                                       reinterpret_cast<void**>(&image_buffer));
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    // Initialize the image buffer with the test pattern
+    for (size_t j = 0; j < pixel_count; j++) {
+      image_buffer[j] = kPixelPattern;
+    }
+
+    // Prepare for 2D image creation
+    hsa_ext_image_t image_handle;
+
+    hsa_ext_image_descriptor_t image_descriptor;
+    image_descriptor.geometry = geometry_;
+    image_descriptor.width = Size[i];
+    image_descriptor.height = Size[i];
+    image_descriptor.depth = 1;
+    image_descriptor.array_size = 0;
+    image_descriptor.format = format_;
+
+    // Check if device_ supports at least read and write operation on
+    // image format
+    uint32_t capability_mask;
+    err = hsa_ext_image_get_capability(*gpu_dev, HSA_EXT_IMAGE_GEOMETRY_2D,
+                                       &format_, &capability_mask);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    if (!(capability_mask & HSA_EXT_IMAGE_CAPABILITY_READ_WRITE)) {
+      std::cout <<
+       "Device does not support read and write operation on this kind of image!"
+                << std::endl;
+      ASSERT_NE(capability_mask & HSA_EXT_IMAGE_CAPABILITY_READ_WRITE, 0);
+    }
+
+    // Get image info
+    hsa_ext_image_data_info_t image_info;
+    err = hsa_ext_image_data_get_info(*gpu_dev, &image_descriptor,
+                                      HSA_ACCESS_PERMISSION_RW, &image_info);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    // hsa_amd_memory_fill() counts uint32_t elements, not bytes, so the byte
+    // size of the image data has to be converted before it is passed in
+    const size_t image_words = image_info.size / sizeof(uint32_t);
+
+    // Allocate memory for image; the extra image_info.alignment bytes leave
+    // room to round the start address up to the required alignment
+    uintptr_t ptr_temp = 0;
+    err = hsa_amd_memory_pool_allocate(cpu_pool(),
+                                       image_info.size + image_info.alignment,
+                                       0, reinterpret_cast<void**>(&ptr_temp));
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    // Align the image address (always moves to the next aligned address)
+    uintptr_t mul = ptr_temp / image_info.alignment;
+    void* ptr_image = reinterpret_cast<void*>((mul + 1) * image_info.alignment);
+
+    // Clear the image data to 0
+    err = hsa_amd_memory_fill(ptr_image, 0, image_words);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    // Create image handle
+    err = hsa_ext_image_create(*gpu_dev, &image_descriptor, ptr_image,
+                               HSA_ACCESS_PERMISSION_RW, &image_handle);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    // Set import image region
+    hsa_dim3_t range = {(uint32_t) Size[i], (uint32_t) Size[i], 1};
+
+    hsa_ext_image_region_t image_region;
+    hsa_dim3_t image_offset = {0, 0, 0};
+    image_region.offset = image_offset;
+    image_region.range = range;
+
+    // Includes the warm-up run and the samples CalculateBandwidth() discards
+    size_t iterations = RealIterationNum();
+
+    for (uint32_t it = 0; it < iterations; it++) {
+      // Create a timer
+      int index = import_timer.CreateTimer();
+
+      // Stamp at the beginning
+      import_timer.StartTimer(index);
+
+      // Import image from host
+      err = hsa_ext_image_import(*gpu_dev, image_buffer, 0, 0, image_handle,
+                                 &image_region);
+      ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+      // Stamp in the end
+      import_timer.StopTimer(index);
+      import_image.push_back(import_timer.ReadTimer(index));
+    }
+
+    // Reset image_buffer so the export below has to rewrite every pixel
+    err = hsa_amd_memory_fill(image_buffer, 0, pixel_count);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    for (uint32_t it = 0; it < iterations; it++) {
+      // Export image
+      // Stamp at the beginning
+      int index = export_timer.CreateTimer();
+      export_timer.StartTimer(index);
+
+      err = hsa_ext_image_export(*gpu_dev, image_handle, image_buffer, 0, 0,
+                                 &image_region);
+      ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+      export_timer.StopTimer(index);
+      export_image.push_back(export_timer.ReadTimer(index));
+
+      // Check if the value is correct
+      for (size_t j = 0; j < pixel_count; j++) {
+        ASSERT_EQ(image_buffer[j], kPixelPattern);
+      }
+    }
+
+    // Create another image for copy
+    // Allocate memory for image
+    uintptr_t ptr_temp2 = 0;
+    err = hsa_amd_memory_pool_allocate(cpu_pool(),
+                                       image_info.size + image_info.alignment,
+                                       0, reinterpret_cast<void**>(&ptr_temp2));
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    // Align the image address
+    mul = ptr_temp2 / image_info.alignment;
+    void* ptr_image2 =
+                    reinterpret_cast<void*>((mul + 1) * image_info.alignment);
+
+    // Clear the image data to 0
+    err = hsa_amd_memory_fill(ptr_image2, 0, image_words);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    // Create image handle
+    hsa_ext_image_t image_handle_copy;
+    err = hsa_ext_image_create(*gpu_dev, &image_descriptor, ptr_image2,
+                               HSA_ACCESS_PERMISSION_RW, &image_handle_copy);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    for (uint32_t it = 0; it < iterations; it++) {
+      // Stamp at the beginning
+      int index = copy_timer.CreateTimer();
+      copy_timer.StartTimer(index);
+
+      err = hsa_ext_image_copy(*gpu_dev, image_handle, &image_offset,
+                               image_handle_copy, &image_offset, &range);
+      ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+      // Stamp in the end
+      copy_timer.StopTimer(index);
+      copy_image.push_back(copy_timer.ReadTimer(index));
+
+      // Check if image data is correct: clear the host buffer, export the
+      // copied image into it and compare against the pattern
+      err = hsa_amd_memory_fill(image_buffer, 0, pixel_count);
+      ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+      // Export image
+      err = hsa_ext_image_export(*gpu_dev, image_handle_copy, image_buffer,
+                                 0, 0, &image_region);
+      ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+      // Check if the value is correct
+      for (size_t j = 0; j < pixel_count; j++) {
+        ASSERT_EQ(image_buffer[j], kPixelPattern);
+      }
+    }
+
+    // Calculate Bandwidth
+    import_bandwidth_[i] = CalculateBandwidth(import_image, Size[i]);
+    export_bandwidth_[i] = CalculateBandwidth(export_image, Size[i]);
+    copy_bandwidth_[i] = CalculateBandwidth(copy_image, Size[i]);
+
+    // Release everything created for this size. The image handles go first,
+    // then their backing stores (freed through the original, unaligned
+    // pointers), then the host buffer.
+    err = hsa_ext_image_destroy(*gpu_dev, image_handle_copy);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+    err = hsa_ext_image_destroy(*gpu_dev, image_handle);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    err = hsa_amd_memory_pool_free(reinterpret_cast<void*>(ptr_temp2));
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+    err = hsa_amd_memory_pool_free(reinterpret_cast<void*>(ptr_temp));
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+    err = hsa_amd_memory_pool_free(image_buffer);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+  }
+}
+
+// Turns the timing samples of one test into a bandwidth figure.
+//
+// The first sample (the warm-up run) is dropped, the rest are sorted in
+// ascending order, and at most num_iteration() of the smallest samples are
+// averaged; the slower samples beyond that are discarded.
+//
+// @param vec   Timing samples; modified in place (trimmed and sorted).
+// @param size  Edge length of the square image in pixels; the transferred
+//              data is taken as size * size * 4 bytes.
+// @return size * size * 4 divided by the mean sample and by 1024^3 (shown as
+//         GB/s by DisplayResults(), which presumes samples are in seconds),
+//         or 0.0 when no sample is left to average.
+double ImageBandwidth::CalculateBandwidth(std::vector<double>& vec,
+    size_t size) {
+  // Erasing from an empty vector is undefined, so bail out first
+  if (vec.empty()) {
+    return 0.0;
+  }
+
+  // Delete the first timer result, which is warm up test
+  vec.erase(vec.begin());
+
+  // Sort the results
+  std::sort(vec.begin(), vec.end());
+
+  // Keep the fastest num_iteration() results; the bound is clamped so the
+  // erase never starts past the end when fewer samples were collected
+  const size_t keep = std::min<size_t>(num_iteration(), vec.size());
+  vec.erase(vec.begin() + keep, vec.end());
+
+  // No sample left: avoid dividing by zero
+  if (vec.empty()) {
+    return 0.0;
+  }
+
+  double total = 0.0;
+
+  for (size_t index = 0; index < vec.size(); index++) {
+    total += vec[index];
+  }
+
+  const double mean = total / vec.size();
+
+  return (double) size * size * 4 / mean / 1024 / 1024 / 1024;
+}
+
+// Prints a table with the import, export and copy bandwidth measured for each
+// image size. Nothing is printed when the profile check fails.
+void ImageBandwidth::DisplayResults() const {
+  if (!rocrtst::CheckProfile(this)) {
+    return;
+  }
+
+  // Table header
+  fputs("==================================================="
+        "=========================\n", stdout);
+  fputs("  Size        Import                Export                 Copy\n",
+        stdout);
+
+  // One row per image size, each followed by a separator line
+  for (int row = 0; row < 10; ++row) {
+    printf("  %s         %f(GB/s)          %f(GB/s)             %f(GB/s)\n",
+           Str[row], import_bandwidth_[row], export_bandwidth_[row],
+           copy_bandwidth_[row]);
+    fputs("================================================="
+          "===========================\n", stdout);
+  }
+}
+
+// Runs the common clean up for this test and asserts that it succeeded.
+void ImageBandwidth::Close() {
+  const hsa_status_t err = rocrtst::CommonCleanUp(this);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+}
+
+// Number of timed runs actually executed per test: num_iteration() scaled by
+// 1.2 plus one extra run, truncated to an integer.
+size_t ImageBandwidth::RealIterationNum() {
+  const double padded_runs = num_iteration() * 1.2 + 1;
+  return static_cast<size_t>(padded_runs);
+}
@@ -0,0 +1,99 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#ifndef __ROCRTST_SRC_IMAGE_BANDWIDTH_H__
+#define __ROCRTST_SRC_IMAGE_BANDWIDTH_H__
+
+#include "perf_common/perf_base.h"
+#include "common/base_rocr.h"
+#include "hsa/hsa.h"
+#include "hsa/hsa_ext_image.h"
+#include <vector>
+
+// Performance test that records an import, an export and a copy bandwidth
+// value for each of the 10 image sizes listed in Size[].
+class ImageBandwidth: public rocrtst::BaseRocR, public PerfBase {
+ public:
+  //@Brief: Constructor for test case of ImageBandwidth
+  // NOTE(review): num is presumably the requested iteration count; the
+  // constructor body is not part of this header - confirm.
+  ImageBandwidth(size_t num = 100);
+
+  //@Brief: Destructor
+  virtual ~ImageBandwidth();
+
+  //@Brief: Setup the environment for measurement
+  virtual void SetUp();
+
+  //@Brief: Core measurement execution
+  virtual void Run();
+
+  //@Brief: Clean up and release the resources
+  virtual void Close();
+
+  //@Brief: Display results as a table with one row per image size
+  virtual void DisplayResults() const;
+
+ private:
+  //@Brief: Define image size and corresponding string
+  // Size[i] is passed to CalculateBandwidth(); Str[i] is the label printed
+  // for the same row by DisplayResults().
+  static const size_t Size[10];
+  static const char* const Str[10];
+
+  //@Brief: Get actual iteration number (num_iteration() * 1.2 + 1)
+  size_t RealIterationNum();
+
+  //@Brief: Calculate Bandwidth from the timer readings in vec for an image
+  // of the given size; vec is modified in place.
+  double CalculateBandwidth(std::vector<double>& vec, size_t size);
+
+ protected:
+  //@Brief: bandwidth data, indexed like Size[] / Str[]
+  double import_bandwidth_[10];
+  double export_bandwidth_[10];
+  double copy_bandwidth_[10];
+
+  //@Brief: Image format
+  hsa_ext_image_format_t format_;
+
+  //@Brief: Image geometry
+  hsa_ext_image_geometry_t geometry_;
+};
+
+#endif
@@ -0,0 +1,270 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include "image_load_bandwidth.h"
+#include "common/base_rocr_utils.h"
+#include "common/common.h"
+#include "common/hsatimer.h"
+#include "common/helper_funcs.h"
+#include "gtest/gtest.h"
+#include "hsa/hsa_ext_image.h"
+#include <stdio.h>
+#include <vector>
+
+// Constructor: start with zeroed results and record HSA_PROFILE_FULL as the
+// profile this test requires
+ImageLoadBandwidth::ImageLoadBandwidth()
+    : BaseRocR(), load_bandwidth_(0.0), image_size_(0) {
+  set_requires_profile(HSA_PROFILE_FULL);
+}
+
+// Destructor: the body is intentionally empty
+ImageLoadBandwidth::~ImageLoadBandwidth() {}
+
+// Set up the environment: select the kernel object file and kernel name,
+// initialise HSA, create the dispatch queue, load the kernel and pre-fill the
+// AQL packet for a 2-dimensional dispatch.
+void ImageLoadBandwidth::SetUp() {
+  set_kernel_file_name("load_2d_image.o");
+  set_kernel_name("&__OpenCL_load_2d_image_kernel");
+
+  if (HSA_STATUS_SUCCESS != rocrtst::InitAndSetupHSA(this)) {
+    return;
+  }
+
+  // Fetch the agent only after HSA has been initialised, matching
+  // ImageStoreBandwidth::SetUp().
+  hsa_agent_t* gpu_dev = gpu_device1();
+
+  //Create a queue with max number size
+  hsa_queue_t* q = nullptr;
+  rocrtst::CreateQueue(*gpu_dev, &q);
+  // Publish the new queue: Run() dispatches through main_queue(), so the
+  // handle must not stay in the local variable only.
+  set_main_queue(q);
+
+  rocrtst::LoadKernelFromObjFile(this);
+
+  //Fill up part of aql
+  rocrtst::InitializeAQLPacket(this, &aql());
+  aql().setup = 0;
+  aql().setup |= 2 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
+}
+
+// Run the test
+//
+// For every iteration a 256x256 RGBA/UINT8 2D image is created on memory
+// taken from the CPU pool, the load kernel is dispatched on it, and the time
+// from ringing the doorbell until the completion signal drops below 1 is
+// recorded. The first reading is discarded as warm-up; load_bandwidth_ is
+// image_size_ divided by the mean of the remaining readings, scaled down by
+// 1024^3. Does nothing when rocrtst::CheckProfile() rejects the test.
+void ImageLoadBandwidth::Run() {
+  hsa_agent_t* gpu_dev = gpu_device1();
+  hsa_agent_t* cpu_dev = cpu_device();
+
+  hsa_status_t err;
+
+  if (!rocrtst::CheckProfile(this)) {
+    return;
+  }
+
+  hsa_ext_image_descriptor_t image_descriptor;
+  image_descriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
+  image_descriptor.width = 256;
+  image_descriptor.height = 256;
+  image_descriptor.depth = 1;
+  image_descriptor.array_size = 0;
+  image_descriptor.format.channel_type =
+    HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8;
+  image_descriptor.format.channel_order = HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA;
+
+  hsa_ext_image_format_t image_format;
+  image_format.channel_type = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8;
+  image_format.channel_order = HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA;
+
+  // Check if the device supports at least read only operation on the image
+  // format
+  uint32_t capability_mask;
+  err = hsa_ext_image_get_capability(*gpu_dev, HSA_EXT_IMAGE_GEOMETRY_2D,
+                                     &image_format, &capability_mask);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  ASSERT_NE(capability_mask & HSA_EXT_IMAGE_CAPABILITY_READ_ONLY, 0u)
+      << "Device does not support read only operation on this kind of image!";
+
+  // Get image info
+  hsa_ext_image_data_info_t image_info;
+  err = hsa_ext_image_data_get_info(*gpu_dev, &image_descriptor,
+                                    HSA_ACCESS_PERMISSION_RO, &image_info);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  image_size_ = image_info.size;
+
+  std::vector<double> time;
+
+  for (uint32_t i = 0; i < num_iteration(); i++) {
+#ifdef DEBUG
+    std::cout << ".";
+    fflush(stdout);
+#endif
+    // Allocate memory space for image
+    // Find the global region
+    err = hsa_amd_agent_iterate_memory_pools(*cpu_dev, rocrtst::FindGlobalPool,
+                                                                   &cpu_pool());
+    ASSERT_EQ(err, HSA_STATUS_INFO_BREAK);
+    // Over-allocate by one alignment unit so an aligned start address always
+    // fits inside the buffer.
+    uintptr_t ptr_temp = 0;
+    err = hsa_amd_memory_pool_allocate(cpu_pool(),
+                                       image_info.size + image_info.alignment,
+                                                        0, (void**) &ptr_temp);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    err = hsa_amd_agents_allow_access(1, gpu_dev, NULL, (void*) ptr_temp);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    // Align the image address (always advances to the next multiple of the
+    // alignment, which the extra allocation above accounts for)
+    uintptr_t mul = ptr_temp / image_info.alignment;
+    void* ptr_image = (void*) ((mul + 1) * image_info.alignment);
+
+    // Initialise the image memory with the 32-bit value 1.
+    // hsa_amd_memory_fill() counts uint32_t elements, not bytes, so the byte
+    // size has to be converted; passing the byte size would write past the
+    // end of the allocation.
+    err = hsa_amd_memory_fill(ptr_image, 1,
+                              image_info.size / sizeof(uint32_t));
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    // Create image handle
+    hsa_ext_image_t image_handle;
+    err = hsa_ext_image_create(*gpu_dev, &image_descriptor, ptr_image,
+                               HSA_ACCESS_PERMISSION_RO, &image_handle);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    // Allocate and initialize the kernel argument
+    typedef struct args_t {
+      uint64_t arg0;
+      int* arg1;
+      int istart;
+      int iend;
+      int istep;
+    } args;
+
+    int local_out = 5;
+    int istart = 0;
+    int iend = 64;
+    int istep = 1;
+
+    args* kern_ptr = NULL;
+    err = hsa_amd_memory_pool_allocate(cpu_pool(), sizeof(args), 0,
+                                       (void**) &kern_ptr);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    kern_ptr->arg0 = image_handle.handle;
+    kern_ptr->arg1 = &local_out;
+    kern_ptr->istart = istart;
+    kern_ptr->iend = iend;
+    kern_ptr->istep = istep;
+
+    aql().kernarg_address = kern_ptr;
+
+    // Obtain the current queue write index
+    uint64_t index = hsa_queue_add_write_index_relaxed(main_queue(), 1);
+
+    void *q_base_addr = main_queue()->base_address;
+
+    // Write the aql packet at the calculated queue index address.
+    const uint32_t queue_mask = main_queue()->size - 1;
+    ((hsa_kernel_dispatch_packet_t*)q_base_addr)[index & queue_mask] = aql();
+
+    rocrtst::PerfTimer p_timer;
+    int id = p_timer.CreateTimer();
+    p_timer.StartTimer(id);
+
+    // Mark the packet as a kernel dispatch and ring the doorbell; the timed
+    // region covers submission through completion.
+    ((hsa_kernel_dispatch_packet_t*)q_base_addr)[index & queue_mask].header |=
+                     HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+    hsa_signal_store_release(main_queue()->doorbell_signal, index);
+
+    // Wait on the dispatch signal until the kernel is finished.
+    while (hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_LT, 1,
+                                     (uint64_t) - 1, HSA_WAIT_STATE_ACTIVE))
+      ;
+
+    p_timer.StopTimer(id);
+
+    time.push_back(p_timer.ReadTimer(id));
+
+    // Set the completion signal back to 1 for the next dispatch
+    hsa_signal_store_release(signal(), 1);
+
+    err = hsa_ext_image_destroy(*gpu_dev, image_handle);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    err = hsa_memory_deregister(ptr_image, image_info.size);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    // The kernel has completed, so its argument buffer can be released;
+    // without this one buffer leaks per iteration.
+    err = hsa_amd_memory_pool_free(kern_ptr);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    hsa_amd_memory_pool_free((void*) ptr_temp);
+  }
+
+  // Calculate the mean load time. The first sample is the warm-up run and is
+  // dropped; with fewer than two samples nothing would be left to average.
+  if (time.size() < 2) {
+    load_bandwidth_ = 0.0;
+    return;
+  }
+
+  time.erase(time.begin());
+#ifdef DEBUG
+
+  for (uint32_t i = 0; i < time.size(); i++) {
+    std::cout << time[i] << std::endl;
+  }
+
+#endif
+  double mean_time = rocrtst::CalcMean(time);
+  load_bandwidth_ = image_size_ / mean_time / 1024 / 1024 / 1024;
+
+}
+
+// Release the test resources through rocrtst::CommonCleanUp() and assert
+// that the cleanup reported success.
+void ImageLoadBandwidth::Close() {
+  const hsa_status_t err = rocrtst::CommonCleanUp(this);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+}
+
+// Print the image size and the measured load bandwidth. Nothing is printed
+// unless rocrtst::CheckProfile() accepts this test.
+void ImageLoadBandwidth::DisplayResults() const {
+  if (rocrtst::CheckProfile(this)) {
+    std::cout << "======================================"
+                 "======================================" << std::endl
+              << " Image Size(bytes):              LoadBandwidth(GB/S):    "
+              << std::endl
+              << " " << image_size_ << "                                "
+              << load_bandwidth_ << std::endl;
+  }
+}
+
@@ -0,0 +1,82 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#ifndef __ROCRTST_SRC_INC_IMAGE_LOAD_BANDWIDTH_H__
+#define __ROCRTST_SRC_INC_IMAGE_LOAD_BANDWIDTH_H__
+
+#include "common/base_rocr.h"
+#include "hsa/hsa.h"
+#include "perf_common/perf_base.h"
+
+// Performance test that times dispatches of an image-load kernel and reports
+// the resulting bandwidth together with the image size.
+class ImageLoadBandwidth: public rocrtst::BaseRocR, public PerfBase {
+ public:
+  //@Brief: Constructor
+  ImageLoadBandwidth();
+
+  //@Brief: Destructor
+  ~ImageLoadBandwidth();
+
+  //@Brief: Set up the test environment
+  virtual void SetUp();
+
+  //@Brief: Run the actual testing; fills in load_bandwidth_ and image_size_
+  virtual void Run();
+
+  //@Brief: Clean up the test environment
+  virtual void Close();
+
+  //@Brief: Display results (image size and load bandwidth)
+  virtual void DisplayResults() const;
+
+ private:
+  //@Brief: Image Load Bandwidth, printed under a "GB/S" heading
+  double load_bandwidth_;
+
+  //@Brief: Image size in bytes, as reported by hsa_ext_image_data_get_info
+  size_t image_size_;
+};
+
+#endif //__ROCRTST_SRC_INC_IMAGE_LOAD_BANDWIDTH_H__
+
@@ -0,0 +1,271 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include "image_store_bandwidth.h"
+#include "common/base_rocr_utils.h"
+#include "common/common.h"
+#include "common/helper_funcs.h"
+#include "common/hsatimer.h"
+#include "gtest/gtest.h"
+#include "hsa/hsa_ext_image.h"
+#include <stdio.h>
+#include <vector>
+
+// Constructor: start with zeroed results and record HSA_PROFILE_FULL as the
+// profile this test requires
+ImageStoreBandwidth::ImageStoreBandwidth()
+    : BaseRocR(), store_bandwidth_(0.0), image_size_(0) {
+  set_requires_profile(HSA_PROFILE_FULL);
+}
+
+// Destructor: the body is intentionally empty
+ImageStoreBandwidth::~ImageStoreBandwidth() {}
+
+// Prepare the test: select the kernel object file and kernel name,
+// initialise HSA, create and register the dispatch queue, load the kernel and
+// pre-fill the AQL packet for a 2-dimensional dispatch.
+void ImageStoreBandwidth::SetUp() {
+  set_kernel_file_name("store_2d_image.o");
+  set_kernel_name("&__OpenCL_store_2d_image_kernel");
+
+  // Nothing else can be prepared when HSA initialisation fails
+  if (rocrtst::InitAndSetupHSA(this) != HSA_STATUS_SUCCESS) {
+    return;
+  }
+
+  // Create a queue with max number size and make it the main queue
+  hsa_queue_t* queue = nullptr;
+  rocrtst::CreateQueue(*gpu_device1(), &queue);
+  set_main_queue(queue);
+
+  rocrtst::LoadKernelFromObjFile(this);
+
+  // Fill up part of the AQL packet; the setup field carries only the
+  // dimension count (2)
+  rocrtst::InitializeAQLPacket(this, &aql());
+  aql().setup = 2 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
+}
+
+// Run the test
+//
+// For every iteration a 256x256 RGBA/UINT8 2D image is created on memory
+// taken from the CPU pool, the store kernel is dispatched on it, and the time
+// from ringing the doorbell until the completion signal drops below 1 is
+// recorded. The first reading is discarded as warm-up; store_bandwidth_ is
+// image_size_ divided by the mean of the remaining readings, scaled down by
+// 1024^3. Does nothing when rocrtst::CheckProfile() rejects the test.
+void ImageStoreBandwidth::Run() {
+  hsa_status_t err;
+
+  if (!rocrtst::CheckProfile(this)) {
+    return;
+  }
+
+  hsa_agent_t* gpu_dev = gpu_device1();
+  hsa_agent_t* cpu_dev = cpu_device();
+
+  hsa_ext_image_descriptor_t image_descriptor;
+  image_descriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
+  image_descriptor.width = 256;
+  image_descriptor.height = 256;
+  image_descriptor.depth = 1;
+  image_descriptor.array_size = 0;
+  image_descriptor.format.channel_type =
+    HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8;
+  image_descriptor.format.channel_order = HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA;
+
+  hsa_ext_image_format_t image_format;
+  image_format.channel_type = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8;
+  image_format.channel_order = HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA;
+
+  // Check that the device supports read-write operation on the image format;
+  // the image below is queried and created with read-write permission.
+  uint32_t capability_mask;
+  err = hsa_ext_image_get_capability(*gpu_dev, HSA_EXT_IMAGE_GEOMETRY_2D,
+                                     &image_format, &capability_mask);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  ASSERT_NE(capability_mask & HSA_EXT_IMAGE_CAPABILITY_READ_WRITE, 0u)
+      << "Device does not support read and write operation on this kind of "
+         "image!";
+
+  // Get image info
+  hsa_ext_image_data_info_t image_info;
+  err = hsa_ext_image_data_get_info(*gpu_dev, &image_descriptor,
+                                    HSA_ACCESS_PERMISSION_RW, &image_info);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  image_size_ = image_info.size;
+
+  std::vector<double> time;
+
+  for (uint32_t i = 0; i < num_iteration(); i++) {
+#ifdef DEBUG
+    std::cout << ".";
+    fflush(stdout);
+#endif
+    // Allocate memory space for image
+    err = hsa_amd_agent_iterate_memory_pools(*cpu_dev, rocrtst::FindGlobalPool,
+                                                                   &cpu_pool());
+    ASSERT_EQ(err, HSA_STATUS_INFO_BREAK);
+
+    // Over-allocate by one alignment unit so an aligned start address always
+    // fits inside the buffer.
+    uintptr_t ptr_temp = 0;
+    err = hsa_amd_memory_pool_allocate(cpu_pool(),
+                                       image_info.size + image_info.alignment,
+                                                         0, (void**) &ptr_temp);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    // Grant the GPU access to the CPU-pool allocation, as the load test does
+    err = hsa_amd_agents_allow_access(1, gpu_dev, NULL,
+                                      reinterpret_cast<void*>(ptr_temp));
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    // Align the image address (always advances to the next multiple of the
+    // alignment, which the extra allocation above accounts for)
+    uintptr_t mul = ptr_temp / image_info.alignment;
+    void* ptr_image = (void*) ((mul + 1) * image_info.alignment);
+
+    // Clear the image memory to 0. hsa_amd_memory_fill() counts uint32_t
+    // elements, not bytes, so the byte size has to be converted; passing the
+    // byte size would write past the end of the allocation.
+    err = hsa_amd_memory_fill(ptr_image, 0,
+                              image_info.size / sizeof(uint32_t));
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    // Create image handle with the same access permission that was used to
+    // query the data layout above; the kernel stores to this image.
+    hsa_ext_image_t image_handle;
+    err = hsa_ext_image_create(*gpu_dev, &image_descriptor, ptr_image,
+                               HSA_ACCESS_PERMISSION_RW, &image_handle);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    // Allocate and initialize the kernel argument
+    typedef struct args_t {
+      uint64_t arg0;
+      int istart;
+      int iend;
+      int istep;
+    } args;
+
+    int istart = 0;
+    int iend = 64;
+    int istep = 1;
+
+    args* kern_ptr = NULL;
+    err = hsa_amd_memory_pool_allocate(cpu_pool(), sizeof(args), 0,
+                                       (void**) &kern_ptr);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    kern_ptr->arg0 = image_handle.handle;
+    kern_ptr->istart = istart;
+    kern_ptr->iend = iend;
+    kern_ptr->istep = istep;
+
+    aql().kernarg_address = kern_ptr;
+
+    // Obtain the current queue write index
+    uint64_t index = hsa_queue_add_write_index_relaxed(main_queue(), 1);
+
+    void *q_base_addr = main_queue()->base_address;
+    // Write the aql packet at the calculated queue index address.
+    const uint32_t queue_mask = main_queue()->size - 1;
+    ((hsa_kernel_dispatch_packet_t*)q_base_addr)[index & queue_mask] = aql();
+
+    rocrtst::PerfTimer p_timer;
+    int id = p_timer.CreateTimer();
+    p_timer.StartTimer(id);
+
+    // Mark the packet as a kernel dispatch and ring the doorbell; the timed
+    // region covers submission through completion.
+    ((hsa_kernel_dispatch_packet_t*)q_base_addr)[index & queue_mask].header |=
+                      HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+    hsa_signal_store_release(main_queue()->doorbell_signal, index);
+
+    // Wait on the dispatch signal until the kernel is finished.
+    while (hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_LT, 1,
+                                     (uint64_t) - 1, HSA_WAIT_STATE_ACTIVE))
+      ;
+
+    p_timer.StopTimer(id);
+
+    time.push_back(p_timer.ReadTimer(id));
+
+    // Set the completion signal back to 1 for the next dispatch
+    hsa_signal_store_release(signal(), 1);
+
+    err = hsa_ext_image_destroy(*gpu_dev, image_handle);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    err = hsa_memory_deregister(ptr_image, image_info.size);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    // The kernel has completed, so its argument buffer can be released;
+    // without this one buffer leaks per iteration.
+    err = hsa_amd_memory_pool_free(kern_ptr);
+    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+    hsa_amd_memory_pool_free(reinterpret_cast<void*>(ptr_temp));
+  }
+
+  // Calculate the mean store time. The first sample is the warm-up run and
+  // is dropped; with fewer than two samples nothing would be left to average.
+  if (time.size() < 2) {
+    store_bandwidth_ = 0.0;
+    return;
+  }
+
+  time.erase(time.begin());
+#ifdef DEBUG
+
+  for (size_t i = 0; i < time.size(); i++) {
+    std::cout << time[i] << std::endl;
+  }
+
+#endif
+  double mean_time = rocrtst::CalcMean(time);
+  std::cout << "mean time: " << mean_time << std::endl;
+
+  store_bandwidth_ = image_size_ / mean_time / 1024 / 1024 / 1024;
+}
+
+// Release the test resources through rocrtst::CommonCleanUp() and assert
+// that the cleanup reported success.
+void ImageStoreBandwidth::Close() {
+  const hsa_status_t err = rocrtst::CommonCleanUp(this);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+}
+
+// Print the image size and the measured store bandwidth. Nothing is printed
+// unless rocrtst::CheckProfile() accepts this test.
+void ImageStoreBandwidth::DisplayResults() const {
+  if (!rocrtst::CheckProfile(this)) {
+    return;
+  }
+
+  std::cout << "============================================="
+                                "===============================" << std::endl;
+
+  // The heading line must be terminated with std::endl; streaming std::cout
+  // into itself either fails to compile or prints a stream address.
+  std::cout << " Image Size(bytes):              StoreBandwidth(GB/S):    "
+            << std::endl;
+  std::cout << " " << image_size_ << "                                "
+            << store_bandwidth_ << std::endl;
+}
+
@@ -0,0 +1,82 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#ifndef __ROCRTST_SRC_INC_IMAGE_STORE_BANDWIDTH_H__
+#define __ROCRTST_SRC_INC_IMAGE_STORE_BANDWIDTH_H__
+
+#include "perf_common/perf_base.h"
+#include "common/base_rocr.h"
+#include "hsa/hsa.h"
+
+// Performance test that times dispatches of an image-store kernel and
+// reports the resulting bandwidth together with the image size.
+class ImageStoreBandwidth: public rocrtst::BaseRocR, public PerfBase {
+ public:
+  //@Brief: Constructor
+  ImageStoreBandwidth();
+
+  //@Brief: Destructor
+  ~ImageStoreBandwidth();
+
+  //@Brief: Set up the test environment
+  virtual void SetUp();
+
+  //@Brief: Run the actual testing; fills in store_bandwidth_ and image_size_
+  virtual void Run();
+
+  //@Brief: Clean up the test environment
+  virtual void Close();
+
+  //@Brief: Display results (image size and store bandwidth)
+  virtual void DisplayResults() const;
+
+ private:
+  //@Brief: Image Store Bandwidth, printed under a "GB/S" heading
+  double store_bandwidth_;
+
+  //@Brief: Image size in bytes, as reported by hsa_ext_image_data_get_info
+  size_t image_size_;
+};
+
+#endif //__ROCRTST_SRC_INC_IMAGE_STORE_BANDWIDTH_H__
+
@@ -0,0 +1,12 @@
+module &m:1:0:$full:$large:$default;
+extension "amd:gcn";
+extension "IMAGE";
+
+decl prog function &abort()();
+
+// Kernel with no arguments whose body consists of a single ret instruction.
+prog kernel &__Empty_kernel()
+{
+
+	ret;
+};
+
@@ -0,0 +1,88 @@
+module &m:1:0:$full:$large:$default;
+
+/* Copyright 2014 HSA Foundation Inc.  All Rights Reserved.
+ *
+ * HSAF is granting you permission to use this software and documentation (if
+ * any) (collectively, the "Materials") pursuant to the terms and conditions
+ * of the Software License Agreement included with the Materials.  If you do
+ * not have a copy of the Software License Agreement, contact the  HSA Foundation for a copy.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
+ */
+
+extension "amd:gcn";
+extension "IMAGE";
+
+decl prog function &abort()();
+
+
+/**
+ * @brief Hsail kernel in which every work-item performs one 32-bit store.
+ * Each work-item takes its flat absolute work-item id, multiplies it by four
+ * (the size of a u32) to obtain a byte offset, adds that offset to the base
+ * address passed in %outAddr and stores the value 1 at the resulting global
+ * address.
+ *
+ * @NOTE: Each work-item writes 4 bytes at offset (flat absolute id * 4), so
+ * the buffer must provide at least one u32 per dispatched work-item.
+ *
+ * @param outAddr 64-bit base address of the buffer the work-items store to.
+ * It is the kernel's only argument.
+ *
+ */
+prog kernel &main(kernarg_u64 %outAddr) {
+
+  pragma  "AMD RTI", "ARGSTART:__SysMemLoad";
+  pragma  "AMD RTI", "version:3:1:104";
+  pragma  "AMD RTI", "device:generic";
+  pragma  "AMD RTI", "uniqueid:1024";
+  pragma  "AMD RTI", "function:1:0";
+  pragma  "AMD RTI", "memory:64bitABI";
+  pragma  "AMD RTI", "uavid:8";
+  pragma  "AMD RTI", "privateid:8";
+  pragma  "AMD RTI", "ARGEND:__SysMemLoad";
+
+  // Load the buffer base address from the kernel argument segment
+  ld_kernarg_u64    $d0, [%outAddr];
+
+  // Compute the flat absolute id of the current work-item
+  // and shift it left by two (multiply by 4) to get the
+  // byte offset of its u32 slot in the user buffer
+  workitemflatabsid_u32  $s0;
+  shl_u32         $s0, $s0, 2;
+  cvt_u64_u32     $d4, $s0;
+
+  // Add the byte offset to the base address of the user buffer
+  // to obtain the effective address for the store
+  add_u64         $d0, $d0, $d4;
+
+  // Value written by every work-item
+  mov_u32         $s2, 1;
+
+  // Store the value to the work-item's slot in global memory
+  st_global_u32   $s2, [$d0];
+
+};
+
@@ -0,0 +1,88 @@
+module &m:1:0:$base:$large:$default;
+
+/* Copyright 2014 HSA Foundation Inc.  All Rights Reserved.
+ *
+ * HSAF is granting you permission to use this software and documentation (if
+ * any) (collectively, the "Materials") pursuant to the terms and conditions
+ * of the Software License Agreement included with the Materials.  If you do
+ * not have a copy of the Software License Agreement, contact the  HSA Foundation for a copy.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
+ */
+
+extension "amd:gcn";
+extension "IMAGE";
+
+decl prog function &abort()();
+
+
+/**
+ * @brief Hsail kernel in which every work-item writes one 32-bit value.
+ * Each thread computes a byte offset equal to its flat absolute work-item
+ * Id * sizeof(uint32_t), adds it to the address passed in outAddr and
+ * stores the constant 1 at the resulting global address.
+ * So a thread with absolute id 4 writes the element at index 4.
+ *
+ * @NOTE: The kernel performs no bounds check. The buffer at outAddr therefore
+ * needs one uint32_t element for each work-item of the dispatch. So a dispatch
+ * of 8 work-items should allocate enough buffer for 8 * sizeof(uint32_t).
+ *
+ * @param outAddr beginning byte address of the user buffer that is written by
+ * the kernel threads. It is the only argument declared by this kernel.
+ *
+ * NOTE(review): the RTI pragmas in the kernel body name __SysMemLoad although
+ * the kernel is declared as &main - presumably left over from a copy; confirm.
+ *
+ */
+prog kernel &main(kernarg_u64 %outAddr) {
+
+  pragma  "AMD RTI", "ARGSTART:__SysMemLoad";
+  pragma  "AMD RTI", "version:3:1:104";
+  pragma  "AMD RTI", "device:generic";
+  pragma  "AMD RTI", "uniqueid:1024";
+  pragma  "AMD RTI", "function:1:0";
+  pragma  "AMD RTI", "memory:64bitABI";
+  pragma  "AMD RTI", "uavid:8";
+  pragma  "AMD RTI", "privateid:8";
+  pragma  "AMD RTI", "ARGEND:__SysMemLoad";
+
+  // Fetch the base address of the user buffer
+  ld_kernarg_u64    $d0, [%outAddr];
+
+  // Compute the flat absolute id of current thread
+  // and shift it left by two (multiply by 4) to get
+  // the byte offset into user buffer for the store
+  // below. The shift is done in 32 bits and the result
+  // is then widened to 64 bits
+  workitemflatabsid_u32  $s0;
+  shl_u32         $s0, $s0, 2;
+  cvt_u64_u32     $d4, $s0;
+
+  // Add byte offset to base address of user buffer to obtain
+  // effective address for access
+  add_u64         $d0, $d0, $d4;
+
+  // Value written by every thread
+  mov_u32         $s2, 1;
+
+  // Write the value to this thread's element
+  st_global_u32   $s2, [$d0];
+
+};
+
@@ -0,0 +1,109 @@
+module &m:1:0:$full:$large:$default;
+extension "amd:gcn";
+extension "IMAGE";
+
+decl prog function &abort()();
+
+// Kernel that repeatedly loads texels from a 2D read-write image.
+//
+// Each pass of @loop issues eight ldimage operations. After every load both
+// coordinates are advanced by 64 and masked with 255, and one of the four
+// returned components (first, second, third, fourth, then repeating) is added
+// to the accumulator $s9. At the end of each pass $s9 is stored to the address
+// held in %result.
+//
+// %input  - read-write image handle the texels are loaded from
+// %result - 64-bit address that receives the accumulator; the same address is
+//           used by every work-item (no per-work-item offset is applied)
+// %istart - initial value of the loop counter
+// %iend   - loop runs again while counter < %iend (unsigned compare)
+// %istep  - amount added to the loop counter at the top of every pass
+//
+// The counter is incremented and the body executed before the first compare,
+// so the loop body runs at least once.
+// NOTE(review): coordinates are wrapped into 0..255 - presumably the image is
+// expected to be at least 256 x 256; confirm against the host code.
+prog kernel &__OpenCL_load_2d_image_kernel(
+	kernarg_rwimg %input,
+	kernarg_u64 %result,
+	kernarg_u32 %istart,
+	kernarg_u32 %iend,
+	kernarg_u32 %istep)
+{
+	pragma  "AMD RTI", "ARGSTART:__OpenCL_load_2d_image_kernel";
+	pragma  "AMD RTI", "version:3:1:104";
+	pragma  "AMD RTI", "device:generic";
+	pragma  "AMD RTI", "uniqueid:1024";
+	pragma  "AMD RTI", "function:1:0";
+	pragma  "AMD RTI", "memory:64bitABI";
+	pragma  "AMD RTI", "uavid:8";
+	pragma  "AMD RTI", "privateid:8";
+	pragma  "AMD RTI", "ARGEND:__OpenCL_load_2d_image_kernel";
+
+@__OpenCL_load_2d_image_kernel_entry:
+	// BB#0:                                // %entry
+	// $s0 = absolute work-item id in dimension 1, $s1 = id in dimension 0;
+	// they are used as the initial image coordinates
+	workitemabsid_u32	$s0, 1;
+	workitemabsid_u32	$s1, 0;
+	ld_kernarg_rwimg $d5, [%input];
+	// $s2 = loop counter, $s3 = loop limit, $s4 = loop step
+	ld_kernarg_u32 $s2, [%istart];
+	ld_kernarg_u32 $s3, [%iend];
+	ld_kernarg_u32 $s4, [%istep];
+
+    add_u32 $s9, 0, 0; // reset s9 to zero
+@loop:
+    // advance the loop counter by the step
+    add_u32 $s2, $s2, $s4;
+	
+	ldimage_v4_2d_u32_rwimg_u32	($s5, $s6, $s7, $s8), $d5, ($s1, $s0); //(coordWidth, coordHeight)
+    add_u32 $s9, $s9, $s5;
+
+    //force to retrieve different image elements:
+    //advance both coordinates by 64 and wrap them into 0..255
+	add_u32 $s1, $s1, 64;
+	and_b32 $s1, $s1, 255;
+	add_u32 $s0, $s0, 64;
+	and_b32 $s0, $s0, 255;
+	
+	ldimage_v4_2d_u32_rwimg_u32	($s5, $s6, $s7, $s8), $d5, ($s1, $s0);
+    add_u32 $s9, $s9, $s6;
+	
+	//force to retrieve different image elements
+	add_u32 $s1, $s1, 64;
+	and_b32 $s1, $s1, 255;
+	add_u32 $s0, $s0, 64;
+	and_b32 $s0, $s0, 255;
+
+	ldimage_v4_2d_u32_rwimg_u32	($s5, $s6, $s7, $s8), $d5, ($s1, $s0);
+    add_u32 $s9, $s9, $s7;
+	
+	//force to retrieve different image elements
+	add_u32 $s1, $s1, 64;
+	and_b32 $s1, $s1, 255;
+	add_u32 $s0, $s0, 64;
+	and_b32 $s0, $s0, 255;
+
+	ldimage_v4_2d_u32_rwimg_u32	($s5, $s6, $s7, $s8), $d5, ($s1, $s0);
+    add_u32 $s9, $s9, $s8;
+	
+	//force to retrieve different image elements
+	add_u32 $s1, $s1, 64;
+	and_b32 $s1, $s1, 255;
+	add_u32 $s0, $s0, 64;
+	and_b32 $s0, $s0, 255;
+
+	ldimage_v4_2d_u32_rwimg_u32	($s5, $s6, $s7, $s8), $d5, ($s1, $s0);
+    add_u32 $s9, $s9, $s5;
+	
+	//force to retrieve different image elements
+	add_u32 $s1, $s1, 64;
+	and_b32 $s1, $s1, 255;
+	add_u32 $s0, $s0, 64;
+	and_b32 $s0, $s0, 255;
+	
+	ldimage_v4_2d_u32_rwimg_u32	($s5, $s6, $s7, $s8), $d5, ($s1, $s0);
+    add_u32 $s9, $s9, $s6;
+	
+	//force to retrieve different image elements
+	add_u32 $s1, $s1, 64;
+	and_b32 $s1, $s1, 255;
+	add_u32 $s0, $s0, 64;
+	and_b32 $s0, $s0, 255;
+
+	ldimage_v4_2d_u32_rwimg_u32	($s5, $s6, $s7, $s8), $d5, ($s1, $s0);
+    add_u32 $s9, $s9, $s7;
+		
+	//force to retrieve different image elements
+	add_u32 $s1, $s1, 64;
+	and_b32 $s1, $s1, 255;
+	add_u32 $s0, $s0, 64;
+	and_b32 $s0, $s0, 255;
+	
+	ldimage_v4_2d_u32_rwimg_u32	($s5, $s6, $s7, $s8), $d5, ($s1, $s0);
+	// the result address is re-read from the kernarg segment on every pass
+	ld_kernarg_align(8)_width(all)_u64	$d4, [%result];
+    add_u32 $s9, $s9, $s8;
+	
+	// publish the running sum so that the loaded values are used
+	st_u32 $s9, [$d4];
+
+//loop until we hit condition: repeat while counter ($s2) < limit ($s3)
+    cmp_lt_b1_u32 $c0, $s2, $s3;
+    cbr_b1 $c0, @loop;
+};
@@ -0,0 +1,37 @@
+module &m:1:0:$full:$large:$default;
+extension "amd:gcn";
+extension "IMAGE";
+
+decl prog function &abort()();
+
+
+/* This function takes in 2 memory locations, one storing a number of
+ iterations to execute, and the other a place to store a result.
+ The function iterates through a loop "iteration" times, and stores
+ the number of iterations executed in the "results" location.
+ A successful run is when the value stored in %iteration is the
+ same as the value stored in %results.
+
+ %iteration - 64-bit address of a 32-bit iteration count
+ %results   - 64-bit address that receives the 32-bit loop counter
+
+ NOTE(review): the body below starts with an unconditional "ret", so as
+ written the kernel returns immediately and none of the loads, the loop or
+ the store is executed. Presumably this is intentional (an empty kernel for
+ measuring dispatch overhead) - confirm, since it contradicts the success
+ criterion described above.
+*/
+
+prog kernel &__simple_kernel(
+	kernarg_u64 %iteration,
+	kernarg_u64 %results)
+{
+        // returns right away; everything after this line is unreachable
+        ret;
+	// $d1 = address of the iteration count, $d2 = address of the result
+	ld_kernarg_align(8)_width(all)_u64 $d1, [%iteration];
+	ld_kernarg_align(8)_width(all)_u64 $d2, [%results];
+
+	// $s1 = requested iteration count, $s2 = loop counter starting at 0
+	ld_global_u32 $s1, [$d1];
+	mov_u32 $s2, 0;
+
+
+@loop:
+        // counter is incremented before it is compared, so the stored
+        // result would be at least 1 even for an iteration count of 0
+        add_u32 $s2, $s2, 1;
+	cmp_lt_b1_u32 $c0, $s2, $s1;
+	cbr_b1 $c0, @loop;
+
+	// write the number of iterations executed
+	st_global_u32 $s2, [$d2];
+	
+	ret;
+};
+
@@ -0,0 +1,28 @@
+module &m:1:0:$base:$large:$default;
+extension "amd:gcn";
+extension "IMAGE";
+
+decl prog function &abort()();
+
+/* Counting kernel.
+ Loads a 32-bit iteration count from the address passed in %iteration,
+ increments a counter from zero until it is no longer less than that count
+ (unsigned compare) and stores the final counter value to the address
+ passed in %results.
+
+ %iteration - 64-bit address of a 32-bit iteration count
+ %results   - 64-bit address that receives the 32-bit loop counter
+
+ The counter is incremented before the first compare, so the value stored
+ is at least 1 even when the iteration count is 0.
+*/
+prog kernel &__simple_kernel(
+	kernarg_u64 %iteration,
+	kernarg_u64 %results)
+{
+	
+	// $d1 = address of the iteration count, $d2 = address of the result
+	ld_kernarg_align(8)_width(all)_u64 $d1, [%iteration];
+	ld_kernarg_align(8)_width(all)_u64 $d2, [%results];
+
+	// $s1 = requested iteration count, $s2 = loop counter starting at 0
+	ld_global_u32 $s1, [$d1];
+	mov_u32 $s2, 0;
+
+
+@loop:
+        // count one iteration, then repeat while counter < requested count
+        add_u32 $s2, $s2, 1;
+	cmp_lt_b1_u32 $c0, $s2, $s1;
+	cbr_b1 $c0, @loop;
+
+	// write the number of iterations executed
+	st_global_u32 $s2, [$d2];
+	
+	ret;
+};
+
@@ -0,0 +1,105 @@
+module &m:1:0:$full:$large:$default;
+extension "amd:gcn";
+extension "IMAGE";
+
+decl prog function &abort()();
+
+// Kernel that repeatedly stores texels to a 2D read-write image.
+//
+// Each pass of @loop issues eight stimage operations. All four components of
+// a stored texel carry the same value ($s5). Between stores both coordinates
+// are advanced by 64 and masked with 255, and the value is changed: it is
+// incremented by 1 before the first store of a pass and by the current loop
+// counter ($s2) before each of the other seven.
+//
+// %output - read-write image handle the texels are stored to
+// %istart - initial value of the loop counter
+// %iend   - loop runs again while counter < %iend (unsigned compare)
+// %istep  - amount added to the loop counter at the top of every pass
+//
+// The counter is incremented and the body executed before the first compare,
+// so the loop body runs at least once.
+// NOTE(review): coordinates are wrapped into 0..255 - presumably the image is
+// expected to be at least 256 x 256; confirm against the host code.
+prog kernel &__OpenCL_store_2d_image_kernel(
+    kernarg_rwimg %output,
+	kernarg_u32 %istart,
+	kernarg_u32 %iend,
+	kernarg_u32 %istep)
+{
+	pragma  "AMD RTI", "ARGSTART:__OpenCL_store_2d_image_kernel";
+	pragma  "AMD RTI", "version:3:1:104";
+	pragma  "AMD RTI", "device:generic";
+	pragma  "AMD RTI", "uniqueid:1024";
+	pragma  "AMD RTI", "function:1:0";
+	pragma  "AMD RTI", "memory:64bitABI";
+	pragma  "AMD RTI", "uavid:8";
+	pragma  "AMD RTI", "privateid:8";
+	pragma  "AMD RTI", "ARGEND:__OpenCL_store_2d_image_kernel";
+
+@__OpenCL_store_2d_image_kernel_entry:
+	// BB#0:                                // %entry
+	// $s0 = absolute work-item id in dimension 1, $s1 = id in dimension 0;
+	// they are used as the initial image coordinates
+	workitemabsid_u32	$s0, 1;
+	workitemabsid_u32	$s1, 0;
+    ld_kernarg_rwimg $d5, [%output];
+	// $s2 = loop counter, $s3 = loop limit, $s4 = loop step
+	ld_kernarg_u32 $s2, [%istart];
+	ld_kernarg_u32 $s3, [%iend];
+	ld_kernarg_u32 $s4, [%istep];
+	
+	// $s5 = value written to all four components of each texel
+	mov_b32	$s5, 0;
+@loop:
+    // advance the loop counter by the step and bump the stored value
+    add_u32 $s2, $s2, $s4;
+    add_u32 $s5, $s5, 1;
+	stimage_v4_2d_u32_rwimg_u32	($s5, $s5, $s5, $s5), $d5, ($s1, $s0);
+    
+	//force to access different image elements:
+	//advance both coordinates by 64 and wrap them into 0..255
+	add_u32 $s1, $s1, 64;
+	and_b32 $s1, $s1, 255;
+	add_u32 $s0, $s0, 64;
+	and_b32 $s0, $s0, 255;
+
+    add_u32 $s5, $s5, $s2;
+	stimage_v4_2d_u32_rwimg_u32	($s5, $s5, $s5, $s5), $d5, ($s1, $s0);
+	
+	//force to retrieve different image elements
+	add_u32 $s1, $s1, 64;
+	and_b32 $s1, $s1, 255;
+	add_u32 $s0, $s0, 64;
+	and_b32 $s0, $s0, 255;
+
+    add_u32 $s5, $s5, $s2;
+	stimage_v4_2d_u32_rwimg_u32	($s5, $s5, $s5, $s5), $d5, ($s1, $s0);
+	
+	//force to retrieve different image elements
+	add_u32 $s1, $s1, 64;
+	and_b32 $s1, $s1, 255;
+	add_u32 $s0, $s0, 64;
+	and_b32 $s0, $s0, 255;
+
+    add_u32 $s5, $s5, $s2;
+	stimage_v4_2d_u32_rwimg_u32	($s5, $s5, $s5, $s5), $d5, ($s1, $s0);
+	
+	//force to retrieve different image elements
+	add_u32 $s1, $s1, 64;
+	and_b32 $s1, $s1, 255;
+	add_u32 $s0, $s0, 64;
+	and_b32 $s0, $s0, 255;
+
+    add_u32 $s5, $s5, $s2;
+	stimage_v4_2d_u32_rwimg_u32	($s5, $s5, $s5, $s5), $d5, ($s1, $s0);
+	
+	//force to retrieve different image elements
+	add_u32 $s1, $s1, 64;
+	and_b32 $s1, $s1, 255;
+	add_u32 $s0, $s0, 64;
+	and_b32 $s0, $s0, 255;
+
+    add_u32 $s5, $s5, $s2;
+	stimage_v4_2d_u32_rwimg_u32	($s5, $s5, $s5, $s5), $d5, ($s1, $s0);
+	
+	//force to retrieve different image elements
+	add_u32 $s1, $s1, 64;
+	and_b32 $s1, $s1, 255;
+	add_u32 $s0, $s0, 64;
+	and_b32 $s0, $s0, 255;
+
+    add_u32 $s5, $s5, $s2;
+	stimage_v4_2d_u32_rwimg_u32	($s5, $s5, $s5, $s5), $d5, ($s1, $s0);
+	
+	//force to retrieve different image elements
+	add_u32 $s1, $s1, 64;
+	and_b32 $s1, $s1, 255;
+	add_u32 $s0, $s0, 64;
+	and_b32 $s0, $s0, 255;
+
+    add_u32 $s5, $s5, $s2;
+	stimage_v4_2d_u32_rwimg_u32	($s5, $s5, $s5, $s5), $d5, ($s1, $s0);
+
+//loop until we hit condition: repeat while counter ($s2) < limit ($s3)
+    cmp_lt_b1_u32 $c0, $s2, $s3;
+	cbr_b1 $c0, @loop;
+	ret;
+};
@@ -0,0 +1,237 @@
+module &m:1:0:$full:$large:$default;
+
+/* Copyright 2014 HSA Foundation Inc.  All Rights Reserved.
+ *
+ * HSAF is granting you permission to use this software and documentation (if
+ * any) (collectively, the "Materials") pursuant to the terms and conditions
+ * of the Software License Agreement included with the Materials.  If you do
+ * not have a copy of the Software License Agreement, contact the  HSA Foundation for a copy.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
+ */
+
+extension "amd:gcn";
+extension "IMAGE";
+
+decl prog function &abort()();
+
+
+/**
+ * @brief Hsail kernel to benchmark READ accesses to system memory.
+ * The kernel is given an input buffer from which each thread will
+ * read. The thread will read from multiple locations of the input buffer.
+ * The locations to read from are determined by the work-item Id: a thread
+ * starts at the element whose index is its flat absolute work-item Id and
+ * advances its address by addrStep bytes after every read.
+ * So given a global work grid of 16 work-items the reads by a thread with absolute
+ * id 4 would be 4, 20, 36, 52, etc.
+ *
+ * @NOTE: A constraint imposed by the kernel is that the buffer size be large
+ * enough to support 32 reads by each thread, as the kernel body issues 32 loads
+ * without any bounds check. So a dispatch of 8 work-items
+ * should allocate enough buffer for 8 * 32 * sizeof(uint32_t).
+ * NOTE(review): this note used to state 16 reads while the body contains 32
+ * load operations - confirm that the host-side allocation matches.
+ *
+ * @param bufStart beginning byte address of user buffer in system memory
+ * from which kernel threads could read
+ *
+ * @param bufEnd byte address that follows the end of user buffer. Accessing
+ * memory at bufEnd is illegal. The kernel loads this value but does not
+ * otherwise use it
+ *
+ * @param addrStep size by which to increment byte address following each read
+ * operation. The value represents total number of work-items * sizeof(uint32_t)
+ *
+ * @param outAddr argument that is passed by the user to be updated with values
+ * read by the kernel threads: each thread stores the sum of the values it read at
+ * outAddr + work-item Id * sizeof(uint32_t). This is to ensure the compiler and
+ * finalizer do not eliminate code because the values being read are not used in
+ * any meaningful way.
+ *
+ */
+prog kernel &__SysMemLoad(kernarg_u64 %bufStart,
+                          kernarg_u64 %bufEnd,
+                          kernarg_u64 %addrStep,
+                          kernarg_u64 %outAddr) {
+
+  pragma  "AMD RTI", "ARGSTART:__SysMemLoad";
+  pragma  "AMD RTI", "version:3:1:104";
+  pragma  "AMD RTI", "device:generic";
+  pragma  "AMD RTI", "uniqueid:1024";
+  pragma  "AMD RTI", "function:1:0";
+  pragma  "AMD RTI", "memory:64bitABI";
+  pragma  "AMD RTI", "uavid:8";
+  pragma  "AMD RTI", "privateid:8";
+  pragma  "AMD RTI", "ARGEND:__SysMemLoad";
+
+  // Retrieve the values of input arguments
+  // bufStart refers to the starting byte address
+  // bufEnd refers to the end of byte address
+  // (held in $d1, not referenced again in this kernel)
+  // addrStep refers to the product of total number
+  // of work-items in the grid * sizeof(uint32_t)
+  // outAddr refers to the base address of the output buffer
+  ld_kernarg_u64    $d0, [%bufStart];
+  ld_kernarg_u64    $d1, [%bufEnd];
+  ld_kernarg_u64    $d2, [%addrStep];
+  ld_kernarg_u64    $d3, [%outAddr];
+
+  // Compute the absolute id of current thread
+  // and shift it left by two (multiply by 4) to get
+  // the byte offset into user buffer to access for
+  // Read operation
+  workitemflatabsid_u32  $s0;
+  shl_u32         $s0, $s0, 2;
+  cvt_u64_u32     $d4, $s0;
+
+  // Add byte offset to base addresses of the input and output
+  // buffers to obtain this thread's effective addresses
+  add_u64         $d0, $d0, $d4;
+  add_u64         $d3, $d3, $d4;
+
+  // Initialize thread's read accumulator to zero
+  mov_u32         $s2, 0;
+
+  // NOTE(review): no instruction in this kernel branches to the label
+  // below, so the sequence of reads is executed exactly once
+@loop:
+
+  // Read thirty-two values with a stride that is
+  // determined by the total number of work-items
+  // in the global grid. Each group below loads one
+  // value, adds it to the accumulator and advances
+  // the read address by addrStep
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  // Update output buffer with the sum of the values
+  // read from input buffer
+  st_global_u32   $s2, [$d3];
+
+};
+
@@ -0,0 +1,237 @@
+module &m:1:0:$base:$large:$default;
+
+/* Copyright 2014 HSA Foundation Inc.  All Rights Reserved.
+ *
+ * HSAF is granting you permission to use this software and documentation (if
+ * any) (collectively, the "Materials") pursuant to the terms and conditions
+ * of the Software License Agreement included with the Materials.  If you do
+ * not have a copy of the Software License Agreement, contact the  HSA Foundation for a copy.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
+ */
+
+extension "amd:gcn";
+extension "IMAGE";
+
+decl prog function &abort()();
+
+
+/**
+ * @brief Hsail kernel to benchmark READ accesses to system memory.
+ * The kernel is given an input buffer from which each thread will
+ * read. The thread will read from multiple locations of the input buffer.
+ * The locations to read from are determined by the work-item Id: a thread
+ * starts at the element whose index is its flat absolute work-item Id and
+ * advances its address by addrStep bytes after every read.
+ * So given a global work grid of 16 work-items the reads by a thread with absolute
+ * id 4 would be 4, 20, 36, 52, etc.
+ *
+ * @NOTE: A constraint imposed by the kernel is that the buffer size be large
+ * enough to support 32 reads by each thread, as the kernel body issues 32 loads
+ * without any bounds check. So a dispatch of 8 work-items
+ * should allocate enough buffer for 8 * 32 * sizeof(uint32_t).
+ * NOTE(review): this note used to state 16 reads while the body contains 32
+ * load operations - confirm that the host-side allocation matches.
+ *
+ * @param bufStart beginning byte address of user buffer in system memory
+ * from which kernel threads could read
+ *
+ * @param bufEnd byte address that follows the end of user buffer. Accessing
+ * memory at bufEnd is illegal. The kernel loads this value but does not
+ * otherwise use it
+ *
+ * @param addrStep size by which to increment byte address following each read
+ * operation. The value represents total number of work-items * sizeof(uint32_t)
+ *
+ * @param outAddr argument that is passed by the user to be updated with values
+ * read by the kernel threads: each thread stores the sum of the values it read at
+ * outAddr + work-item Id * sizeof(uint32_t). This is to ensure the compiler and
+ * finalizer do not eliminate code because the values being read are not used in
+ * any meaningful way.
+ *
+ */
+prog kernel &__SysMemLoad(kernarg_u64 %bufStart,
+                          kernarg_u64 %bufEnd,
+                          kernarg_u64 %addrStep,
+                          kernarg_u64 %outAddr) {
+
+  pragma  "AMD RTI", "ARGSTART:__SysMemLoad";
+  pragma  "AMD RTI", "version:3:1:104";
+  pragma  "AMD RTI", "device:generic";
+  pragma  "AMD RTI", "uniqueid:1024";
+  pragma  "AMD RTI", "function:1:0";
+  pragma  "AMD RTI", "memory:64bitABI";
+  pragma  "AMD RTI", "uavid:8";
+  pragma  "AMD RTI", "privateid:8";
+  pragma  "AMD RTI", "ARGEND:__SysMemLoad";
+
+  // Retrieve the values of input arguments
+  // bufStart refers to the starting byte address
+  // bufEnd refers to the end of byte address
+  // (held in $d1, not referenced again in this kernel)
+  // addrStep refers to the product of total number
+  // of work-items in the grid * sizeof(uint32_t)
+  // outAddr refers to the base address of the output buffer
+  ld_kernarg_u64    $d0, [%bufStart];
+  ld_kernarg_u64    $d1, [%bufEnd];
+  ld_kernarg_u64    $d2, [%addrStep];
+  ld_kernarg_u64    $d3, [%outAddr];
+
+  // Compute the absolute id of current thread
+  // and shift it left by two (multiply by 4) to get
+  // the byte offset into user buffer to access for
+  // Read operation
+  workitemflatabsid_u32  $s0;
+  shl_u32         $s0, $s0, 2;
+  cvt_u64_u32     $d4, $s0;
+
+  // Add byte offset to base addresses of the input and output
+  // buffers to obtain this thread's effective addresses
+  add_u64         $d0, $d0, $d4;
+  add_u64         $d3, $d3, $d4;
+
+  // Initialize thread's read accumulator to zero
+  mov_u32         $s2, 0;
+
+  // NOTE(review): no instruction in this kernel branches to the label
+  // below, so the sequence of reads is executed exactly once
+@loop:
+
+  // Read thirty-two values with a stride that is
+  // determined by the total number of work-items
+  // in the global grid. Each group below loads one
+  // value, adds it to the accumulator and advances
+  // the read address by addrStep
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  ld_global_u32   $s1, [$d0];
+  add_u32         $s2, $s1, $s2;
+  add_u64         $d0, $d0, $d2;
+
+  // Update output buffer with the sum of the values
+  // read from input buffer
+  st_global_u32   $s2, [$d3];
+
+};
+
@@ -0,0 +1,105 @@
+module &m:1:0:$full:$large:$default;
+extension "amd:gcn";
+extension "IMAGE";
+
+decl prog function &abort()();
+
+/**
+ * @brief Hsail kernel to benchmark WRITE accesses to a user buffer.
+ * Each thread starts at the element whose index is its flat absolute
+ * work-item Id and writes 32-bit values with a stride of addrStep bytes.
+ * The value written is the thread's byte offset (work-item Id * 4).
+ * Writes are issued in groups of sixteen; after each group the kernel
+ * repeats the group while the thread's next address is below bufEnd.
+ *
+ * @NOTE: The end-of-buffer test is made only once per group of sixteen
+ * writes, and the first group is executed unconditionally.
+ * NOTE(review): presumably the host sizes the buffer as a multiple of
+ * 16 * addrStep so that no write lands at or past bufEnd - confirm.
+ *
+ * @param bufStart beginning byte address of user buffer to write to
+ *
+ * @param bufEnd byte address that follows the end of user buffer
+ *
+ * @param addrStep size by which to increment byte address following each
+ * write operation
+ *
+ * @param deadArg loaded by the kernel but not otherwise used
+ */
+prog kernel &__SysMemStore(kernarg_u64 %bufStart,
+                           kernarg_u64 %bufEnd,
+                           kernarg_u64 %addrStep,
+                           kernarg_u64 %deadArg) {
+
+  // Directives for Compiler
+  pragma  "AMD RTI", "ARGSTART:__SysMemStore";
+  pragma  "AMD RTI", "version:3:1:104";
+  pragma  "AMD RTI", "device:generic";
+  pragma  "AMD RTI", "uniqueid:1024";
+  pragma  "AMD RTI", "function:1:0";
+  pragma  "AMD RTI", "memory:64bitABI";
+  pragma  "AMD RTI", "uavid:8";
+  pragma  "AMD RTI", "privateid:8";
+  pragma  "AMD RTI", "ARGEND:__SysMemStore";
+
+  // Retrieve the values of input arguments
+  // bufStart refers to the starting byte address
+  // bufEnd refers to the end of byte address
+  // addrStep refers to the product of total number
+  // of work-items in the grid * sizeof(uint32_t)
+  // deadArg is held in $d3 and not referenced again
+  ld_kernarg_u64     $d0, [%bufStart];
+  ld_kernarg_u64     $d1, [%bufEnd];
+  ld_kernarg_u64     $d2, [%addrStep];
+  ld_kernarg_u64     $d3, [%deadArg];
+
+  // Compute the absolute id of current thread
+  // and shift it left by two (multiply by 4) to get
+  // the byte offset into user buffer to access for
+  // Write operation. The same value is also the data
+  // that gets written
+  workitemflatabsid_u32  $s0;
+  shl_u32            $s0, $s0, 2;
+
+  // Convert the byte offset into a 64-bit number
+  // and add it to the starting address of user
+  // buffer to obtain effective address for access
+  cvt_u64_u32     $d4, $s0;
+  add_u64         $d0, $d0, $d4;
+
+
+@loop:
+
+  // Write sixteen values with a stride that is
+  // determined by the total number of work-items
+  // in the global grid
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  // Loop until we hit end of buffer [%bufEnd]:
+  // repeat while next address ($d0) < bufEnd ($d1)
+  cmp_lt_b1_u64   $c0, $d0, $d1;
+  cbr_b1          $c0, @loop;
+
+};
+
@@ -0,0 +1,105 @@
+module &m:1:0:$base:$large:$default;
+extension "amd:gcn";
+extension "IMAGE";
+
+decl prog function &abort()();
+
+/**
+ * @brief Hsail kernel to benchmark WRITE accesses to a user buffer.
+ * Each thread starts at the element whose index is its flat absolute
+ * work-item Id and writes 32-bit values with a stride of addrStep bytes.
+ * The value written is the thread's byte offset (work-item Id * 4).
+ * Writes are issued in groups of sixteen; after each group the kernel
+ * repeats the group while the thread's next address is below bufEnd.
+ *
+ * @NOTE: The end-of-buffer test is made only once per group of sixteen
+ * writes, and the first group is executed unconditionally.
+ * NOTE(review): presumably the host sizes the buffer as a multiple of
+ * 16 * addrStep so that no write lands at or past bufEnd - confirm.
+ *
+ * @param bufStart beginning byte address of user buffer to write to
+ *
+ * @param bufEnd byte address that follows the end of user buffer
+ *
+ * @param addrStep size by which to increment byte address following each
+ * write operation
+ *
+ * @param deadArg loaded by the kernel but not otherwise used
+ */
+prog kernel &__SysMemStore(kernarg_u64 %bufStart,
+                           kernarg_u64 %bufEnd,
+                           kernarg_u64 %addrStep,
+                           kernarg_u64 %deadArg) {
+
+  // Directives for Compiler
+  pragma  "AMD RTI", "ARGSTART:__SysMemStore";
+  pragma  "AMD RTI", "version:3:1:104";
+  pragma  "AMD RTI", "device:generic";
+  pragma  "AMD RTI", "uniqueid:1024";
+  pragma  "AMD RTI", "function:1:0";
+  pragma  "AMD RTI", "memory:64bitABI";
+  pragma  "AMD RTI", "uavid:8";
+  pragma  "AMD RTI", "privateid:8";
+  pragma  "AMD RTI", "ARGEND:__SysMemStore";
+
+  // Retrieve the values of input arguments
+  // bufStart refers to the starting byte address
+  // bufEnd refers to the end of byte address
+  // addrStep refers to the product of total number
+  // of work-items in the grid * sizeof(uint32_t)
+  // deadArg is held in $d3 and not referenced again
+  ld_kernarg_u64     $d0, [%bufStart];
+  ld_kernarg_u64     $d1, [%bufEnd];
+  ld_kernarg_u64     $d2, [%addrStep];
+  ld_kernarg_u64     $d3, [%deadArg];
+
+  // Compute the absolute id of current thread
+  // and shift it left by two (multiply by 4) to get
+  // the byte offset into user buffer to access for
+  // Write operation. The same value is also the data
+  // that gets written
+  workitemflatabsid_u32  $s0;
+  shl_u32            $s0, $s0, 2;
+
+  // Convert the byte offset into a 64-bit number
+  // and add it to the starting address of user
+  // buffer to obtain effective address for access
+  cvt_u64_u32     $d4, $s0;
+  add_u64         $d0, $d0, $d4;
+
+
+@loop:
+
+  // Write sixteen values with a stride that is
+  // determined by the total number of work-items
+  // in the global grid
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  st_global_u32   $s0, [$d0];
+  add_u64         $d0, $d0, $d2;
+
+  // Loop until we hit end of buffer [%bufEnd]:
+  // repeat while next address ($d0) < bufEnd ($d1)
+  cmp_lt_b1_u64   $c0, $d0, $d1;
+  cbr_b1          $c0, @loop;
+
+};
+
@@ -0,0 +1,53 @@
+module &m:1:0:$full:$large:$default;
+extension "amd:gcn";
+extension "IMAGE";
+
+decl prog function &abort()();
+
+prog kernel &__OpenCL_vec_assign_kernel(
+	kernarg_u64 %buf,	// global address of the 32-bit word that is read and updated
+	kernarg_u32 %num)	// bound the word is compared against (signed compare)
+{
+	pragma  "AMD RTI", "ARGSTART:__OpenCL_vec_assign_kernel";
+	pragma  "AMD RTI", "version:3:1:104";
+	pragma  "AMD RTI", "device:generic";
+	pragma  "AMD RTI", "uniqueid:1024";
+	pragma  "AMD RTI", "function:1:0";
+	pragma  "AMD RTI", "memory:64bitABI";
+	pragma  "AMD RTI", "uavid:8";
+	pragma  "AMD RTI", "privateid:8";
+	pragma  "AMD RTI", "ARGEND:__OpenCL_vec_assign_kernel";
+	// Work-item 0 (dimension 0) adds one to the word at %buf until it is >= %num; every other work-item re-reads the word until it is >= %num.
+@__OpenCL_vec_assign_kernel_entry:
+	// BB#0 (%entry): load the word and go straight to the exit when it is already >= %num.
+	ld_kernarg_align(8)_width(all)_u64	$d0, [%buf];
+	ld_global_u32	$s1, [$d0];
+	ld_kernarg_align(4)_width(all)_u32	$s0, [%num];
+	cmp_ge_b1_s32	$c0, $s1, $s0;
+	cbr_b1	$c0, @BB0_4;
+	// BB#1 (%while.body.lr.ph): pick the loop by absolute work-item id; id 0 branches to the incrementing loop.
+	workitemabsid_u32	$s1, 0;
+	cmp_eq_b1_s32	$c0, $s1, 0;
+	cbr_b1	$c0, @BB0_2;
+
+@BB0_3:
+	// %while.cond.backedge: polling loop; keep re-reading the word while it is < %num.
+	ld_global_u32	$s1, [$d0];
+	cmp_lt_b1_s32	$c0, $s1, $s0;
+	cbr_b1	$c0, @BB0_3;
+	br	@BB0_4;
+
+@BB0_2:
+	// %while.cond.backedge.us: load, add one, store, reload and repeat while the word is < %num (plain loads/stores, no atomic instruction).
+	ld_global_u32	$s1, [$d0];
+	add_u32	$s1, $s1, 1;
+	st_global_u32	$s1, [$d0];
+	ld_global_u32	$s1, [$d0];
+	cmp_lt_b1_s32	$c0, $s1, $s0;
+	cbr_b1	$c0, @BB0_2;
+
+@BB0_4:
+	// %while.end: common exit for both loops and for the early-out in BB#0.
+	ret;
+};
+
@@ -0,0 +1,108 @@
+module &m:1:0:$full:$large:$default;
+extension "amd:gcn";
+extension "IMAGE";
+
+decl prog function &abort()();
+
+prog kernel &__OpenCL_matrixTranspose_kernel(
+	kernarg_u64 %__global_offset_0,	// added to the absolute id of dimension 0
+	kernarg_u64 %__global_offset_1,	// added to the absolute id of dimension 1
+	kernarg_u64 %__global_offset_2,	// never loaded by the body
+	kernarg_u64 %__printf_buffer,	// never loaded by the body
+	kernarg_u64 %__vqueue_pointer,	// never loaded by the body
+	kernarg_u64 %__aqlwrap_pointer,	// never loaded by the body
+	kernarg_u64 %inBuf,	// global address of the source elements (u32)
+	kernarg_u64 %outBuf,	// global address of the destination elements (u32)
+	kernarg_u64 %localBuf,	// base of the group-segment staging tile
+	kernarg_u32 %blockSize,	// multiplier used for tile and work-group indexing
+	kernarg_u32 %width,	// row stride of the source, in elements
+	kernarg_u32 %height)	// row stride of the destination, in elements
+{
+	pragma  "AMD RTI", "ARGSTART:__OpenCL_matrixTranspose_kernel";
+	pragma  "AMD RTI", "version:3:1:104";
+	pragma  "AMD RTI", "device:generic";
+	pragma  "AMD RTI", "uniqueid:1024";
+	pragma  "AMD RTI", "memory:private:0";
+	pragma  "AMD RTI", "memory:region:0";
+	pragma  "AMD RTI", "memory:local:0";
+	pragma  "AMD RTI", "value:__global_offset_0:u64:1:1:0";
+	pragma  "AMD RTI", "value:__global_offset_1:u64:1:1:16";
+	pragma  "AMD RTI", "value:__global_offset_2:u64:1:1:32";
+	pragma  "AMD RTI", "pointer:__printf_buffer:u8:1:1:48:uav:7:1:RW:0:0:0";
+	pragma  "AMD RTI", "value:__vqueue_pointer:u64:1:1:64";
+	pragma  "AMD RTI", "value:__aqlwrap_pointer:u64:1:1:80";
+	pragma  "AMD RTI", "pointer:inBuf:u32:1:1:96:uav:7:4:RW:0:1:0";
+	pragma  "AMD RTI", "pointer:outBuf:u32:1:1:112:uav:7:4:RW:0:1:0";
+	pragma  "AMD RTI", "pointer:localBuf:u32:1:1:128:l:7:4:RW:0:0:0";
+	pragma  "AMD RTI", "value:blockSize:u32:1:1:144";
+	pragma  "AMD RTI", "value:width:u32:1:1:160";
+	pragma  "AMD RTI", "value:height:u32:1:1:176";
+	pragma  "AMD RTI", "function:1:0";
+	pragma  "AMD RTI", "memory:64bitABI";
+	pragma  "AMD RTI", "privateid:8";
+	pragma  "AMD RTI", "enqueue_kernel:0";
+	pragma  "AMD RTI", "kernel_index:0";
+	pragma  "AMD RTI", "reflection:0:size_t";
+	pragma  "AMD RTI", "reflection:1:size_t";
+	pragma  "AMD RTI", "reflection:2:size_t";
+	pragma  "AMD RTI", "reflection:3:size_t";
+	pragma  "AMD RTI", "reflection:4:size_t";
+	pragma  "AMD RTI", "reflection:5:size_t";
+	pragma  "AMD RTI", "reflection:6:uint*";
+	pragma  "AMD RTI", "reflection:7:uint*";
+	pragma  "AMD RTI", "reflection:8:uint*";
+	pragma  "AMD RTI", "reflection:9:uint";
+	pragma  "AMD RTI", "reflection:10:uint";
+	pragma  "AMD RTI", "reflection:11:uint";
+	pragma  "AMD RTI", "ARGEND:__OpenCL_matrixTranspose_kernel";
+	// Each work-item copies one u32: in[gy * width + gx] -> tile[lx * bs + ly], barrier, then tile[ly * bs + lx] -> out[(grp0 * bs + ly) * height + grp1 * bs + lx].
+@__OpenCL_matrixTranspose_kernel_entry:
+	// BB#0 (%entry). Names used below: lx/ly = local ids (dim 0/1), grp0/grp1 = work-group ids, gx/gy = absolute ids plus global offsets, bs = %blockSize.
+	workitemid_u32	$s0, 1;	// s0 = ly
+	ld_kernarg_align(4)_width(all)_u32	$s1, [%blockSize];	// s1 = bs
+	workitemid_u32	$s2, 0;	// s2 = lx
+	mad_u32	$s3, $s2, $s1, $s0;	// s3 = lx * bs + ly
+	cvt_u64_u32	$d1, $s3;
+	workitemabsid_u32	$s3, 0;	// s3 = absolute id, dimension 0
+	cvt_u64_u32	$d0, $s3;
+	ld_kernarg_align(8)_width(all)_u64	$d2, [%__global_offset_0];
+	add_u64	$d0, $d0, $d2;	// d0 = gx
+	workitemabsid_u32	$s5, 1;	// s5 = absolute id, dimension 1
+	workgroupid_u32	$s4, 0;	// s4 = grp0
+	workgroupid_u32	$s3, 1;	// s3 = grp1
+	shl_u64	$d1, $d1, 2;	// d1 = byte offset of tile slot (lx * bs + ly)
+	mad_u32	$s3, $s3, $s1, $s2;	// s3 = grp1 * bs + lx
+	mad_u32	$s4, $s4, $s1, $s0;	// s4 = grp0 * bs + ly
+	cvt_u64_u32	$d2, $s5;
+	ld_kernarg_align(8)_width(all)_u64	$d3, [%__global_offset_1];
+	cvt_u32_u64	$s5, $d0;	// s5 = gx truncated to 32 bits
+	add_u64	$d0, $d2, $d3;	// d0 = gy
+	cvt_u32_u64	$s6, $d0;	// s6 = gy truncated to 32 bits
+	ld_kernarg_align(4)_width(all)_u32	$s7, [%width];
+	ld_kernarg_align(8)_width(all)_u64	$d0, [%localBuf];	// d0 = tile base
+	ld_kernarg_align(4)_width(all)_u32	$s8, [%height];
+	mad_u32	$s3, $s4, $s8, $s3;	// s3 = output index = (grp0 * bs + ly) * height + grp1 * bs + lx
+	add_u64	$d1, $d0, $d1;
+	cvt_u32_u64	$s4, $d1;	// s4 = 32-bit group address of tile slot (lx * bs + ly)
+	mad_u32	$s5, $s6, $s7, $s5;	// s5 = input index = gy * width + gx
+	cvt_u64_u32	$d1, $s5;
+	shl_u64	$d2, $d1, 2;	// d2 = input byte offset
+	ld_kernarg_align(8)_width(all)_u64	$d1, [%outBuf];
+	ld_kernarg_align(8)_width(all)_u64	$d3, [%inBuf];
+	add_u64	$d2, $d3, $d2;	// d2 = address of the input element
+	ld_global_align(4)_u32	$s5, [$d2];
+	st_group_align(4)_u32	$s5, [$s4];	// stage the input element in the tile
+	cvt_u64_u32	$d2, $s3;
+	shl_u64	$d2, $d2, 2;
+	add_u64	$d1, $d1, $d2;	// d1 = address of the output element
+	mad_u32	$s0, $s0, $s1, $s2;	// s0 = ly * bs + lx (the transposed tile slot)
+	cvt_u64_u32	$d2, $s0;
+	shl_u64	$d2, $d2, 2;
+	add_u64	$d0, $d0, $d2;
+	cvt_u32_u64	$s0, $d0;	// s0 = 32-bit group address of tile slot (ly * bs + lx)
+	barrier;	// separates the tile store above from the tile load below
+	ld_group_align(4)_u32	$s0, [$s0];
+	st_global_align(4)_u32	$s0, [$d1];	// write the transposed element
+	ret;
+};
+
@@ -0,0 +1,34 @@
+module &m:1:0:$full:$large:$default;
+extension "amd:gcn";
+extension "IMAGE";
+
+decl prog function &abort()();
+
+prog kernel &__vector_copy_kernel(
+	kernarg_u64 %a,	// global address of the source u32 elements
+	kernarg_u64 %b)	// global address of the destination u32 elements
+{
+	pragma  "AMD RTI", "ARGSTART:__vector_copy_kernel";
+	pragma  "AMD RTI", "version:3:1:104";
+	pragma  "AMD RTI", "device:generic";
+	pragma  "AMD RTI", "uniqueid:1024";
+	pragma  "AMD RTI", "function:1:0";
+	pragma  "AMD RTI", "memory:64bitABI";
+	pragma  "AMD RTI", "uavid:8";
+	pragma  "AMD RTI", "privateid:8";
+	pragma  "AMD RTI", "ARGEND:__vector_copy_kernel";
+	// Each work-item copies one 32-bit element: b[id] = a[id], where id is its absolute id in dimension 0.
+@__vector_copy_kernel_entry:
+	// BB#0 (%entry): straight-line body, no branches.
+	workitemabsid_u32	$s0, 0;	// s0 = absolute work-item id, dimension 0
+	cvt_s64_s32	$d0, $s0;	// signed 32 -> 64 bit conversion of the id
+	shl_u64	$d0, $d0, 2;	// d0 = id * 4 = byte offset of the element
+	ld_kernarg_align(8)_width(all)_u64	$d1, [%b];
+	add_u64	$d1, $d1, $d0;	// d1 = address of b[id]
+	ld_kernarg_align(8)_width(all)_u64	$d2, [%a];
+	add_u64	$d0, $d2, $d0;	// d0 = address of a[id]
+	ld_global_u32	$s0, [$d0];
+	st_global_u32	$s0, [$d1];	// b[id] = a[id]
+	ret;
+};
+
@@ -0,0 +1,64 @@
+module &m:1:0:$base:$large:$default;
+
+////////////////////////////////////////////////////////////////////////////////
+//
+// The University of Illinois/NCSA
+// Open Source License (NCSA)
+//
+// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
+//
+// Developed by:
+//
+//                 AMD Research and AMD HSA Software Development
+//
+//                 Advanced Micro Devices, Inc.
+//
+//                 www.amd.com
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal with the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+//  - Redistributions of source code must retain the above copyright notice,
+//    this list of conditions and the following disclaimers.
+//  - Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimers in
+//    the documentation and/or other materials provided with the distribution.
+//  - Neither the names of Advanced Micro Devices, Inc,
+//    nor the names of its contributors may be used to endorse or promote
+//    products derived from this Software without specific prior written
+//    permission.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS WITH THE SOFTWARE.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+
+decl prog function &abort()();
+
+prog kernel &__vector_copy_kernel(
+	kernarg_u64 %in,	// global address of the source u32 elements
+	kernarg_u64 %out)	// global address of the destination u32 elements
+{
+@__vector_copy_kernel_entry:
+	// BB#0 (%entry): each work-item copies one 32-bit element, out[id] = in[id], with id = absolute id in dimension 0.
+	workitemabsid_u32	$s0, 0;	// s0 = absolute work-item id, dimension 0
+	cvt_s64_s32	$d0, $s0;	// signed 32 -> 64 bit conversion of the id
+	shl_u64	$d0, $d0, 2;	// d0 = id * 4 = byte offset of the element
+	ld_kernarg_align(8)_width(all)_u64	$d1, [%out];
+	add_u64	$d1, $d1, $d0;	// d1 = address of out[id]
+	ld_kernarg_align(8)_width(all)_u64	$d2, [%in];
+	add_u64	$d0, $d2, $d0;	// d0 = address of in[id]
+	ld_global_u32	$s0, [$d0];
+	st_global_u32	$s0, [$d1];	// out[id] = in[id]
+	ret;
+};
@@ -0,0 +1,64 @@
+module &m:1:0:$full:$large:$default;
+
+////////////////////////////////////////////////////////////////////////////////
+//
+// The University of Illinois/NCSA
+// Open Source License (NCSA)
+//
+// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
+//
+// Developed by:
+//
+//                 AMD Research and AMD HSA Software Development
+//
+//                 Advanced Micro Devices, Inc.
+//
+//                 www.amd.com
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal with the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+//  - Redistributions of source code must retain the above copyright notice,
+//    this list of conditions and the following disclaimers.
+//  - Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimers in
+//    the documentation and/or other materials provided with the distribution.
+//  - Neither the names of Advanced Micro Devices, Inc,
+//    nor the names of its contributors may be used to endorse or promote
+//    products derived from this Software without specific prior written
+//    permission.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS WITH THE SOFTWARE.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+
+decl prog function &abort()();
+
+prog kernel &__vector_copy_kernel(
+	kernarg_u64 %in,	// global address of the source u32 elements
+	kernarg_u64 %out)	// global address of the destination u32 elements
+{
+@__vector_copy_kernel_entry:
+	// BB#0 (%entry): each work-item copies one 32-bit element, out[id] = in[id], with id = absolute id in dimension 0.
+	workitemabsid_u32	$s0, 0;	// s0 = absolute work-item id, dimension 0
+	cvt_s64_s32	$d0, $s0;	// signed 32 -> 64 bit conversion of the id
+	shl_u64	$d0, $d0, 2;	// d0 = id * 4 = byte offset of the element
+	ld_kernarg_align(8)_width(all)_u64	$d1, [%out];
+	add_u64	$d1, $d1, $d0;	// d1 = address of out[id]
+	ld_kernarg_align(8)_width(all)_u64	$d2, [%in];
+	add_u64	$d0, $d2, $d0;	// d0 = address of in[id]
+	ld_global_u32	$s0, [$d0];
+	st_global_u32	$s0, [$d1];	// out[id] = in[id]
+	ret;
+};
@@ -0,0 +1,280 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include "cp_process_time.h"
+#include "cu_masking.h"
+#include "device_load_bandwidth.h"
+#include "device_store_bandwidth.h"
+#include "dispatch_time.h"
+#include "flush_latency.h"
+#include "gtest/gtest.h"
+#include "hsa_info.h"
+#include "image_bandwidth.h"
+#include "image_load_bandwidth.h"
+#include "image_store_bandwidth.h"
+#include "matrix_transpose.h"
+#include "memory_copy.h"
+#include "memory_allocation.h"
+#include "memory_async_copy.h"
+#include "queue_concurrency.h"
+#include "queue_create_destroy_latency.h"
+#include "system_load_bandwidth.h"
+#include "system_store_bandwidth.h"
+#include "vector_copy.h"
+
+/**
+ * Try to order tests from fastest running to slowest running.
+ */
+
+// Display HSA system information first.
+TEST(rocrtst, Feature_Hsa_Info) {
+  // No DisplayResults() step: this test only goes through SetUp/Run/Close.
+  HsaInfo info_test;
+  info_test.SetUp();
+  info_test.Run();
+  info_test.Close();
+}
+
+// Requires HSA_PROFILE_FULL
+TEST(rocrtst, Perf_Image_Store_Bandwidth) {
+  // Standard perf sequence: set up, measure, report, tear down.
+  ImageStoreBandwidth store_test;
+  store_test.SetUp();
+  store_test.Run();
+  store_test.DisplayResults();
+  store_test.Close();
+}
+
+// Requires HSA_PROFILE_FULL
+TEST(rocrtst, Perf_Image_Load_Bandwidth) {
+  // Standard perf sequence: set up, measure, report, tear down.
+  ImageLoadBandwidth load_test;
+  load_test.SetUp();
+  load_test.Run();
+  load_test.DisplayResults();
+  load_test.Close();
+}
+
+// Requires HSA_PROFILE_FULL
+TEST(rocrtst, Perf_Image_Bandwidth) {
+  // Standard perf sequence: set up, measure, report, tear down.
+  ImageBandwidth image_test;
+  image_test.SetUp();
+  image_test.Run();
+  image_test.DisplayResults();
+  image_test.Close();
+}
+
+// Requires HSA_PROFILE_FULL
+TEST(rocrtst, Perf_Queue_Concurrency) {
+  // Standard perf sequence: set up, measure, report, tear down.
+  QueueConcurrency concurrency_test;
+  concurrency_test.SetUp();
+  concurrency_test.Run();
+  concurrency_test.DisplayResults();
+  concurrency_test.Close();
+}
+
+TEST(rocrtst, Feature_Cu_Masking) {
+  // Feature test: there is no DisplayResults() step.
+  CuMasking masking_test;
+  masking_test.SetUp();
+  masking_test.Run();
+  masking_test.Close();
+}
+
+TEST(rocrtst, Perf_Flush_Latency) {
+  // Standard perf sequence: set up, measure, report, tear down.
+  FlushLatency flush_test;
+  flush_test.SetUp();
+  flush_test.Run();
+  flush_test.DisplayResults();
+  flush_test.Close();
+}
+
+// Disabled: this test appears to write outside the bounds of its out_data_
+// buffer. With the free of out_data_ commented out the problem is not seen;
+// with the free in place a crash occurs, either immediately or some time
+// later.
+TEST(rocrtst, DISABLED_Perf_Device_Memory_Store_Bandwidth) {
+  DeviceStoreBandwidth store_test;
+  store_test.SetUp();
+  store_test.Run();
+  store_test.DisplayResults();
+  store_test.Close();
+}
+
+// Disabled: this test appears to write outside the bounds of its out_data_
+// buffer. With the free of out_data_ commented out the problem is not seen;
+// with the free in place a crash occurs, either immediately or some time
+// later.
+TEST(rocrtst, DISABLED_Perf_Device_Memory_Load_Bandwidth) {
+  DeviceLoadBandwidth load_test;
+  load_test.SetUp();
+  load_test.Run();
+  load_test.DisplayResults();
+  load_test.Close();
+}
+TEST(rocrtst, Perf_Dispatch_Time_Single_SpinWait) {
+  DispatchTime dispatch_test;
+
+  // Configure before SetUp(): 100 iterations, default signal, single kernel.
+  dispatch_test.set_num_iteration(100);
+  dispatch_test.UseDefaultSignal(true);
+  dispatch_test.LaunchSingleKernel(true);
+
+  dispatch_test.SetUp();
+  dispatch_test.Run();
+  dispatch_test.DisplayResults();
+  dispatch_test.Close();
+}
+
+TEST(rocrtst, Perf_Dispatch_Time_Single_Interrupt) {
+  DispatchTime dispatch_test;
+
+  // Configure before SetUp(): non-default signal, single kernel.
+  dispatch_test.UseDefaultSignal(false);
+  dispatch_test.LaunchSingleKernel(true);
+
+  dispatch_test.SetUp();
+  dispatch_test.Run();
+  dispatch_test.DisplayResults();
+  dispatch_test.Close();
+}
+
+TEST(rocrtst, Perf_Dispatch_Time_Multi_SpinWait) {
+  DispatchTime dispatch_test;
+
+  // Configure before SetUp(): default signal, multi-kernel launch.
+  dispatch_test.UseDefaultSignal(true);
+  dispatch_test.LaunchSingleKernel(false);
+
+  dispatch_test.SetUp();
+  dispatch_test.Run();
+  dispatch_test.DisplayResults();
+  dispatch_test.Close();
+}
+
+TEST(rocrtst, Perf_Dispatch_Time_Multi_Interrupt) {
+  DispatchTime dispatch_test;
+
+  // Configure before SetUp(): non-default signal, multi-kernel launch.
+  dispatch_test.UseDefaultSignal(false);
+  dispatch_test.LaunchSingleKernel(false);
+
+  dispatch_test.SetUp();
+  dispatch_test.Run();
+  dispatch_test.DisplayResults();
+  dispatch_test.Close();
+}
+TEST(rocrtst, DISABLED_Perf_CpProcessTime) {
+  CpProcessTime cp_test;
+
+  // The iteration count is set before SetUp().
+  cp_test.set_num_iteration(10);
+
+  cp_test.SetUp();
+  cp_test.Run();
+  cp_test.DisplayResults();
+  cp_test.Close();
+}
+
+TEST(rocrtst, Perf_Memory_Allocation) {
+  // The constructor argument (10) is passed straight through to the test.
+  MemoryAllocation alloc_test(10);
+  alloc_test.SetUp();
+  alloc_test.Run();
+  alloc_test.DisplayResults();
+  alloc_test.Close();
+}
+
+#if MEM_POOL_FILL_BUG
+TEST(rocrtst, Perf_Queue_Latency) {
+  QueueLatency latency_test;
+
+  // The iteration count is set before SetUp().
+  latency_test.set_num_iteration(10);
+
+  latency_test.SetUp();
+  latency_test.Run();
+  latency_test.DisplayResults();
+  latency_test.Close();
+}
+
+TEST(rocrtst, Perf_System_Memory_Load_Bandwidth) {
+  // Standard perf sequence: set up, measure, report, tear down.
+  SystemLoadBandwidth load_test;
+  load_test.SetUp();
+  load_test.Run();
+  load_test.DisplayResults();
+  load_test.Close();
+}
+
+TEST(rocrtst, Perf_System_Memory_Store_Bandwidth) {
+  // Standard perf sequence: set up, measure, report, tear down.
+  SystemStoreBandwidth store_test;
+  store_test.SetUp();
+  store_test.Run();
+  store_test.DisplayResults();
+  store_test.Close();
+}
+
+TEST(rocrtst, Perf_Memory_Copy) {
+  MemoryCopy copy_test;
+
+  // The iteration count is set before SetUp().
+  copy_test.set_num_iteration(10);
+
+  copy_test.SetUp();
+  copy_test.Run();
+  copy_test.DisplayResults();
+  copy_test.Close();
+}
+
+#endif
+
+#if 0
+// These tests were not complete. Needs research/work.
+TEST(rocrtst, Feature_Vector_Copy) {
+  // Feature test: there is no DisplayResults() step.
+  VectorCopy copy_test;
+  copy_test.SetUp();
+  copy_test.Run();
+  copy_test.Close();
+}
+
+TEST(rocrtst, Perf_Matrix_Transpose) {
+  // Standard perf sequence: set up, measure, report, tear down.
+  MatrixTranspose transpose_test;
+  transpose_test.SetUp();
+  transpose_test.Run();
+  transpose_test.DisplayResults();
+  transpose_test.Close();
+}
+
+#endif
+
+//#if NEED_TO_MAKE_BATCH
+TEST(rocrtst, Perf_Memory_Async_Copy) {
+  MemoryAsyncCopy async_copy_test;
+
+  // The iteration count is set before SetUp().
+  async_copy_test.set_num_iteration(10);
+
+  async_copy_test.SetUp();
+  async_copy_test.Run();
+  async_copy_test.DisplayResults();
+  async_copy_test.Close();
+}
+//#endif
+
+int main(int argc, char** argv) {
+  // googletest parses (and may rewrite) the command line before any test
+  // runs; the value produced by RUN_ALL_TESTS() becomes the exit status.
+  ::testing::InitGoogleTest(&argc, argv);
+  const int exit_status = RUN_ALL_TESTS();
+  return exit_status;
+}
@@ -0,0 +1,289 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include "matrix_transpose.h"
+#include "common/base_rocr_utils.h"
+#include "common/common.h"
+#include "common/helper_funcs.h"
+#include "common/hsatimer.h"
+#include "hsa/hsa.h"
+#include "hsa/hsa_ext_amd.h"
+#include "hsa/hsa_ext_finalize.h"
+#include "gtest/gtest.h"
+#include <stdlib.h>
+#include <algorithm>
+
+static const unsigned int NUM_BLOCK_SIZES = 2;  // number of entries in blockSizes
+static const unsigned int blockSizes[NUM_BLOCK_SIZES] = {8, 16};  // candidate block sizes; not read by any live code in this file
+static const unsigned int NUM_MATRIX_DIMS = 2;  // number of entries in matrixDims
+static const unsigned int matrixDims[NUM_MATRIX_DIMS] = {1024, 64};  // candidate matrix dimensions; not read by any live code in this file
+
+// Start with no buffers and a zero-sized problem; the real values are filled
+// in by InitializeData(), which SetUp() calls. Initializers follow the member
+// declaration order in the class.
+MatrixTranspose::MatrixTranspose(void) :
+  BaseRocR(),
+  in_buffer_(NULL),
+  out_buffer_(NULL),
+  in_buffer_sys_(NULL),
+  out_buffer_sys_(NULL),
+  width_(0),
+  height_(0),
+  buf_size_(0),
+  block_size_(0),
+  time_mean_(0.0) {
+}
+
+// Releases the host-side matrices. They are obtained with aligned_alloc() in
+// InitializeData() and nothing else in this file frees them, so without this
+// they leak once per test object. Both pointers start out as NULL in the
+// constructor and free(NULL) is a no-op, so destroying an object whose
+// SetUp() never ran is safe.
+MatrixTranspose::~MatrixTranspose(void) {
+  free(in_buffer_sys_);
+  free(out_buffer_sys_);
+}
+
+// Prepares everything Run() needs: host data, HSA runtime state, two buffers
+// from the CPU memory pool, a queue on the GPU agent, the loaded kernel and a
+// two-dimensional dispatch packet. Any failing HSA call aborts the set-up
+// through a fatal gtest assertion.
+void MatrixTranspose::SetUp(void) {
+  // Host matrices first, then the kernel identity used by the loader below.
+  InitializeData();
+  set_kernel_file_name("transpose_kernel.o");
+  set_kernel_name("&__OpenCL_matrixTranspose_kernel");
+
+  // Return without raising a test failure when the common bring-up fails.
+  if (rocrtst::InitAndSetupHSA(this) != HSA_STATUS_SUCCESS) {
+    return;
+  }
+
+  hsa_agent_t* gpu_dev = gpu_device1();
+  hsa_agent_t* cpu_dev = cpu_device();
+
+  // The pool search is expected to end with HSA_STATUS_INFO_BREAK
+  // (presumably once FindGlobalPool has picked a pool -- confirm in helper).
+  hsa_status_t err = hsa_amd_agent_iterate_memory_pools(
+                       *cpu_dev, rocrtst::FindGlobalPool, &cpu_pool());
+  ASSERT_EQ(err, HSA_STATUS_INFO_BREAK);
+
+  // Input and output buffers of buf_size_ bytes each from the CPU pool.
+  err = hsa_amd_memory_pool_allocate(
+          cpu_pool(), buf_size_, 0, reinterpret_cast<void**>(&in_buffer_));
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  err = hsa_amd_memory_pool_allocate(
+          cpu_pool(), buf_size_, 0, reinterpret_cast<void**>(&out_buffer_));
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  // Grant the GPU agent access to both pool buffers.
+  err = hsa_amd_agents_allow_access(1, gpu_dev, NULL, in_buffer_);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  err = hsa_amd_agents_allow_access(1, gpu_dev, NULL, out_buffer_);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  // Queue on the GPU agent, remembered as the main queue.
+  hsa_queue_t* queue = nullptr;
+  rocrtst::CreateQueue(*gpu_dev, &queue);
+  set_main_queue(queue);
+
+  rocrtst::LoadKernelFromObjFile(this);
+
+  // Dispatch packet: 2 dimensions, block_size_ x block_size_ work-groups over
+  // a width_ x height_ grid, with one uint of group memory per work-item.
+  rocrtst::InitializeAQLPacket(this, &aql());
+  auto& packet = aql();
+  packet.setup = 2 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
+  packet.workgroup_size_x = block_size_;
+  packet.workgroup_size_y = block_size_;
+  packet.grid_size_x = width_;
+  packet.grid_size_y = height_;
+  packet.group_segment_size = sizeof(uint) * block_size_ * block_size_;
+
+#ifdef DEBUG
+  // Dump the launch geometry.
+  std::cout << "workgroup size: " << block_size_ << ", " << block_size_
+            << ", " << 1 << std::endl;
+  std::cout << "grid size: " << packet.grid_size_x << ", "
+            << packet.grid_size_y << ", " << packet.grid_size_z << std::endl;
+  std::cout << "group segment size: " << packet.group_segment_size
+            << std::endl;
+#endif
+}
+
+// Dispatches the transpose kernel once, waits for it to finish, prints the
+// device-side kernel time, verifies the output and stores the host-side
+// wall-clock time of the dispatch in time_mean_.
+//
+// Does nothing when rocrtst::CheckProfile() rejects the test. Any failing HSA
+// call ends the run through a fatal gtest assertion.
+void MatrixTranspose::Run(void) {
+  hsa_status_t err;
+  hsa_agent_t* gpu_dev = gpu_device1();
+
+  if (!rocrtst::CheckProfile(this)) {
+    return;
+  }
+
+  // Kernel argument block. Field order and sizes mirror the kernarg list of
+  // &__OpenCL_matrixTranspose_kernel: nine 64-bit slots followed by three
+  // 32-bit values.
+  typedef struct args_t {
+    uint* offset_0;
+    uint* offset_1;
+    uint* offset_2;
+    uint* printf_buffer;
+    uint* vqueue_buffer;
+    uint* aqlwrap_pointer;
+
+    uint* in_buf;
+    uint* out_buf;
+    uint* local_buf;
+    uint iblock_size;
+    uint iwidth;
+    uint iheight;
+  } args;
+
+  args* kern_ptr = NULL;
+  err = hsa_amd_memory_pool_allocate(cpu_pool(), sizeof(args), 0,
+                                     reinterpret_cast<void**>(&kern_ptr));
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  err = hsa_amd_agents_allow_access(1, gpu_dev, NULL, kern_ptr);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  kern_ptr->offset_0 = 0;
+  kern_ptr->offset_1 = 0;
+  kern_ptr->offset_2 = 0;
+  kern_ptr->printf_buffer = 0;
+  kern_ptr->vqueue_buffer = 0;
+  kern_ptr->aqlwrap_pointer = 0;
+
+  // NOTE(review): the kernel is pointed at the aligned_alloc() host buffers,
+  // not at the pool buffers allocated in SetUp() -- confirm this is intended.
+  kern_ptr->in_buf = in_buffer_sys_;
+  kern_ptr->out_buf = out_buffer_sys_;
+  kern_ptr->local_buf = 0;
+  kern_ptr->iblock_size = block_size_;
+  kern_ptr->iwidth = width_;
+  kern_ptr->iheight = height_;
+
+  aql().kernarg_address = kern_ptr;
+
+  // Reserve one packet slot. The write index only ever increases, while the
+  // packet ring holds main_queue()->size entries (a power of two), so the
+  // index must be wrapped before it is used to address the ring.
+  const uint64_t idx = hsa_queue_add_write_index_relaxed(main_queue(), 1);
+  const uint64_t queue_mask = main_queue()->size - 1;
+  hsa_kernel_dispatch_packet_t* slot =
+    &(reinterpret_cast<hsa_kernel_dispatch_packet_t*>(
+        main_queue()->base_address))[idx & queue_mask];
+
+  *slot = aql();
+
+  rocrtst::PerfTimer p_timer;
+  int id = p_timer.CreateTimer();
+  p_timer.StartTimer(id);
+
+  // Mark the packet as a kernel dispatch, then ring the doorbell with the
+  // (unwrapped) write index.
+  slot->header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+
+  hsa_signal_store_release(main_queue()->doorbell_signal, idx);
+
+  // Wait on the dispatch signal until the kernel is finished.
+  hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_LT, 1,
+                            (uint64_t) - 1, HSA_WAIT_STATE_ACTIVE);
+  p_timer.StopTimer(id);
+
+  // Device-side timestamps; they are only meaningful if this call succeeded,
+  // so check the status before reading dispatch_time.
+  hsa_amd_profiling_dispatch_time_t dispatch_time;
+  err = hsa_amd_profiling_get_dispatch_time(*gpu_dev, signal(), &dispatch_time);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  uint64_t stamp = dispatch_time.end - dispatch_time.start;
+  uint64_t freq;
+
+  err = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &freq);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+
+  // ticks / (ticks per second) * 1000 = milliseconds.
+  std::cout << "Kernel time is: " <<
+            (double) stamp / (double) freq * 1000.0 << std::endl;
+
+  // Re-arm the completion signal.
+  hsa_signal_store_release(signal(), 1);
+
+  // Verify Results
+  VerifyResults(out_buffer_sys_);
+
+  // Only one dispatch is timed, so the "mean" is that single measurement.
+  time_mean_ = p_timer.ReadTimer(id);
+}
+
+// Prints the time recorded by Run(). Nothing is printed when
+// rocrtst::CheckProfile() rejects the test.
+void MatrixTranspose::DisplayResults(void) const {
+  if (rocrtst::CheckProfile(this)) {
+    std::cout << "============================================" << std::endl;
+    std::cout << "Matrix Transpose Mean Time:       " << time_mean_
+              << std::endl;
+  }
+}
+
+// Delegates tear-down to the shared rocrtst helper and raises a fatal test
+// failure if the helper reports anything other than success.
+void MatrixTranspose::Close(void) {
+  const hsa_status_t err = rocrtst::CommonCleanUp(this);
+  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
+}
+
+// Chooses the problem size and creates the host-side matrices.
+//
+// The size is fixed: a square 1920 x 1920 matrix of uint handled in
+// block_size_ = 16 blocks (the blockSizes / matrixDims tables at the top of
+// the file are not consulted). The input matrix is filled by SetData(); the
+// output matrix is filled with 0xdeadbeef.
+//
+// Raises a fatal gtest failure and returns early if either allocation fails.
+void MatrixTranspose::InitializeData(void) {
+  block_size_ = 16;
+  width_ = 1920;
+  height_ = width_;
+
+  buf_size_ = width_ * height_ * sizeof(uint);
+
+  // aligned_alloc() returns NULL on failure; stop here rather than letting
+  // SetData()/FillData() write through a null pointer.
+  in_buffer_sys_ = (uint*) aligned_alloc(256, buf_size_);
+  ASSERT_TRUE(in_buffer_sys_ != NULL);
+  SetData(in_buffer_sys_);
+
+  out_buffer_sys_ = (uint*) aligned_alloc(256, buf_size_);
+  ASSERT_TRUE(out_buffer_sys_ != NULL);
+  FillData(out_buffer_sys_, 0xdeadbeef);
+}
+
+// Writes each element's own row-major index into it, i.e. the element at
+// (row, col) receives row * width_ + col. `buffer` must hold at least
+// width_ * height_ elements.
+void MatrixTranspose::SetData(uint* buffer) {
+  for (unsigned int row = 0; row < height_; ++row) {
+    const unsigned int row_base = row * width_;
+    uint* row_ptr = buffer + row_base;
+
+    for (unsigned int col = 0; col < width_; ++col) {
+      row_ptr[col] = row_base + col;
+    }
+  }
+}
+
+// Sets the first width_ * height_ elements of `buffer` to `val`.
+void MatrixTranspose::FillData(uint* buffer, unsigned int val) {
+  const unsigned int element_count = width_ * height_;
+  std::fill_n(buffer, element_count, val);
+}
+
+// Checks that `buffer` holds the transpose of the matrix written by
+// SetData(): output element (i, j), stored at i * height_ + j, must equal the
+// input index j * width_ + i.
+//
+// The first mismatch raises a fatal gtest failure and returns from this
+// function, so "PASSED!" is printed only when every element matches. (The
+// former `err` flag was never set and its loop conditions were dead code.)
+void MatrixTranspose::VerifyResults(uint* buffer) {
+  for (unsigned int i = 0; i < width_; i++) {
+    for (unsigned int j = 0; j < height_; j++) {
+      ASSERT_EQ(*(buffer + i * height_ + j), j * width_ + i);
+    }
+  }
+
+  std::cout << "PASSED!" << std::endl;
+}
@@ -0,0 +1,101 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#ifndef __ROCRTST_SRC_MATRIX_TRANSPOSE_H__
+#define __ROCRTST_SRC_MATRIX_TRANSPOSE_H__
+
+#include "perf_common/perf_base.h"
+#include "common/base_rocr.h"
+#include "hsa/hsa.h"
+
+class MatrixTranspose: public rocrtst::BaseRocR, public PerfBase {
+  //@Brief: Test that dispatches a matrix-transpose kernel, times it and verifies the result
+ public:
+  //@Brief: Default Constructor; clears all buffers, sizes and the timing result
+  MatrixTranspose();
+
+  //@Brief: Destructor
+  ~MatrixTranspose();
+
+  //@Brief: Override SetUp function; prepares host data, buffers, queue, kernel and dispatch packet
+  virtual void SetUp();
+
+  //@Brief: Run the measurement and verify the output
+  virtual void Run();
+
+  //@Brief: Clean up and Close
+  virtual void Close();
+
+  //@Brief: Display the time recorded by Run()
+  virtual void DisplayResults() const;
+
+ private:
+  //@Brief: Fill buffer so that every element holds its own row-major index
+  virtual void SetData(uint* buffer);
+
+  //@Brief: Fill the first width_ * height_ elements of buffer with val
+  virtual void FillData(uint* buffer, unsigned int val);
+
+  //@Brief: Check that buffer is the transpose of the data written by SetData
+  virtual void VerifyResults(uint* buffer);
+
+  //@Brief: Initialize the object attributes (sizes and host-side matrices)
+  virtual void InitializeData();
+
+  uint* in_buffer_;        // memory-pool buffer allocated in SetUp()
+  uint* out_buffer_;       // memory-pool buffer allocated in SetUp()
+  uint* in_buffer_sys_;    // host input matrix from aligned_alloc()
+  uint* out_buffer_sys_;   // host output matrix from aligned_alloc()
+  unsigned int width_;     // matrix width in elements
+  unsigned int height_;    // matrix height in elements
+  unsigned int buf_size_;  // size of one matrix in bytes
+  unsigned int block_size_;  // work-group edge length
+  double time_mean_;       // timer reading stored by Run()
+
+  hsa_barrier_and_packet_t bpkt;  // not referenced in matrix_transpose.cc
+};
+
+#endif //__ROCRTST_SRC_MATRIX_TRANSPOSE_H__
+
@@ -0,0 +1,198 @@
+/*
+ * =============================================================================
+ *   ROC Runtime Conformance Release License
+ * =============================================================================
+ * The University of Illinois/NCSA
+ * Open Source License (NCSA)
+ *
+ * Copyright (c) 2017, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *                 AMD Research and AMD ROC Software Development
+ *
+ *                 Advanced Micro Devices, Inc.
+ *
+ *                 www.amd.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal with the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ *  - Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimers.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimers in
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the names of <Name of Development Group, Name of Institution>,
+ *    nor the names of its contributors may be used to endorse or promote
+ *    products derived from this Software without specific prior written
+ *    permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS WITH THE SOFTWARE.
+ *
+ */
+
+#include "memory_allocation.h"
+#include "common/base_rocr_utils.h"
+#include "common/common.h"
+#include "hsa/hsa.h"
+#include "gtest/gtest.h"
+#include <algorithm>
+
+MemoryAllocation::MemoryAllocation(uint32_t num_iters)  // num_iters is unused
+    : BaseRocR(), allocation_time_{0.0}, mem_pool_flag_(0) {
+  ptr = nullptr;  // no buffer is held until Run() allocates one
+}
+
+// The destructor has no work of its own; runtime teardown is requested
+// explicitly through Close().
+MemoryAllocation::~MemoryAllocation() = default;
+
+// Human-readable labels for the entries of Size[], in the same index order.
+const char* MemoryAllocation::Str[16] = {
+    "64K", "128K", "256K", "512K", "1M", "2M", "4M", "8M",
+    "16M", "32M", "64M", "128M", "256M", "512M", "1G", "2G"
+};
+// Allocation sizes in bytes, index-aligned with Str[]. Each entry is a power
+// of two, doubling from 64 KiB (1 << 16) up to 2 GiB (1 << 31). The shifts
+// are done on size_t so no intermediate int arithmetic is involved.
+const size_t MemoryAllocation::Size[16] = {
+    size_t{1} << 16, size_t{1} << 17, size_t{1} << 18, size_t{1} << 19,
+    size_t{1} << 20, size_t{1} << 21, size_t{1} << 22, size_t{1} << 23,
+    size_t{1} << 24, size_t{1} << 25, size_t{1} << 26, size_t{1} << 27,
+    size_t{1} << 28, size_t{1} << 29, size_t{1} << 30, size_t{1} << 31
+};
+
+// Initializes the HSA runtime for this test, selects a memory pool on the CPU
+// agent via FindGlobalPool and caches that pool's global flags.
+void MemoryAllocation::SetUp() {
+  if (rocrtst::InitAndSetupHSA(this) != HSA_STATUS_SUCCESS) {
+    return;
+  }
+
+  // The iteration is expected to end with HSA_STATUS_INFO_BREAK once
+  // FindGlobalPool has picked a pool; anything else is treated as "not found".
+  hsa_agent_t* cpu_agent = cpu_device();
+  hsa_status_t status = hsa_amd_agent_iterate_memory_pools(
+      *cpu_agent, rocrtst::FindGlobalPool, &cpu_pool());
+  EXPECT_EQ(status, HSA_STATUS_INFO_BREAK);
+
+  if (status != HSA_STATUS_INFO_BREAK) {
+    std::cout << "Unable to find global pool. Test will not be run."
+              << std::endl;
+    return;
+  }
+
+  // cpu_pool() should now be in the global segment; query its global flags.
+  status = hsa_amd_memory_pool_get_info(cpu_pool(),
+      static_cast<hsa_amd_memory_pool_info_t>(
+          HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS), &mem_pool_flag_);
+  ASSERT_EQ(status, HSA_STATUS_SUCCESS);
+}
+
+// Times hsa_amd_memory_pool_allocate() on the CPU pool for every entry of
+// Size[] and stores the trimmed mean per size in allocation_time_[].
+void MemoryAllocation::Run() {
+  // Bail out if the profile check fails or no CPU pool handle is available.
+  if (!rocrtst::CheckProfile(this) || cpu_pool().handle == 0) {
+    return;
+  }
+
+  const size_t iterations = RealIterationNum();
+  hsa_status_t status;
+
+  // One batch of timed allocations per table entry.
+  for (int size_idx = 0; size_idx < 16; size_idx++) {
+    std::vector<double> samples;
+
+    for (uint32_t it = 0; it < iterations; it++) {
+#if DEBUG
+      std::cout << "." << std::flush;
+#endif
+
+      // A fresh timer per sample; only the allocate call sits inside the
+      // start/stop window, the matching free is not timed.
+      rocrtst::PerfTimer timer;
+      const int timer_id = timer.CreateTimer();
+
+      timer.StartTimer(timer_id);
+      status = hsa_amd_memory_pool_allocate(cpu_pool(), Size[size_idx], 0,
+                                            &ptr);
+      timer.StopTimer(timer_id);
+      ASSERT_EQ(status, HSA_STATUS_SUCCESS);
+
+      // Release the buffer right away, before the next sample is taken.
+      status = hsa_amd_memory_pool_free(ptr);
+      ASSERT_EQ(status, HSA_STATUS_SUCCESS);
+      ptr = NULL;
+
+      samples.push_back(timer.ReadTimer(timer_id));
+    }
+
+#if DEBUG
+    std::cout << std::endl;
+#endif
+
+    // Reduce the samples to the mean allocation time for this size.
+    allocation_time_[size_idx] = GetMeanTime(samples);
+  }
+}
+
+size_t MemoryAllocation::RealIterationNum() {  // samples Run() takes per size
+  return static_cast<size_t>(num_iteration() * 1.2 + 1);  // +20%, plus one
+}
+
+// Trimmed mean of the timings in vec. vec is sorted and trimmed IN PLACE.
+// Returns the mean of the kept samples, or 0.0 when none remain (the old
+// code erased past the end / divided by zero on short or empty input).
+double MemoryAllocation::GetMeanTime(std::vector<double>& vec) {
+  std::sort(vec.begin(), vec.end());
+  // Drop the fastest sample plus 10% of num_iteration(), clamped to vec.size().
+  const size_t keep = num_iteration();
+  const size_t skip = std::min(vec.size(), 1 + static_cast<size_t>(keep * 0.1));
+  vec.erase(vec.begin(), vec.begin() + skip);
+  if (vec.size() > keep) vec.resize(keep);  // discard the slowest extras
+  if (vec.empty()) return 0.0;
+
+  double sum = 0.0;
+  for (double sample : vec) sum += sample;
+
+  return sum / vec.size();
+}
+
+// Prints a table with one row per entry of Size[]: its label, the mean
+// allocation time from Run() and 2 * bytes / time scaled down by 1024^3.
+void MemoryAllocation::DisplayResults() const {
+  if (!rocrtst::CheckProfile(this)) {
+    return;
+  }
+
+  fprintf(stdout, "==============================================\n");
+  fprintf(stdout, "  Data Size  Allocation_time   BandWidth(GB/s)\n");
+
+  for (int row = 0; row < 16; ++row) {
+    const auto mean_time = allocation_time_[row];
+    const auto rate = 2 * Size[row] / mean_time / 1024 / 1024 / 1024;
+    fprintf(stdout, "  %9s  %15.6f   %15.6f\n", Str[row], mean_time, rate);
+  }
+
+  fprintf(stdout, "==============================================\n");
+}
+
+// Delegates teardown to rocrtst::CommonCleanUp() and asserts that it
+// reported success.
+void MemoryAllocation::Close() {
+  const hsa_status_t status = rocrtst::CommonCleanUp(this);
+  ASSERT_EQ(status, HSA_STATUS_SUCCESS);
+}
--- a/Show More
+++ b/Show More