Refactored performance test code

Commented and flattened binary search sample.

Change-Id: Ib783292207c956d16003195924a3bcfbbde5039f


[ROCm/ROCR-Runtime commit: 8161ebb915]
This commit is contained in:
Chris Freehill
2017-05-05 23:50:42 -05:00
parent 768644ba7a
commit 9f1065771a
117 changed files with 46627 additions and 0 deletions
+12
View File
@@ -0,0 +1,12 @@
*.o
*.bin
*.tar
*.hsaco
*.orig
*.obsol
*.bk
*.old
*.cmake
build/*
+74
View File
@@ -0,0 +1,74 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include "common/base_rocr.h"
#include "common/base_rocr_utils.h"
#include "common/os.h"
namespace rocrtst {
/// Construct a test object with every data member in a known state.
///
/// Handles are cleared (the two agent handles are set to -1), sizes are
/// zeroed, the iteration count defaults to 100 and no profile requirement is
/// set. The current value of the HSA_ENABLE_INTERRUPT environment variable is
/// read with GetEnv() and kept so it can be retrieved later through
/// orig_hsa_enable_interrupt().
BaseRocR::BaseRocR(void) {
  num_iteration_ = 100;

  // Handles
  signal_.handle = 0;
  cpu_device_.handle = -1;
  gpu_device1_.handle = -1;
  region_.handle = 0;
  device_pool_.handle = 0;
  cpu_pool_.handle = 0;  // was left uninitialized, unlike the other pools
  kern_arg_pool_.handle = 0;
  main_queue_ = nullptr;
  kernarg_buffer_ = nullptr;
  kernel_object_ = 0;
  memset(&aql_, 0, sizeof(aql_));

  // These members all have public getters but were never given an initial
  // value, so reading them before the matching setter was undefined.
  profile_ = HSA_PROFILE_BASE;
  group_size_ = 0;
  group_segment_size_ = 0;
  private_segment_size_ = 0;
  kernarg_size_ = 0;
  kernarg_align_ = 0;

  set_requires_profile(-1);  // -1: no profile requirement
  set_enable_interrupt(false);
  orig_hsa_enable_interrupt_ = GetEnv("HSA_ENABLE_INTERRUPT");
  set_kernel_file_name("");
  set_verbosity(0);
}
/// Destructor. It has an empty body and releases nothing itself.
BaseRocR::~BaseRocR(void) {}
} // namespace rocrtst
@@ -0,0 +1,294 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
/// \file
/// File containing base class declaration needed for all RocR tests and samples
/// that allow derived classes to use utility functions.
#ifndef ROCRTST_COMMON_BASE_ROCR_H_
#define ROCRTST_COMMON_BASE_ROCR_H_
#include <stdint.h>
#include <stdio.h>
#include <string>
#include "common/common.h"
#include "common/hsatimer.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
namespace rocrtst {
/// Common interface for RocR tests and samples, required for several
/// common functions.
///
/// The class stores the state a test shares with the utility functions:
/// agent handles, queue, completion signal, memory pools, the kernel
/// description and a staging AQL dispatch packet. The accessors below only
/// read or write the corresponding member.
class BaseRocR {
 public:
  BaseRocR(void);

  virtual ~BaseRocR(void);

  // Setters and getters

  /// Store the handle of the GPU agent.
  void set_gpu_device1(hsa_agent_t in_dev) {
    gpu_device1_.handle = in_dev.handle;
  }
  /// \returns Pointer to the stored GPU agent handle.
  hsa_agent_t* gpu_device1(void) {
    return &gpu_device1_;
  }

  /// Store the handle of the CPU agent.
  void set_cpu_device(hsa_agent_t in_dev) {
    cpu_device_.handle = in_dev.handle;
  }
  /// \returns Pointer to the stored CPU agent handle.
  hsa_agent_t* cpu_device(void) {
    return &cpu_device_;
  }

  /// Set the name of the code object file.
  void set_kernel_file_name(const char* in_file_name) {
    kernel_file_name_ = in_file_name;
  }
  /// \returns Copy of the code object file name.
  std::string const kernel_file_name(void) const {
    return kernel_file_name_;
  }

  /// Set the name of the kernel.
  /// (A stray `const` used to precede this declaration, turning the return
  /// type into `const void`.)
  void set_kernel_name(std::string in_kernel_name) {
    kernel_name_ = in_kernel_name;
  }
  /// \returns Copy of the kernel name.
  std::string const kernel_name(void) const {
    return kernel_name_;
  }

  /// Store the handle to the kernel code.
  void set_kernel_object(uint64_t in_kernel_object) {
    kernel_object_ = in_kernel_object;
  }
  uint64_t kernel_object(void) const {
    return kernel_object_;
  }

  /// Store the handle of the completion signal.
  void set_signal(hsa_signal_t sig) {
    signal_.handle = sig.handle;
  }
  const hsa_signal_t& signal(void) const {
    return signal_;
  }

  /// Store the device profile.
  void set_profile(hsa_profile_t in_prof) {
    profile_ = in_prof;
  }
  hsa_profile_t profile(void) const {
    return profile_;
  }

  uint32_t private_segment_size(void) const {
    return private_segment_size_;
  }
  void set_private_segment_size(uint32_t sz) {
    private_segment_size_ = sz;
  }

  void set_group_segment_size(uint32_t sz) {
    group_segment_size_ = sz;
  }
  uint32_t group_segment_size(void) const {
    return group_segment_size_;
  }

  void set_group_size(uint32_t sz) {
    group_size_ = sz;
  }
  uint32_t group_size(void) const {
    return group_size_;
  }

  /// Store the AQL queue pointer; the pointer is stored as given.
  void set_main_queue(hsa_queue_t* q) {
    main_queue_ = q;
  }
  hsa_queue_t* main_queue(void) const {
    return main_queue_;
  }

  /// \returns Mutable reference to the staging AQL dispatch packet.
  hsa_kernel_dispatch_packet_t& aql(void) {
    return aql_;
  }

  hsa_region_t& region(void) {
    return region_;
  }

  void set_num_iteration(int num) {
    num_iteration_ = num;
  }
  /// \returns Iteration count; note the 64-bit member is returned as uint32_t.
  uint32_t num_iteration(void) const {
    return num_iteration_;
  }

  /// \returns Mutable reference to the GPU memory pool handle.
  hsa_amd_memory_pool_t& device_pool(void) {
    return device_pool_;
  }
  /// \returns Mutable reference to the CPU memory pool handle.
  hsa_amd_memory_pool_t& cpu_pool(void) {
    return cpu_pool_;
  }
  /// \returns Mutable reference to the kernel-argument memory pool handle.
  hsa_amd_memory_pool_t& kern_arg_pool(void) {
    return kern_arg_pool_;
  }

  void set_kernarg_size(uint32_t sz) {
    kernarg_size_ = sz;
  }
  uint32_t kernarg_size(void) const {
    return kernarg_size_;
  }

  void set_kernarg_align(uint32_t align) {
    kernarg_align_ = align;
  }
  uint32_t kernarg_align(void) const {
    return kernarg_align_;
  }

  /// \returns The unaligned kernel argument buffer pointer, as allocated.
  void* kernarg_buffer(void) const {
    return kernarg_buffer_;
  }
  void set_kernarg_buffer(void* buffer) {
    kernarg_buffer_ = buffer;
  }

  /// \returns Profile required by the test, or -1 if there is no requirement.
  int32_t requires_profile(void) const {
    return requires_profile_;
  }

  /// \returns Stored original value of HSA_ENABLE_INTERRUPT.
  char* orig_hsa_enable_interrupt() const {
    return orig_hsa_enable_interrupt_;
  }

  bool enable_interrupt() const {
    return enable_interrupt_;
  }

  void set_title(std::string name) {
    title_ = name;
  }
  std::string title(void) const {
    return title_;
  }

  /// \returns Pointer to the timer member owned by this object.
  PerfTimer* hsa_timer(void) {
    return &hsa_timer_;
  }

  void set_verbosity(uint32_t v) {
    verbosity_ = v;
  }
  uint32_t verbosity(void) const {
    return verbosity_;
  }

 protected:
  // These setters are protected, so only this class and derived classes can
  // call them.
  void set_requires_profile(int32_t reqd_prof) {
    requires_profile_ = reqd_prof;
  }

  void set_enable_interrupt(bool doEnable) {
    enable_interrupt_ = doEnable;
  }

 private:
  uint64_t num_iteration_;   ///< Number of times to execute test
  hsa_signal_t signal_;      ///< Completion signal used for kernel execution
  hsa_queue_t* main_queue_;  ///< AQL queue used for packets
  hsa_agent_t gpu_device1_;  ///< Handle to first GPU found
  hsa_agent_t cpu_device_;   ///< Handle to CPU
  hsa_region_t region_;      ///< TODO(cfreehil): delete this
  hsa_amd_memory_pool_t device_pool_;    ///< Memory pool on gpu pool list
  hsa_amd_memory_pool_t cpu_pool_;       ///< Memory pool on cpu pool list
  hsa_amd_memory_pool_t kern_arg_pool_;  ///< Memory pool suitable for args
  uint64_t kernel_object_;               ///< Handle to kernel code
  std::string brig_file_;                // TODO(cfreehil): delete this
  std::string kernel_file_name_;         ///< Code object file name
  std::string kernel_name_;              ///< Kernel name
  hsa_kernel_dispatch_packet_t aql_;     ///< Kernel dispatch packet
  uint32_t group_segment_size_;          ///< Kernel group seg size
  uint32_t kernarg_size_;                ///< Kernarg memory size
  uint32_t kernarg_align_;   ///< Alignment for kern argument memory
  void* kernarg_buffer_;     ///< Unaligned allocated kernel arg. buffer
  hsa_profile_t profile_;    ///< Device profile.
  uint32_t group_size_;      ///< Number of work items in one group
  uint32_t private_segment_size_;  ///< Kernel private seg size
  int32_t requires_profile_;  ///< Profile required by test (-1 if no req.)
  char* orig_hsa_enable_interrupt_;  ///< Orig. value of HSA_ENABLE_INTERRUPT
  bool enable_interrupt_;  ///< Whether to enable/disable interrupts for test
  std::string title_;      ///< Displayed title of test
  uint32_t verbosity_;     ///< How much additional output to produce
  PerfTimer hsa_timer_;    ///< Timer to be used for timing parts of test
};
} // namespace rocrtst
#endif // ROCRTST_COMMON_BASE_ROCR_H_
+476
View File
@@ -0,0 +1,476 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
/// \file
/// Utility functions that act on BaseRocR objects.
#include "common/base_rocr_utils.h"
#include <assert.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string>
#include "common/base_rocr.h"
#include "common/helper_funcs.h"
#include "common/os.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_profiler.h"
namespace rocrtst {
/// If \p err is not HSA_STATUS_SUCCESS, print the source location of the
/// failing call to stdout and return that status from the enclosing function.
///
/// The argument is evaluated exactly once, so an expression with side effects
/// (e.g. a function call) may be passed. The do/while(0) wrapper makes the
/// expansion a single statement, so it must be followed by a semicolon and is
/// safe inside an unbraced if/else.
#define RET_IF_HSA_UTILS_ERR(err) do { \
  const hsa_status_t hsa_utils_err_ = (err); \
  if (hsa_utils_err_ != HSA_STATUS_SUCCESS) { \
    std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
                          __FILE__ << std::endl; \
    return hsa_utils_err_; \
  } \
} while (0)
/// Release the resources recorded in \p test, shut HSA down and restore the
/// HSA_ENABLE_INTERRUPT environment variable.
///
/// The kernel argument buffer, the main queue and the completion signal are
/// each released only if set, and the corresponding member is cleared
/// afterwards. The first failing HSA call aborts the clean-up and its status
/// is returned.
/// \param[in] test Test to clean up after; must not be null.
/// \returns HSA_STATUS_SUCCESS if every step succeeded.
hsa_status_t CommonCleanUp(BaseRocR* test) {
  assert(test != nullptr);

  hsa_status_t status;

  void* const kernarg_mem = test->kernarg_buffer();
  if (kernarg_mem != nullptr) {
    status = hsa_amd_memory_pool_free(kernarg_mem);
    RET_IF_HSA_UTILS_ERR(status);
    test->set_kernarg_buffer(nullptr);
  }

  hsa_queue_t* const queue = test->main_queue();
  if (queue != nullptr) {
    status = hsa_queue_destroy(queue);
    RET_IF_HSA_UTILS_ERR(status);
    test->set_main_queue(nullptr);
  }

  if (test->signal().handle != 0) {
    status = hsa_signal_destroy(test->signal());
    RET_IF_HSA_UTILS_ERR(status);

    hsa_signal_t cleared;
    cleared.handle = 0;
    test->set_signal(cleared);
  }

  status = hsa_shut_down();
  RET_IF_HSA_UTILS_ERR(status);

  // A variable that was originally unset is restored as an empty string.
  const char* const saved = test->orig_hsa_enable_interrupt();
  const std::string restore_val = (saved == nullptr) ? "" : saved;
  SetEnv("HSA_ENABLE_INTERRUPT", restore_val.c_str());

  return status;
}
/// Printable profile names, indexed by profile value.
static const char* PROFILE_STR[] = {"HSA_PROFILE_BASE", "HSA_PROFILE_FULL", };

/// Report whether the target hardware profile satisfies the test.
/// Compares the profile stored in \p test with the profile the test requires
/// (HSA_PROFILE_BASE, HSA_PROFILE_FULL, or -1 for "any") and prints the
/// outcome to stdout. Some of the messages are only printed when the test's
/// verbosity is greater than zero.
/// \param[in] test Test that provides profile requirements.
/// \returns bool
///          - true Machine meets test requirements
///          - false Machine does not meet test requirements
static bool CheckProfileAndInform(BaseRocR* test) {
  const bool verbose = test->verbosity() > 0;

  if (verbose) {
    std::cout << "Target HW Profile is "
              << PROFILE_STR[test->profile()] << std::endl;
  }

  // No requirement: nothing to compare.
  if (test->requires_profile() == -1) {
    if (verbose) {
      std::cout << "Test can run on any profile. OK." << std::endl;
    }
    return true;
  }

  std::cout << "Test requires " << PROFILE_STR[test->requires_profile()]
            << ". ";

  const bool profile_matches = (test->requires_profile() == test->profile());
  if (profile_matches) {
    std::cout << "OK." << std::endl;
  } else {
    std::cout << "Not Running." << std::endl;
  }
  return profile_matches;
}
/// Translate the status returned by an iterate function, such as
/// hsa_amd_agent_iterate_memory_pools, into a found / not-found result.
/// HSA_STATUS_INFO_BREAK becomes success, HSA_STATUS_SUCCESS becomes an error
/// because nothing was found, and any other status is passed through.
/// \param[in] err Status returned from the iterate call
/// \returns HSA_STATUS_SUCCESS iff the iterate call succeeded in finding
/// what was being searched for
static hsa_status_t ProcessIterateError(hsa_status_t err) {
  switch (err) {
    case HSA_STATUS_INFO_BREAK:
      return HSA_STATUS_SUCCESS;

    case HSA_STATUS_SUCCESS:
      // Iteration ran to completion, so no match was found.
      return HSA_STATUS_ERROR;

    default:
      return err;
  }
}
/// Fill in the three memory pools of \p test by iterating the pools of its
/// agents: the CPU pool and the kernel-argument pool are searched on the CPU
/// agent, the device pool on the GPU agent.
/// \param[in] test Test whose agents are searched and whose pools are set.
/// \returns HSA_STATUS_SUCCESS if all three pools were found, otherwise the
/// status of the first search that failed.
hsa_status_t SetPoolsTypical(BaseRocR* test) {
  hsa_status_t iter_status;

  // Standard pool of the CPU agent.
  iter_status = hsa_amd_agent_iterate_memory_pools(
      *test->cpu_device(), rocrtst::FindStandardPool, &test->cpu_pool());
  RET_IF_HSA_UTILS_ERR(rocrtst::ProcessIterateError(iter_status));

  // Standard pool of the GPU agent.
  iter_status = hsa_amd_agent_iterate_memory_pools(
      *test->gpu_device1(), rocrtst::FindStandardPool, &test->device_pool());
  RET_IF_HSA_UTILS_ERR(rocrtst::ProcessIterateError(iter_status));

  // Kernel-argument pool, searched on the CPU agent.
  iter_status = hsa_amd_agent_iterate_memory_pools(
      *test->cpu_device(), rocrtst::FindKernArgPool, &test->kern_arg_pool());
  RET_IF_HSA_UTILS_ERR(rocrtst::ProcessIterateError(iter_status));

  return HSA_STATUS_SUCCESS;
}
/// Initialize HSA and populate \p test with the basics needed to run.
///
/// Steps, in order: optionally set HSA_ENABLE_INTERRUPT=1, call hsa_init(),
/// find and store a GPU and a CPU agent, query and store the GPU agent's
/// profile, check it against the test's requirement, and create and store the
/// completion signal (initial value 1).
/// \param[in] test Test to initialize
/// \returns HSA_STATUS_SUCCESS if no errors; HSA_STATUS_ERROR if the profile
/// requirement is not met; otherwise the failing call's status.
hsa_status_t InitAndSetupHSA(BaseRocR* test) {
  if (test->enable_interrupt()) {
    SetEnv("HSA_ENABLE_INTERRUPT", "1");
  }

  hsa_status_t status = hsa_init();
  RET_IF_HSA_UTILS_ERR(status);

  // Locate the agents.
  hsa_agent_t gpu_agent;
  gpu_agent.handle = 0;
  status = hsa_iterate_agents(FindGPUDevice, &gpu_agent);
  RET_IF_HSA_UTILS_ERR(rocrtst::ProcessIterateError(status));
  test->set_gpu_device1(gpu_agent);

  hsa_agent_t cpu_agent;
  cpu_agent.handle = 0;
  status = hsa_iterate_agents(FindCPUDevice, &cpu_agent);
  RET_IF_HSA_UTILS_ERR(rocrtst::ProcessIterateError(status));
  test->set_cpu_device(cpu_agent);

  // A handle still at its initial 0 means the search did not fill it in.
  if (gpu_agent.handle == 0) {
    std::cout << "GPU Device is not Created properly!" << std::endl;
    RET_IF_HSA_UTILS_ERR(HSA_STATUS_ERROR);
  }

  if (cpu_agent.handle == 0) {
    std::cout << "CPU Device is not Created properly!" << std::endl;
    RET_IF_HSA_UTILS_ERR(HSA_STATUS_ERROR);
  }

  if (test->verbosity() > 0) {
    char name[64] = {0};
    status = hsa_agent_get_info(gpu_agent, HSA_AGENT_INFO_NAME, name);
    RET_IF_HSA_UTILS_ERR(status);
    std::cout << "The device name is " << name << std::endl;
  }

  // Record the GPU's profile and make sure the test may run on it.
  hsa_profile_t hw_profile;
  status = hsa_agent_get_info(gpu_agent, HSA_AGENT_INFO_PROFILE, &hw_profile);
  RET_IF_HSA_UTILS_ERR(status);
  test->set_profile(hw_profile);

  if (!CheckProfileAndInform(test)) {
    return HSA_STATUS_ERROR;
  }

  hsa_signal_t completion_sig;
  status = hsa_signal_create(1, 0, NULL, &completion_sig);
  RET_IF_HSA_UTILS_ERR(status);
  test->set_signal(completion_sig);

  return HSA_STATUS_SUCCESS;
}
/// Silent profile check: true when \p test has no profile requirement (-1)
/// or when the required profile equals the profile stored in the test.
/// \param[in] test The test that specifies the required profile.
/// \returns true if the test may run, false otherwise.
bool CheckProfile(BaseRocR const* test) {
  const bool no_requirement = (test->requires_profile() == -1);
  return no_requirement || (test->requires_profile() == test->profile());
}
/// Load the kernel described by \p test from its code object file.
///
/// Opens "./<kernel_file_name>", creates a code object reader from it, builds
/// and freezes an executable for the test's GPU agent, looks up the kernel
/// symbol by name and stores the kernel object handle, private and group
/// segment sizes, kernarg size and kernarg alignment in \p test.
///
/// The file descriptor is closed on every path once the reader has been
/// created or has failed to be created.
/// \param[in] test Test for which the kernel will be loaded; must not be null
/// and must already have the kernel file name and kernel name set.
/// \returns HSA_STATUS_SUCCESS if no errors. If the file cannot be opened the
/// errno value of open() is returned cast to hsa_status_t; otherwise the
/// status of the first failing HSA call.
hsa_status_t LoadKernelFromObjFile(BaseRocR* test) {
  hsa_status_t err;
  hsa_code_object_reader_t code_obj_rdr = {0};
  hsa_executable_t executable = {0};

  assert(test != nullptr);
  hsa_agent_t* agent = test->gpu_device1();  // Assume GPU agent for now

  // The code object is looked up relative to the current directory.
  const std::string obj_file = "./" + test->kernel_file_name();
  const std::string kern_name = test->kernel_name();

  hsa_file_t file_handle = open(obj_file.c_str(), O_RDONLY);

  if (file_handle == -1) {
    // Capture errno first: the stream output below may overwrite it.
    const int open_errno = errno;
    std::cout << "failed to open " << obj_file.c_str() << " at line "
              << __LINE__ << ", file: " << __FILE__ << std::endl;
    // NOTE(review): errno values are not HSA status codes; kept so existing
    // callers see the same value as before.
    return (hsa_status_t) open_errno;
  }

  err = hsa_code_object_reader_create_from_file(file_handle, &code_obj_rdr);
  // Close before checking the status so the descriptor is not leaked when
  // reader creation fails.
  close(file_handle);
  RET_IF_HSA_UTILS_ERR(err);

  err = hsa_executable_create_alt(HSA_PROFILE_FULL,
                                  HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
                                  NULL, &executable);
  RET_IF_HSA_UTILS_ERR(err);

  err = hsa_executable_load_agent_code_object(executable, *agent, code_obj_rdr,
                                              NULL, NULL);
  RET_IF_HSA_UTILS_ERR(err);

  err = hsa_executable_freeze(executable, NULL);
  RET_IF_HSA_UTILS_ERR(err);

  hsa_executable_symbol_t kern_sym;
  err = hsa_executable_get_symbol(executable, NULL, kern_name.c_str(), *agent,
                                  0, &kern_sym);
  RET_IF_HSA_UTILS_ERR(err);

  // Copy the kernel's attributes from the symbol into the test.
  uint64_t codeHandle;
  err = hsa_executable_symbol_get_info(kern_sym,
                       HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &codeHandle);
  RET_IF_HSA_UTILS_ERR(err);
  test->set_kernel_object(codeHandle);

  uint32_t val;
  err = hsa_executable_symbol_get_info(kern_sym,
                 HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &val);
  RET_IF_HSA_UTILS_ERR(err);
  test->set_private_segment_size(val);

  err = hsa_executable_symbol_get_info(kern_sym,
                   HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &val);
  RET_IF_HSA_UTILS_ERR(err);
  test->set_group_segment_size(val);

  err = hsa_executable_symbol_get_info(kern_sym,
                 HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &val);
  RET_IF_HSA_UTILS_ERR(err);
  test->set_kernarg_size(val);

  err = hsa_executable_symbol_get_info(kern_sym,
            HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT, &val);
  RET_IF_HSA_UTILS_ERR(err);
  test->set_kernarg_align(val);

  return HSA_STATUS_SUCCESS;
}
/// Create an AQL queue on \p device.
///
/// If \p num_pkts is 0 the agent is queried for HSA_AGENT_INFO_QUEUE_MAX_SIZE
/// and that size is used. A profiled queue is created with
/// hsa_ext_tools_queue_create_profiled (single-producer type); otherwise
/// hsa_queue_create is used (multi-producer type).
/// \param[in] device Agent for which the queue is created
/// \param[out] queue Receives the created queue pointer
/// \param[in] num_pkts Queue size in packets, or 0 for the agent's maximum
/// \param[in] do_profile Whether to create a profiled queue
/// \returns HSA_STATUS_SUCCESS if no errors encountered
hsa_status_t CreateQueue(hsa_agent_t device, hsa_queue_t** queue,
                         uint32_t num_pkts, bool do_profile) {
  hsa_status_t status;

  if (num_pkts == 0) {
    status = hsa_agent_get_info(device, HSA_AGENT_INFO_QUEUE_MAX_SIZE,
                                &num_pkts);
    RET_IF_HSA_UTILS_ERR(status);
  }

  if (!do_profile) {
    status = hsa_queue_create(device, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL,
                              NULL, UINT32_MAX, UINT32_MAX, queue);
    RET_IF_HSA_UTILS_ERR(status);
    return HSA_STATUS_SUCCESS;
  }

  status = hsa_ext_tools_queue_create_profiled(device, num_pkts,
                                               HSA_QUEUE_TYPE_SINGLE, NULL,
                                               NULL, UINT32_MAX, UINT32_MAX,
                                               queue);
  RET_IF_HSA_UTILS_ERR(status);
  return HSA_STATUS_SUCCESS;
}
void InitializeAQLPacket(const BaseRocR* test,
hsa_kernel_dispatch_packet_t* aql) {
assert(aql != nullptr);
if (aql == nullptr) {
return;
}
aql->header = 0; // Set this right before doorbell ring
aql->setup = 1;
aql->workgroup_size_x = 256;
aql->workgroup_size_y = 1;
aql->workgroup_size_z = 1;
aql->grid_size_x = (uint64_t) 256; // manual_input*group_input; workg max sz
aql->grid_size_y = 1;
aql->grid_size_z = 1;
aql->private_segment_size = test->private_segment_size();
aql->group_segment_size = test->group_segment_size();
// Pin kernel code and the kernel argument buffer to the aql packet->
aql->kernel_object = test->kernel_object();
aql->kernarg_address = NULL;
aql->completion_signal.handle = test->signal().handle;
return;
}
/// Reserve the next packet slot in the test's main queue and copy the
/// staging AQL packet into it, field by field.
///
/// The "header" and "setup" fields are deliberately not copied. The slot is
/// obtained by advancing the queue write index by one and masking the
/// returned index with (queue size - 1).
/// \param[in] test Test containing the queue and aql packet to be written.
/// \returns void
void WriteAQLToQueue(BaseRocR* test) {
  assert(test);
  assert(test->main_queue());

  hsa_queue_t* const queue = test->main_queue();
  void* const ring_base = queue->base_address;
  const uint32_t index_mask = queue->size - 1;

  const uint64_t write_idx = hsa_queue_add_write_index_relaxed(queue, 1);

  const hsa_kernel_dispatch_packet_t& src = test->aql();
  hsa_kernel_dispatch_packet_t& dst =
      static_cast<hsa_kernel_dispatch_packet_t*>(ring_base)
                                                    [write_idx & index_mask];

  dst.workgroup_size_x = src.workgroup_size_x;
  dst.workgroup_size_y = src.workgroup_size_y;
  dst.workgroup_size_z = src.workgroup_size_z;

  dst.grid_size_x = src.grid_size_x;
  dst.grid_size_y = src.grid_size_y;
  dst.grid_size_z = src.grid_size_z;

  dst.private_segment_size = src.private_segment_size;
  dst.group_segment_size = src.group_segment_size;

  dst.kernel_object = src.kernel_object;
  dst.kernarg_address = src.kernarg_address;
  dst.completion_signal = src.completion_signal;
}
/// Allocate a kernel argument buffer from the test's kern_arg pool, copy
/// \p args into it at an aligned address and point the staging AQL packet's
/// kernarg_address at that aligned copy.
///
/// The allocation is \p arg_size plus twice the kernarg alignment, and the
/// unaligned pointer is stored with set_kernarg_buffer(). Access to the
/// buffer is granted to both the GPU and the CPU agent of \p test.
/// \param[in] test Test providing the pool, alignment and agents
/// \param[in] args Block of kernel argument data to copy; must not be null
/// \param[in] arg_size Size in bytes of \p args
/// \returns HSA_STATUS_SUCCESS if no errors
hsa_status_t AllocAndSetKernArgs(BaseRocR* test, void* args, size_t arg_size) {
  assert(args != nullptr);
  assert(test != nullptr);

  size_t alignment = test->kernarg_align();

  // Allocate enough extra space for alignment adjustments if necessary
  size_t alloc_size = arg_size + (alignment << 1);

  void* raw_buf = nullptr;
  hsa_status_t status = hsa_amd_memory_pool_allocate(test->kern_arg_pool(),
                                                     alloc_size, 0, &raw_buf);
  RET_IF_HSA_UTILS_ERR(status);

  // Remember the unaligned pointer; that is the one to free.
  test->set_kernarg_buffer(raw_buf);

  void* aligned_buf = rocrtst::AlignUp(raw_buf, alignment);

  assert(arg_size >= test->kernarg_size());
  assert(((uintptr_t)aligned_buf + arg_size) <
         ((uintptr_t)raw_buf + alloc_size));

  status = hsa_memory_copy_workaround_cpu(aligned_buf, args, arg_size);
  RET_IF_HSA_UTILS_ERR(status);

  hsa_agent_t agents[2] = {*test->gpu_device1(), *test->cpu_device()};
  status = hsa_amd_agents_allow_access(2, agents, NULL, raw_buf);
  RET_IF_HSA_UTILS_ERR(status);

  test->aql().kernarg_address = aligned_buf;

  return HSA_STATUS_SUCCESS;
}
/// Allocate \p len bytes from \p pool and grant the GPU and CPU agents of
/// \p test access to the new buffer.
/// \param[in] test Test having the agents to which access is granted
/// \param[in] len Size of the memory buffer to allocate
/// \param[in] pool Pool from which to allocate memory
/// \param[out] buffer Receives the pointer to the newly allocated memory
/// \returns HSA_STATUS_SUCCESS if no errors
hsa_status_t AllocAndAllowAccess(BaseRocR* test, size_t len,
                                 hsa_amd_memory_pool_t pool, void**buffer) {
  hsa_status_t status = hsa_amd_memory_pool_allocate(pool, len, 0, buffer);
  RET_IF_HSA_UTILS_ERR(status);

  hsa_agent_t agents[2] = {*test->gpu_device1(), *test->cpu_device()};
  status = hsa_amd_agents_allow_access(2, agents, NULL, *buffer);
  RET_IF_HSA_UTILS_ERR(status);

  return status;
}
/// Fill \p count bytes at \p ptr with \p value by way of a staging buffer.
///
/// A temporary buffer of \p count bytes is allocated from the test's CPU
/// pool, made accessible to the test's GPU and CPU agents, filled with
/// memset() and then copied to \p ptr with hsa_memory_copy_workaround_gen().
/// The temporary buffer is released on every path once it has been
/// allocated, including when granting access or copying fails.
/// \param[in] ptr Start of the memory to be filled
/// \param[in] value Fill value; memset() uses only its low byte
/// \param[in] count Number of bytes to fill
/// \param[in] dst_ag Destination agent passed to the copy
/// \param[in] src_ag Source agent passed to the copy
/// \param[in] test Test providing the CPU pool and the agents
/// \returns HSA_STATUS_SUCCESS if no errors, otherwise the status of the
/// first failing call
hsa_status_t hsa_memory_fill_workaround_gen(void* ptr, uint32_t value,
       size_t count, hsa_agent_t dst_ag, hsa_agent_t src_ag, BaseRocR* test) {
  hsa_status_t err;
  void *tmp_mem = nullptr;

  err = hsa_amd_memory_pool_allocate(test->cpu_pool(), count, 0, &tmp_mem);
  RET_IF_HSA_UTILS_ERR(err);

  hsa_agent_t ag_list[2] = {*test->gpu_device1(), *test->cpu_device()};
  err = hsa_amd_agents_allow_access(2, ag_list, NULL, tmp_mem);

  if (err == HSA_STATUS_SUCCESS) {
    (void)memset(tmp_mem, value, count);
    err = hsa_memory_copy_workaround_gen(ptr, tmp_mem, count, dst_ag, src_ag);
  }

  // Free the staging buffer before reporting any error so it is not leaked.
  // As before, a failure of the free itself is not reported.
  (void)hsa_amd_memory_pool_free(tmp_mem);

  RET_IF_HSA_UTILS_ERR(err);

  return HSA_STATUS_SUCCESS;
}
#undef RET_IF_HSA_UTILS_ERR
} // namespace rocrtst
+172
View File
@@ -0,0 +1,172 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef ROCRTST_COMMON_BASE_ROCR_UTILS_H_
#define ROCRTST_COMMON_BASE_ROCR_UTILS_H_ 1
/// \file
/// Prototypes of utility functions that act on BaseRocR objects.
#include "common/base_rocr.h"
#include "hsa/hsa.h"
namespace rocrtst {
/// Open binary kernel object file and set all member data related to the
/// kernel. Assumes that the input test already has the kernel file name and
/// kernel function name specified. The file is opened relative to the current
/// directory ("./" is prepended to the file name).
/// \param[in] test Test for which the kernel will be loaded.
/// \returns HSA_STATUS_SUCCESS if no errors
hsa_status_t LoadKernelFromObjFile(BaseRocR* test);
/// Do initialization tasks for an HSA test program. This includes calling
/// hsa_init(), finding and setting the cpu and gpu agent member variables,
/// creating the signal needed for queueing AQL packets and checking
/// HW requirements. If the test enables interrupts, HSA_ENABLE_INTERRUPT is
/// set to "1" before hsa_init() is called.
/// \param[in] test Test to initialize
/// \returns HSA_STATUS_SUCCESS if no errors
hsa_status_t InitAndSetupHSA(BaseRocR* test);
/// For the provided device agent, create an AQL queue
/// \param[in] device Device for which a queue is to be created
/// \param[out] queue Address to which created queue pointer will be written
/// \param[in] num_pkts [Optional] Size of the queue to create. If 0 (the
/// default), the agent's HSA_AGENT_INFO_QUEUE_MAX_SIZE is used.
/// \param[in] do_profile [Optional] Specify whether a profiled queue should
/// be created
/// \returns HSA_STATUS_SUCCESS if no errors encountered
hsa_status_t CreateQueue(hsa_agent_t device, hsa_queue_t** queue,
uint32_t num_pkts = 0, bool do_profile = false);
/// This function sets some reasonable default values for an AQL packet.
/// Override any field as necessary after calling this function. The header
/// field is set to 0 and the kernarg address to NULL.
/// \param[in] test Test from which information to populate aql packet can
/// be drawn.
/// \param[inout] aql Caller provided pointer to aql packet that will be
/// populated
/// \returns void
void InitializeAQLPacket(const BaseRocR* test,
hsa_kernel_dispatch_packet_t* aql);
/// This function writes all of the aql packet fields to the queue besides
/// "setup" and "header". This assumes all the aql fields have been set
/// appropriately. The write index of the test's main queue is advanced by one
/// to reserve the packet slot.
/// \param[in] test Test containing the queue and aql packet to be written.
/// \returns void
void WriteAQLToQueue(BaseRocR* test);
/// This function writes the first 32 bits of an aql packet to the provided
/// aql packet. This function is meant to be called immediately before
/// ringing the door_bell signal.
///
/// \p header occupies the low 16 bits of the written word and \p setup the
/// high 16 bits. The store is a single atomic store with release ordering.
/// Both values are widened to uint32_t before being combined, so a \p setup
/// value with its top bit set is not shifted into the sign bit of an int.
/// \param[in] header Value to be written to header field
/// \param[in] setup Value to be written to setup field
/// \param[in] queue_packet Start address in queue memory of the aql packet
/// to be written
/// \returns void
inline void AtomicSetPacketHeader(uint16_t header, uint16_t setup,
                                  hsa_kernel_dispatch_packet_t* queue_packet) {
  const uint32_t first_word =
      static_cast<uint32_t>(header) | (static_cast<uint32_t>(setup) << 16);

  __atomic_store_n(reinterpret_cast<uint32_t*>(queue_packet),
                   first_word, __ATOMIC_RELEASE);
}
/// Perform common operations to clean up after executing a test. Specifically,
/// the kernel argument buffer, main queue and completion signal of the test
/// are released if set, hsa_shut_down() is called and the
/// HSA_ENABLE_INTERRUPT environment variable is reset to its original value.
/// \param[in] test Test for which clean up will be performed
/// \returns HSA_STATUS_SUCCESS if everything cleaned up ok, or appropriate HSA
/// error code otherwise.
hsa_status_t CommonCleanUp(BaseRocR* test);
/// Check to see if target machine has the necessary profile to run the
/// provided test.
/// \param[in] test The test that specifies the required profile.
/// \returns true if the test has no profile requirement or the required
/// profile equals the profile stored in the test, false otherwise.
bool CheckProfile(BaseRocR const* test);
/// Allocate memory from the kernel args pool and write the provided argument
/// data to the kernel arg memory. Assumes kern_arg memory pool has been
/// assigned. The amount of memory allocated will actually be \p arg_size
/// plus twice the alignment required by the kernel arguments. The arguments
/// will be written with the proper alignment within the allocated buffer, and
/// the test's aql packet kernarg_address is set to that aligned address.
/// \p test kernarg_buffer() will point to the allocated buffer, and it should
/// be freed when the kernel is no longer being used.
/// \param[in] test Test from which to find kern_arg pool to write arguments
/// \param[in] args pointer to block of data containing kernel arguments to be
/// written. Arguments are assumed to be of the correct placement, length,
/// and with any padding that is expected by the OpenCL kernel
/// \param[in] arg_size Size of the kernel arg data (including padding) to be
/// written
/// \returns HSA_STATUS_SUCCESS if no errors
hsa_status_t AllocAndSetKernArgs(BaseRocR* test, void* args,
size_t arg_size);
/// This function will set the cpu, gpu and kernel-argument memory pools of
/// the test to the type used in many applications.
/// \param[in] test Test whose cpu and gpu agents are searched for pools and
/// whose pool members are set.
/// \returns HSA_STATUS_SUCCESS if all pools were found, or appropriate HSA
/// error code otherwise.
hsa_status_t SetPoolsTypical(BaseRocR* test);
/// Allocate memory from a specified pool and grant both standard BaseRocR
/// agents access
/// \param[in] test Test having the agents to which access is granted
/// \param[in] len Size of the memory buffer to allocate
/// \param[in] pool Pool from which to allocate memory
/// \param[out] buffer Address of pointer which will point to newly allocated
/// memory upon return
/// \returns HSA_STATUS_SUCCESS if no errors
hsa_status_t AllocAndAllowAccess(BaseRocR* test, size_t len,
hsa_amd_memory_pool_t pool, void**buffer);
/// Work-around for hsa_amd_memory_fill, which is currently broken. A
/// temporary buffer from the test's cpu pool is filled and then copied to
/// the destination.
/// \param[in] ptr Pointer to start of memory location to be filled
/// \param[in] value Value to write to each byte of input buffer
/// \param[in] count Size of buffer to fill
/// \param[in] dst_ag Agent owning the buffer to be filled
/// \param[in] src_ag Agent wanting to do the fill
/// \param[in] test Test that has handles to cpu and gpu agents that can own
/// either source or destination of fill
/// \returns HSA_STATUS_SUCCESS if no errors
hsa_status_t hsa_memory_fill_workaround_gen(void* ptr, uint32_t value,
size_t count, hsa_agent_t dst_ag, hsa_agent_t src_ag, BaseRocR* test);
} // namespace rocrtst
#endif // ROCRTST_COMMON_BASE_ROCR_UTILS_H_
+403
View File
@@ -0,0 +1,403 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
/// \file
/// Implementation of utility functions used by RocR applications
#include "common/common.h"
#include <assert.h>
#include <sstream>
#include <string>
namespace rocrtst {
/// Report a failed HSA call on stdout and return its status from the
/// enclosing function. The argument is evaluated exactly once, and the
/// do/while(0) wrapper makes the macro behave as a single statement (it must
/// be followed by a semicolon), so it is safe inside unbraced if/else.
/// \param err Expression of type hsa_status_t to check.
#define RET_IF_HSA_COMMON_ERR(err) do { \
  const hsa_status_t ret_if_hsa_status_ = (err); \
  if (ret_if_hsa_status_ != HSA_STATUS_SUCCESS) { \
    std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
              __FILE__ << ". Call returned " << ret_if_hsa_status_ << std::endl; \
    return ret_if_hsa_status_; \
  } \
} while (0)
/// Shared implementation behind FindGPUDevice() and FindCPUDevice().
/// Queries the device type of \p agent and, when it equals \p dev_type,
/// stores the agent through \p data.
/// \param[in] agent Agent to examine
/// \param[out] data Must point to an hsa_agent_t; written only on a match
/// \param[in] dev_type Device type being searched for
/// \returns HSA_STATUS_INFO_BREAK on a match, HSA_STATUS_SUCCESS when the
/// agent is of a different type, HSA_STATUS_ERROR_INVALID_ARGUMENT when
/// \p data is null, or the error reported by hsa_agent_get_info()
static hsa_status_t FindAgent(hsa_agent_t agent, void* data,
                              hsa_device_type_t dev_type) {
  assert(data != nullptr);

  // The assert disappears in NDEBUG builds, so keep a run-time check too.
  if (data == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_device_type_t agent_type;
  hsa_status_t query_status =
      hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &agent_type);
  RET_IF_HSA_COMMON_ERR(query_status);

  if (agent_type != dev_type) {
    return HSA_STATUS_SUCCESS;  // Not the kind of agent we want.
  }

  *(reinterpret_cast<hsa_agent_t*>(data)) = agent;
  return HSA_STATUS_INFO_BREAK;
}
/// Agent call-back that matches GPU agents; forwards to FindAgent() with
/// HSA_DEVICE_TYPE_GPU. On a match the agent is written through \p data and
/// HSA_STATUS_INFO_BREAK is returned.
hsa_status_t FindGPUDevice(hsa_agent_t agent, void* data) {
return FindAgent(agent, data, HSA_DEVICE_TYPE_GPU);
}
/// Agent call-back that matches CPU agents; forwards to FindAgent() with
/// HSA_DEVICE_TYPE_CPU. On a match the agent is written through \p data and
/// HSA_STATUS_INFO_BREAK is returned.
hsa_status_t FindCPUDevice(hsa_agent_t agent, void* data) {
return FindAgent(agent, data, HSA_DEVICE_TYPE_CPU);
}
/// Enumeration that indicates whether a pool property must be present or not.
/// This is meant to be used by FindPool
typedef enum {
POOL_PROP_OFF = 0, ///< The property must not be present.
POOL_PROP_ON, ///< The property must be present.
POOL_PROP_DONT_CARE ///< We don't care if the property is present or not.
} pool_prop_t;
/// Test whether \p pool satisfies a set of property requirements and, if it
/// does, hand it back through \p data.
/// \param[in] pool Pool to evaluate
/// \param[out] data Must point to an hsa_amd_memory_pool_t; written only when
/// the pool satisfies every requirement
/// \param[in] in_segment Segment the pool must belong to
/// \param[in] accessible_by_all Requirement on
/// HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL
/// \param[in] kern_arg Requirement on the KERNARG_INIT global flag (only
/// checked when \p in_segment is HSA_AMD_SEGMENT_GLOBAL)
/// \param[in] fine_grain Requirement on the FINE_GRAINED global flag (only
/// checked when \p in_segment is HSA_AMD_SEGMENT_GLOBAL)
/// \returns HSA_STATUS_INFO_BREAK when the pool matches, HSA_STATUS_SUCCESS
/// when it does not, HSA_STATUS_ERROR_INVALID_ARGUMENT when \p data is null,
/// or the error reported by hsa_amd_memory_pool_get_info()
static hsa_status_t
FindPool(hsa_amd_memory_pool_t pool, void* data, hsa_amd_segment_t in_segment,
         pool_prop_t accessible_by_all, pool_prop_t kern_arg,
         pool_prop_t fine_grain) {
  if (data == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_amd_segment_t pool_segment;
  hsa_status_t status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &pool_segment);
  RET_IF_HSA_COMMON_ERR(status);

  if (pool_segment != in_segment) {
    return HSA_STATUS_SUCCESS;
  }

  // The kernarg / fine-grained flags only exist for the global segment.
  if (in_segment == HSA_AMD_SEGMENT_GLOBAL) {
    uint32_t global_flags;
    status = hsa_amd_memory_pool_get_info(
        pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_flags);
    RET_IF_HSA_COMMON_ERR(status);

    if (kern_arg != POOL_PROP_DONT_CARE) {
      const bool has_kernarg =
          (global_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT) != 0;
      // Reject when the flag state contradicts the request.
      if (has_kernarg ? (kern_arg == POOL_PROP_OFF)
                      : (kern_arg == POOL_PROP_ON)) {
        return HSA_STATUS_SUCCESS;
      }
    }

    if (fine_grain != POOL_PROP_DONT_CARE) {
      const bool is_fine_grained =
          (global_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED) != 0;
      if (is_fine_grained ? (fine_grain == POOL_PROP_OFF)
                          : (fine_grain == POOL_PROP_ON)) {
        return HSA_STATUS_SUCCESS;
      }
    }
  }

  if (accessible_by_all != POOL_PROP_DONT_CARE) {
    bool all_can_access;
    status = hsa_amd_memory_pool_get_info(
        pool,
        static_cast<hsa_amd_memory_pool_info_t>(
            HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL),
        &all_can_access);
    RET_IF_HSA_COMMON_ERR(status);

    if (all_can_access ? (accessible_by_all == POOL_PROP_OFF)
                       : (accessible_by_all == POOL_PROP_ON)) {
      return HSA_STATUS_SUCCESS;
    }
  }

  // Every requested property matched.
  *(reinterpret_cast<hsa_amd_memory_pool_t*>(data)) = pool;
  return HSA_STATUS_INFO_BREAK;
}
/// Pool call-back: matches a global-segment pool without the KERNARG_INIT
/// flag; accessibility and fine-grained state are not considered.
hsa_status_t FindStandardPool(hsa_amd_memory_pool_t pool, void* data) {
return FindPool(pool, data, HSA_AMD_SEGMENT_GLOBAL, POOL_PROP_DONT_CARE,
POOL_PROP_OFF, POOL_PROP_DONT_CARE);
}
/// Pool call-back: matches a global-segment pool that has the KERNARG_INIT
/// flag set; accessibility and fine-grained state are not considered.
hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) {
return FindPool(pool, data, HSA_AMD_SEGMENT_GLOBAL, POOL_PROP_DONT_CARE,
POOL_PROP_ON, POOL_PROP_DONT_CARE);
}
/// Pool call-back: matches a global-segment pool that is accessible by all
/// agents and does not have the KERNARG_INIT flag; fine-grained state is not
/// considered.
hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data) {
return FindPool(pool, data, HSA_AMD_SEGMENT_GLOBAL, POOL_PROP_ON,
POOL_PROP_OFF, POOL_PROP_DONT_CARE);
}
/// Build a comma-separated list naming the global flags set on \p pool.
/// \param[in] pool Pool whose HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS is read
/// \param[out] out_str Receives the list ("KERNARG", "FINE GRAINED",
/// "COARSE GRAINED", in that order); emptied before the query is made
/// \returns HSA_STATUS_SUCCESS, or the error from the pool query
static hsa_status_t MakeGlobalFlagsString(const hsa_amd_memory_pool_t pool,
                                          std::string* out_str) {
  uint32_t flag_bits = 0;

  assert(out_str != nullptr);
  *out_str = "";

  hsa_status_t status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag_bits);
  RET_IF_HSA_COMMON_ERR(status);

  // Append one label, preceded by a separator unless it is the first.
  auto append_if_set = [&](uint32_t bit, const char* label) {
    if ((bit & flag_bits) == 0) {
      return;
    }
    if (!out_str->empty()) {
      *out_str += ", ";
    }
    *out_str += label;
  };

  append_if_set(HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT, "KERNARG");
  append_if_set(HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED, "FINE GRAINED");
  append_if_set(HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED,
                "COARSE GRAINED");

  return HSA_STATUS_SUCCESS;
}
/// Print the "Pool Segment:" row for \p pool to stdout.
/// \param[in] pool Pool whose segment is queried
/// \param[in] ind_lvl Indentation string printed in front of the row
/// \returns HSA_STATUS_SUCCESS, or the error from a failed pool query. If the
/// global-flags query fails, the row label has already been printed.
static hsa_status_t DumpSegment(const hsa_amd_memory_pool_t pool,
                                std::string const *ind_lvl) {
  uint32_t segment;
  hsa_status_t status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
  RET_IF_HSA_COMMON_ERR(status);

  fprintf(stdout, "%s%-25s", ind_lvl->c_str(), "Pool Segment:");

  std::string description;

  if (segment == HSA_AMD_SEGMENT_GLOBAL) {
    std::string flag_names;
    status = MakeGlobalFlagsString(pool, &flag_names);
    RET_IF_HSA_COMMON_ERR(status);
    description = "GLOBAL; FLAGS: " + flag_names;
  } else if (segment == HSA_AMD_SEGMENT_READONLY) {
    description = "READONLY";
  } else if (segment == HSA_AMD_SEGMENT_PRIVATE) {
    description = "PRIVATE";
  } else if (segment == HSA_AMD_SEGMENT_GROUP) {
    description = "GROUP";
  } else {
    // Unknown segment: the value column is left empty.
    std::cout << "Not Supported" << std::endl;
  }

  fprintf(stdout, "%-35s\n", description.c_str());
  return HSA_STATUS_SUCCESS;
}
/// Dump the properties of a memory pool to stdout, one row per property:
/// segment (with global flags), size, whether runtime allocation is allowed,
/// allocation granule, allocation alignment and whether the pool is
/// accessible by all agents. Sizes are printed in whole KB (bytes / 1024).
/// \param[in] pool Pool to describe
/// \param[in] indent Number of spaces printed in front of every row
/// \returns HSA_STATUS_SUCCESS, or the status of the first query that failed.
/// Rows preceding the failed query have already been printed.
hsa_status_t DumpMemoryPoolInfo(const hsa_amd_memory_pool_t pool,
                                uint32_t indent) {
  hsa_status_t err;
  std::string ind_lvl(indent, ' ');

  // A failure here used to be ignored; propagate it like every other query.
  err = DumpSegment(pool, &ind_lvl);
  RET_IF_HSA_COMMON_ERR(err);

  // Get the size of the POOL
  size_t pool_size = 0;
  err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
                                     &pool_size);
  RET_IF_HSA_COMMON_ERR(err);
  std::string sz_str = std::to_string(pool_size / 1024) + "KB";
  fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Size:",
          sz_str.c_str());

  bool alloc_allowed = false;
  err = hsa_amd_memory_pool_get_info(pool,
                 HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc_allowed);
  RET_IF_HSA_COMMON_ERR(err);
  fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Allocatable:",
          (alloc_allowed ? "TRUE" : "FALSE"));

  size_t alloc_granule = 0;
  err = hsa_amd_memory_pool_get_info(pool,
                 HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &alloc_granule);
  RET_IF_HSA_COMMON_ERR(err);
  std::string gr_str = std::to_string(alloc_granule / 1024) + "KB";
  fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Alloc Granule:",
          gr_str.c_str());

  size_t pool_alloc_alignment = 0;
  err = hsa_amd_memory_pool_get_info(pool,
                 HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT,
                 &pool_alloc_alignment);
  RET_IF_HSA_COMMON_ERR(err);
  std::string al_str = std::to_string(pool_alloc_alignment / 1024) + "KB";
  fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Alloc Alignment:",
          al_str.c_str());

  bool pl_access = false;
  err = hsa_amd_memory_pool_get_info(pool,
                 HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, &pl_access);
  RET_IF_HSA_COMMON_ERR(err);
  fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Acessible by all:",
          (pl_access ? "TRUE" : "FALSE"));

  return HSA_STATUS_SUCCESS;
}
/// Printable names for the pointer types reported by hsa_amd_pointer_info(),
/// indexed by the numeric value of the reported type.
static const char* Types[] = {"HSA_EXT_POINTER_TYPE_UNKNOWN",
                              "HSA_EXT_POINTER_TYPE_HSA",
                              "HSA_EXT_POINTER_TYPE_LOCKED",
                              "HSA_EXT_POINTER_TYPE_GRAPHICS",
                              "HSA_EXT_POINTER_TYPE_IPC"
                             };

/// Dump what the runtime knows about \p ptr to stdout: host and agent base
/// addresses, size, pointer type, user data and the handles of the agents
/// that can access the allocation.
/// \param[in] ptr Pointer to look up
/// \returns HSA_STATUS_SUCCESS, or the error from hsa_amd_pointer_info()
hsa_status_t DumpPointerInfo(void* ptr) {
  hsa_amd_pointer_info_t info;
  // Initialised so that the loop and free() below are well-defined even if
  // the runtime leaves the agent list untouched.
  hsa_agent_t* agents = nullptr;
  uint32_t count = 0;
  hsa_status_t err;

  err = hsa_amd_pointer_info(ptr, &info, malloc, &count, &agents);
  RET_IF_HSA_COMMON_ERR(err);

  std::cout << "Info for ptr: " << ptr << std::endl;
  std::cout << "CPU ptr: " << reinterpret_cast<void*>(info.hostBaseAddress) <<
            std::endl;
  std::cout << "GPU ptr: " << reinterpret_cast<void*>(info.agentBaseAddress)
            << std::endl;
  std::cout << "Size: " << info.sizeInBytes << std::endl;

  // Guard the table lookup: a type value outside the table must not read
  // past the end of Types[].
  const size_t num_types = sizeof(Types) / sizeof(Types[0]);
  const size_t type_idx = static_cast<size_t>(info.type);
  std::cout << "Type: ";
  if (type_idx < num_types) {
    std::cout << Types[type_idx];
  } else {
    std::cout << "<unrecognized pointer type " << type_idx << ">";
  }
  std::cout << std::endl;

  std::cout << "UsrPtr " << reinterpret_cast<void*>(info.userData) <<
            std::endl;
  std::cout << "Accessible by: ";

  for (uint32_t i = 0; i < count; i++) {
    std::cout << agents[i].handle << " ";
  }

  std::cout << " ;[EOM]" << std::endl;

  // The agent list was allocated through the malloc call-back passed above.
  free(agents);

  return HSA_STATUS_SUCCESS;
}
/// Fill \p count bytes starting at \p ptr using memset().
/// memset() converts its value argument to unsigned char, so only the low
/// byte of \p value is written. Always returns HSA_STATUS_SUCCESS.
hsa_status_t hsa_memory_fill_workaround_cpu(void* ptr, uint32_t value,
size_t count) {
(void)memset(ptr, value, count);
return HSA_STATUS_SUCCESS;
}
/// Copy \p size bytes from \p src to \p dst using memcpy(); the two ranges
/// must therefore not overlap. Always returns HSA_STATUS_SUCCESS.
hsa_status_t hsa_memory_copy_workaround_cpu(void* dst, const void *src,
size_t size) {
(void)memcpy(dst, src, size);
return HSA_STATUS_SUCCESS;
}
/// Copy \p size bytes from \p src to \p dst with hsa_amd_memory_async_copy()
/// and block until the copy's completion signal drops below 1.
/// The temporary completion signal is destroyed on every path once it has
/// been created, including when the copy or the wait fails.
/// \param[in] dst Destination address
/// \param[in] src Source address
/// \param[in] size Number of bytes to copy
/// \param[in] dst_ag Destination agent handle
/// \param[in] src_ag Source agent handle
/// \returns HSA_STATUS_SUCCESS if there are no errors; otherwise the status of
/// the first failing call, or HSA_STATUS_ERROR if the signal wait returned a
/// non-zero value
hsa_status_t hsa_memory_copy_workaround_gen(void* dst, const void *src,
                   size_t size, hsa_agent_t dst_ag, hsa_agent_t src_ag) {
  hsa_signal_t s;
  hsa_status_t err;

  err = hsa_signal_create(1, 0, NULL, &s);
  RET_IF_HSA_COMMON_ERR(err);

  err = hsa_amd_memory_async_copy(dst, dst_ag, src, src_ag, size, 0, NULL, s);

  // Only wait when the copy was actually submitted.
  if (err == HSA_STATUS_SUCCESS &&
      hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1,
                                UINT64_MAX, HSA_WAIT_STATE_BLOCKED) != 0) {
    err = HSA_STATUS_ERROR;
    std::cout << "Async copy signal error" << std::endl;
  }

  // Release the signal before reporting, so error returns do not leak it.
  hsa_status_t destroy_err = hsa_signal_destroy(s);

  RET_IF_HSA_COMMON_ERR(err);
  RET_IF_HSA_COMMON_ERR(destroy_err);

  return HSA_STATUS_SUCCESS;
}
/*! \brief Writes to the buffer and increments the write pointer to the
* buffer. Also, ensures that the argument is written to an
* aligned memory as specified. Return the new write pointer.
*
* @param dst The write pointer to the buffer
* @param src The source pointer
* @param size The size in bytes to copy
* @param alignment The alignment to follow while writing to the buffer
*/
// NOTE: addArg is compiled out by the "#if 0" below and is kept for reference
// only. As written it performs arithmetic on a void* ("dst + size"), which is
// not standard C++, so it would need a char* cast before being re-enabled.
#if 0
inline void *
addArg(void * dst, const void* src, size_t size, uint32_t alignment) {
dst = rocrtst::AlignUp(dst, alignment);
::memcpy(dst, src, size);
return dst + size;
}
#endif
#undef RET_IF_HSA_COMMON_ERR
} // namespace rocrtst
+174
View File
@@ -0,0 +1,174 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
/// \file
/// RocR related helper functions for sequences that come up frequently
#ifndef ROCRTST_COMMON_COMMON_H_
#define ROCRTST_COMMON_COMMON_H_
#include <stdio.h>
#include <string.h>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <vector>
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#include "hsa/hsa_ext_finalize.h"
namespace rocrtst {
// ALIGNED_(x): compiler-specific attribute requesting x-byte alignment.
// It is defined only for MSVC and GNU-compatible compilers; with any other
// compiler the macro is left undefined.
#if defined(_MSC_VER)
#define ALIGNED_(x) __declspec(align(x))
#else
#if defined(__GNUC__)
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif // __GNUC__
#endif // _MSC_VER
// MULTILINE(...): turns its whole argument list into one string literal.
#define MULTILINE(...) # __VA_ARGS__
// define below should be deleted. Leaving in commented out until code that
// refers to it has been corrected
// #define HSA_ARGUMENT_ALIGN_BYTES 16
/// If the provided agent is associated with a GPU, return that agent through
/// output parameter. This function is meant to be the call-back function used
/// with hsa_iterate_agents to find GPU agents.
/// \param[in] agent Agent to evaluate if GPU
/// \param[out] data If agent is associated with a GPU, this pointer will point
/// to the agent upon return. Must not be null.
/// \returns hsa_status_t
/// -HSA_STATUS_INFO_BREAK - the agent is a GPU and was stored in \p data
/// -HSA_STATUS_SUCCESS - the agent is not a GPU
/// -HSA_STATUS_ERROR_INVALID_ARGUMENT - \p data is null
/// -else the error code returned by the agent query
hsa_status_t FindGPUDevice(hsa_agent_t agent, void* data);
/// If the provided agent is associated with a CPU, return that agent through
/// output parameter. This function is meant to be the call-back function used
/// with hsa_iterate_agents to find CPU agents.
/// \param[in] agent Agent to evaluate if CPU
/// \param[out] data If agent is associated with a CPU, this pointer will point
/// to the agent upon return. Must not be null.
/// \returns hsa_status_t
/// -HSA_STATUS_INFO_BREAK - the agent is a CPU and was stored in \p data
/// -HSA_STATUS_SUCCESS - the agent is not a CPU
/// -HSA_STATUS_ERROR_INVALID_ARGUMENT - \p data is null
/// -else the error code returned by the agent query
hsa_status_t FindCPUDevice(hsa_agent_t agent, void* data);
// TODO(cfreehil): get rid of FindGlobalPool and replace with FindStandardPool
/// Find a global-segment pool that every agent can access.
/// The pool found will have the following properties:
/// HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL: On
/// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT: Off
/// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED: Don't care
/// \param[in] pool Pool to evaluate for required properties
/// \param[out] data If pool meets criteria, this pointer will point
/// to the pool upon return
/// \returns hsa_status_t
/// -HSA_STATUS_INFO_BREAK - we found a pool that meets criteria
/// -HSA_STATUS_SUCCESS - we did not find a pool that meets the criteria
/// -else return an appropriate error code for any error encountered
hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data);
/// Find a "standard" pool. By this, we mean not a kernel args pool.
/// The pool found will have the following properties:
/// HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL: Don't care
/// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT: Off
/// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED: Don't care
/// This function is meant to be the call-back function used
/// with hsa_amd_agent_iterate_memory_pools.
/// \param[in] pool Pool to evaluate for required properties
/// \param[out] data If pool meets criteria, this pointer will point
/// to the pool upon return. Must not be null.
/// \returns hsa_status_t
/// -HSA_STATUS_INFO_BREAK - we found a pool that meets criteria
/// -HSA_STATUS_SUCCESS - we did not find a pool that meets the criteria
/// -else return an appropriate error code for any error encountered
hsa_status_t FindStandardPool(hsa_amd_memory_pool_t pool, void* data);
/// Find a "kernel arg" pool.
/// The pool found will have the following properties:
/// HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL: Don't care
/// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT: On
/// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED: Don't care
/// This function is meant to be the call-back function used
/// with hsa_amd_agent_iterate_memory_pools.
/// \param[in] pool Pool to evaluate for required properties
/// \param[out] data If pool meets criteria, this pointer will point
/// to the pool upon return. Must not be null.
/// \returns hsa_status_t
/// -HSA_STATUS_INFO_BREAK - we found a pool that meets criteria
/// -HSA_STATUS_SUCCESS - we did not find a pool that meets the criteria
/// -else return an appropriate error code for any error encountered
hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data);
/// Dump information about provided memory pool to STDOUT. The rows printed
/// are: segment (with global flags), size, whether runtime allocation is
/// allowed, allocation granule, allocation alignment and whether the pool is
/// accessible by all agents. Sizes are printed in whole KB.
/// \param[in] pool Pool to gather and dump information for
/// \param[in] indent Number of spaces to indent output.
/// \returns hsa_status_t HSA_STATUS_SUCCESS if no errors
hsa_status_t DumpMemoryPoolInfo(const hsa_amd_memory_pool_t pool,
uint32_t indent = 0);
/// Dump information about a provided pointer to STDOUT: host and agent base
/// addresses, size, pointer type, user data and the handles of the agents
/// that can access it.
/// \param[in] ptr Pointer about which information is dumped.
/// \returns HSA_STATUS_SUCCESS if there are no errors
hsa_status_t DumpPointerInfo(void* ptr);
/// This is a work-around for filling cpu-memory to be used until
/// hsa_amd_memory_fill is fixed. Should only be used for cpu memory.
/// \param[in] ptr Start address of memory to be filled.
/// \param[in] value Value to fill buffer with. The fill is byte-wise
/// (memset), so only the low byte of this value is used.
/// \param[in] count Size in bytes of buffer to fill
/// \returns HSA_STATUS_SUCCESS if there are no errors
hsa_status_t hsa_memory_fill_workaround_cpu(void* ptr, uint32_t value,
size_t count);
/// This is a work-around for copying cpu-memory to be used until
/// hsa_amd_memory_copy is fixed. Should only be used for cpu memory.
/// \param[in] dst Destination address of memory to be copied
/// \param[in] src Source address of memory to be copied
/// \param[in] size Number of bytes to copy
/// \returns HSA_STATUS_SUCCESS if there are no errors
hsa_status_t hsa_memory_copy_workaround_cpu(void* dst, const void *src,
size_t size);
/// This is a work-around for copying memory to be used until
/// hsa_amd_memory_copy is fixed. Should be used when gpu local memory is
/// involved. The call blocks until the copy has completed.
/// \param[in] dst Destination address of memory to be copied
/// \param[in] src Source address of memory to be copied
/// \param[in] size Number of bytes to copy
/// \param[in] dst_ag Destination agent handle
/// \param[in] src_ag Source agent handle
/// \returns HSA_STATUS_SUCCESS if there are no errors
hsa_status_t hsa_memory_copy_workaround_gen(void* dst, const void *src,
size_t size, hsa_agent_t dst_ag, hsa_agent_t src_ag);
} // namespace rocrtst
#endif // ROCRTST_COMMON_COMMON_H_
+262
View File
@@ -0,0 +1,262 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include "common/helper_funcs.h"
#ifndef _WIN32
#include <unistd.h>
#endif
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <string>
#include <type_traits>
#include <vector>
namespace rocrtst {
/// Print a row-major 2-D array to std::cout.
/// Output is a blank line, the header, then one line per row with every
/// element followed by a single space, then a closing blank line.
/// \param[in] header Text printed above the array
/// \param[in] data Pointer to width * height elements
/// \param[in] width Number of elements per row
/// \param[in] height Number of rows
template<typename T>
void PrintArray(const std::string header, const T* data, const int width,
                const int height) {
  std::cout << std::endl << header << std::endl;

  // Rows are stored back to back, so a single cursor visits them in order.
  const T* cell = data;

  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col, ++cell) {
      std::cout << *cell << " ";
    }

    std::cout << std::endl;
  }

  std::cout << std::endl;
}
/// Fill a width x height array with pseudo-random values.
/// For integral T the values lie in [rangeMin, rangeMax] (both inclusive).
/// For floating-point T they lie in [rangeMin, rangeMax]; the "+ 1" that
/// makes the integral range inclusive is no longer applied, so results stay
/// within rangeMax.
/// \param[out] arrayPtr Destination buffer of width * height elements
/// \param[in] width Number of elements per row
/// \param[in] height Number of rows
/// \param[in] rangeMin Smallest value to generate
/// \param[in] rangeMax Largest value to generate
/// \param[in] seed Seed for the generator; 0 selects a time-based seed
/// \returns 0 on success, 1 if \p arrayPtr is null
template<typename T>
int FillRandom(T* arrayPtr,
               const int width,
               const int height,
               const T rangeMin,
               const T rangeMax,
               unsigned int seed) {
  if (!arrayPtr) {
    return 1;
  }

  if (!seed) {
    seed = (unsigned int)time(NULL);
  }

  // Kept for compatibility: callers that use rand() after this call see the
  // same generator state as before. The values below come from rand_r().
  srand(seed);

  // Integral ranges are inclusive, hence the extra 1; a floating-point range
  // must not be widened or results would exceed rangeMax.
  const double range = static_cast<double>(rangeMax - rangeMin) +
                       (std::is_integral<T>::value ? 1.0 : 0.0);

  /* random initialisation of input */
  for (int i = 0; i < height; i++) {
    for (int j = 0; j < width; j++) {
      // Index in size_t so large arrays cannot overflow an int.
      const size_t index = static_cast<size_t>(i) * width + j;
      arrayPtr[index] = rangeMin + T(range * rand_r(&seed) / (RAND_MAX + 1.0));
    }
  }

  return 0;
}
/// Round \p val up to the nearest power of two.
/// Values that already are a power of two are returned unchanged. An input
/// of 0, or one above 2^63, wraps around and yields 0.
/// \param[in] val Value to round
/// \returns Smallest power of two that is >= \p val (see wrap-around note)
uint64_t RoundToPowerOf2(uint64_t val) {
  const unsigned int bits = sizeof(val) * 8;

  val--;

  // Smear the highest set bit into every lower position. The shift amounts
  // are 1, 2, 4, ... and stay below the type width; shifting a 64-bit value
  // by 64 or more is undefined behaviour.
  for (unsigned int shift = 1; shift < bits; shift <<= 1) {
    val |= val >> shift;
  }

  val++;

  return val;
}
/// Check whether \p val is a power of two.
/// \param[in] val Value to test
/// \returns true when exactly one bit of \p val is set; false otherwise
/// (including for 0)
bool IsPowerOf2(uint64_t val) {
  // Clearing the lowest set bit leaves zero only when one bit was set.
  return val != 0 && (val & (val - 1)) == 0;
}
/// Compare two float arrays using the relative L2 error
/// sqrt(sum((ref - data)^2)) / sqrt(sum(ref^2)).
/// Every element, including index 0, takes part in the comparison.
/// \param[in] refData Reference values
/// \param[in] data Values to check against the reference
/// \param[in] length Number of elements in each array
/// \param[in] epsilon Largest relative error that still counts as a match
/// \returns true when the relative error is below \p epsilon; false
/// otherwise, and also false when the sum of squared reference values is
/// below 1e-7 (the relative error is then not meaningful)
bool
Compare(const float* refData, const float* data,
        const int length, const float epsilon) {
  float error = 0.0f;
  float ref = 0.0f;

  // Start at 0: the loop used to begin at 1 and ignored the first element.
  for (int i = 0; i < length; ++i) {
    float diff = refData[i] - data[i];
    error += diff * diff;
    ref += refData[i] * refData[i];
  }

  if (std::fabs(ref) < 1e-7f) {
    return false;
  }

  const float normRef = std::sqrt(ref);
  const float normError = std::sqrt(error);

  return (normError / normRef) < epsilon;
}
/// Compare two double arrays using the relative L2 error
/// sqrt(sum((ref - data)^2)) / sqrt(sum(ref^2)).
/// Every element, including index 0, takes part in the comparison.
/// \param[in] refData Reference values
/// \param[in] data Values to check against the reference
/// \param[in] length Number of elements in each array
/// \param[in] epsilon Largest relative error that still counts as a match
/// \returns true when the relative error is below \p epsilon; false
/// otherwise, and also false when the sum of squared reference values is
/// below 1e-7 (the relative error is then not meaningful)
bool
Compare(const double* refData, const double* data,
        const int length, const double epsilon) {
  double error = 0.0;
  double ref = 0.0;

  // Start at 0: the loop used to begin at 1 and ignored the first element.
  for (int i = 0; i < length; ++i) {
    double diff = refData[i] - data[i];
    error += diff * diff;
    ref += refData[i] * refData[i];
  }

  if (std::fabs(ref) < 1e-7) {
    return false;
  }

  const double normRef = std::sqrt(ref);
  const double normError = std::sqrt(error);

  return (normError / normRef) < epsilon;
}
/// Round \p value down to a multiple of \p alignment.
/// The computation clears the bits selected by (alignment - 1), which gives a
/// multiple of \p alignment when \p alignment is a power of two.
/// \param[in] value Value to align
/// \param[in] alignment Alignment in the same units as \p value
/// \returns \p value with the low bits cleared
intptr_t
AlignDown(intptr_t value, size_t alignment) {
  const size_t low_bits = alignment - 1;
  return (intptr_t)(value & ~low_bits);
}
/// Round a pointer down to an \p alignment boundary.
/// The address is converted to an integer and passed to the integer
/// AlignDown() overload.
/// \param[in] value Pointer to align
/// \param[in] alignment Alignment in bytes
/// \returns The aligned pointer (never greater than \p value)
void *
AlignDown(void* value, size_t alignment) {
  const uintptr_t address = reinterpret_cast<uintptr_t>(value);
  return reinterpret_cast<void*>(AlignDown(address, alignment));
}
/// Round a pointer up to an \p alignment boundary.
/// Adds (alignment - 1) to the address and then aligns the result down via
/// the integer AlignDown() overload.
/// \param[in] value Pointer to align
/// \param[in] alignment Alignment in bytes
/// \returns The aligned pointer (never less than \p value, barring address
/// wrap-around)
void *
AlignUp(void* value, size_t alignment) {
  const uintptr_t bumped =
      (uintptr_t)(reinterpret_cast<uintptr_t>(value) + alignment - 1);
  return reinterpret_cast<void*>(AlignDown(bumped, alignment));
}
/// Return the median of a set of values.
/// The vector is taken by value and sorted locally, so the caller's data is
/// left untouched and does not need to be pre-sorted.
/// \param[in] scores Values to examine
/// \returns The middle value (odd count), the average of the two middle
/// values (even count), or 0.0 when \p scores is empty
double CalcMedian(std::vector<double> scores) {
  if (scores.empty()) {
    // Nothing to index; avoids size / 2 - 1 wrapping around below.
    return 0.0;
  }

  std::sort(scores.begin(), scores.end());

  const size_t size = scores.size();
  const size_t mid = size / 2;

  if (size % 2 == 0) {
    return (scores[mid - 1] + scores[mid]) / 2;
  }

  return scores[mid];
}
/// Return the arithmetic mean of \p scores.
/// \param[in] scores Values to average
/// \returns Sum of the values divided by their count. For an empty vector
/// the division is 0.0 / 0.
double CalcMean(std::vector<double> scores) {
  double total = 0;

  for (const double score : scores) {
    total += score;
  }

  return total / scores.size();
}
/// Return the mean of the element-wise differences v2[i] - v1[i].
/// Only the elements present in both vectors are used, so a length mismatch
/// no longer reads past the end of the shorter vector. With equal lengths
/// the result is the same as before.
/// \param[in] v1 Values to subtract
/// \param[in] v2 Values to subtract from
/// \returns Sum of the differences divided by the number of pairs. With no
/// pairs the division is 0.0 / 0.
double CalcMean(const std::vector<double>& v1, const std::vector<double>& v2) {
  const size_t count = (v1.size() < v2.size()) ? v1.size() : v2.size();
  double mean = 0;

  for (size_t i = 0; i < count; i++) {
    mean += v2[i] - v1[i];
  }

  return mean / count;
}
/// Return the population standard deviation of \p scores around
/// \p score_mean: sqrt(sum((x - mean)^2) / N).
/// NOTE(review): \p score_mean is an int, so a fractional mean passed by the
/// caller is truncated before use. Widening it needs a matching change to
/// the declaration in the header.
/// \param[in] scores Values to examine
/// \param[in] score_mean Mean to measure the deviation from
/// \returns The standard deviation. For an empty vector the division is
/// 0.0 / 0.
double CalcStdDeviation(std::vector<double> scores, int score_mean) {
  double sum_of_squares = 0.0;

  for (const double score : scores) {
    const double delta = score - score_mean;
    sum_of_squares += delta * delta;
  }

  sum_of_squares /= scores.size();

  return sqrt(sum_of_squares);
}
/////////////////////////////////////////////////////////////////
// Template Instantiations
/////////////////////////////////////////////////////////////////
// PrintArray and FillRandom are defined in this source file, so the
// specialisations used by other translation units are instantiated
// explicitly here: uint32_t and float for both templates.
template
void PrintArray<uint32_t>(const std::string, const unsigned int*, int, int);
template
void PrintArray<float>(const std::string, const float*, int, int);
template
int FillRandom<uint32_t>(uint32_t* arrayPtr,
const int width, const int height,
uint32_t rangeMin, uint32_t rangeMax,
unsigned int seed);
template
int FillRandom<float>(float* arrayPtr,
const int width, const int height,
float rangeMin, float rangeMax, unsigned int seed);
} // namespace rocrtst
+105
View File
@@ -0,0 +1,105 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef ROCRTST_COMMON_HELPER_FUNCS_H_
#define ROCRTST_COMMON_HELPER_FUNCS_H_
/// \file
/// General-purpose helper functions
#include <string>
#include <vector>
namespace rocrtst {
/// Compare two float arrays using the relative L2 error
/// sqrt(sum((ref - data)^2)) / sqrt(sum(ref^2)).
/// \param[in] refData Reference values
/// \param[in] data Values to check against the reference
/// \param[in] length Number of elements in each array
/// \param[in] epsilon Largest relative error that still counts as a match
/// \returns true if the relative error is below \p epsilon; false otherwise,
/// and also false when the sum of squared reference values is below 1e-7
bool Compare(const float* refData, const float* data,
const int length, const float epsilon = 1e-6f);
/// Double-precision variant of Compare(); same parameters and result.
bool Compare(const double* refData, const double* data,
const int length, const double epsilon = 1e-6);
/// Calculate the arithmetic mean of the values in the vector
/// \param[in] scores Vector of doubles
/// \returns double Sum of the values divided by their count
double CalcMean(std::vector<double> scores);
/// Calculate the mean of the element-wise differences of the two vectors
/// (v2[i] - v1[i])
/// \param[in] v1 Values to subtract
/// \param[in] v2 Values to subtract from
/// \returns double Mean of the differences
double CalcMean(const std::vector<double>& v1, const std::vector<double>& v2);
/// Return the median value of a vector of doubles
/// \param[in] scores Vector of doubles
/// \returns double Median value of provided vector
double CalcMedian(std::vector<double> scores);
/// Calculate the standard deviation of the vector around the given mean
/// \param[in] scores Vector of doubles
/// \param[in] score_mean Mean of \p scores. NOTE(review): this is an int, so
/// a fractional mean is truncated when passed in.
/// \returns double sqrt(sum((x - score_mean)^2) / N)
double CalcStdDeviation(std::vector<double> scores, int score_mean);
/// Display a row-major 2-D array on std::cout
/// \param[in] header Text printed above the array
/// \param[in] data Pointer to width * height elements
/// \param[in] width Number of elements per row
/// \param[in] height Number of rows
/// The template is defined in the source file; only the specialisations
/// explicitly instantiated there can be linked against.
template<typename T>
void PrintArray(
const std::string header,
const T* data,
const int width,
const int height);
/// Fill an array with random values
/// \param[out] arrayPtr Destination buffer of width * height elements
/// \param[in] width Number of elements per row
/// \param[in] height Number of rows
/// \param[in] rangeMin Lower bound of the generated values
/// \param[in] rangeMax Upper bound of the generated values
/// \param[in] seed Seed for the generator; 0 selects a time-based seed
/// \returns 0 on success, 1 if \p arrayPtr is null
template<typename T>
int FillRandom(
T* arrayPtr,
const int width,
const int height,
const T rangeMin,
const T rangeMax,
unsigned int seed = 123);
// NOTE(review): intptr_t and uint64_t are used below, but this header only
// includes <string> and <vector>; it appears to rely on those (or on the
// including file) to provide the <cstdint> types.
/// Round \p value down to a multiple of \p alignment by clearing the bits
/// selected by (alignment - 1); \p alignment is expected to be a power of 2.
intptr_t AlignDown(intptr_t value, size_t alignment);
/// Round a pointer down to an \p alignment boundary
void* AlignDown(void* value, size_t alignment);
/// Round a pointer up to an \p alignment boundary
void* AlignUp(void* value, size_t alignment);
/// Rounds up to the next power of 2; a value that already is a power of 2 is
/// returned unchanged
uint64_t RoundToPowerOf2(uint64_t val);
/// Checks if a value is a power of 2 (0 is not)
bool IsPowerOf2(uint64_t val);
} // namespace rocrtst
#endif // ROCRTST_COMMON_HELPER_FUNCS_H_
+225
View File
@@ -0,0 +1,225 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include "common/hsa_perf_cntrs.h"
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <cassert>
#include <iostream>
#include <string>
#include <vector>
#include "hsa/hsa.h"
#include "hsa/hsa_ext_profiler.h"
#include "hsa/amd_hsa_tools_interfaces.h"
namespace rocrtst {
static void
PreDispatchCallback(const hsa_dispatch_callback_t* dispParam, void* usrArg) {
assert((dispParam->pre_dispatch) &&
"Pre Dispatch Callback Param is Malformed");
hsa_ext_tools_pmu_t* perfMgr =
reinterpret_cast<hsa_ext_tools_pmu_t*>(usrArg);
hsa_status_t status = hsa_ext_tools_pmu_begin(*perfMgr, dispParam->queue,
dispParam->aql_translation_handle, true);
assert((status == HSA_STATUS_SUCCESS) &&
"Error in beginning Perf Cntr Session");
}
static void
PostDispatchCallback(const hsa_dispatch_callback_t* dispParam, void* usrArg) {
assert((!dispParam->pre_dispatch) &&
"Post Dispatch Callback Param is Malformed");
hsa_ext_tools_pmu_t* perfMgr = reinterpret_cast<hsa_ext_tools_pmu_t*>(usrArg);
hsa_status_t status = hsa_ext_tools_pmu_end(*perfMgr, dispParam->queue,
dispParam->aql_translation_handle);
assert((status == HSA_STATUS_SUCCESS) &&
"Error in endning Perf Cntr Session");
}
/// Constructor. Only clears the PMU handle; the counter list is populated
/// and the PMU is created later by Init().
RocrPerfCntrApp::RocrPerfCntrApp()
    : perfMgr_(NULL) {}
/// Destructor. Frees the CntrInfo descriptors that Init() allocated with
/// `new` and stored in cntrList_.
///
/// The PMU handle (perfMgr_) and the runtime counter handles are not
/// released here.
///
/// NOTE(review): the class declares no copy operations; copying an instance
/// would now lead to a double delete. Confirm that no caller copies it.
RocrPerfCntrApp::~RocrPerfCntrApp() {
  while (!cntrList_.empty()) {
    CntrInfo* info = cntrList_.back();
    cntrList_.pop_back();
    delete info;
  }
}
/// Number of perf counter descriptors currently held in the list.
///
/// @returns Size of cntrList_, narrowed to 32 bits.
uint32_t RocrPerfCntrApp::GetNumPerfCntrs() {
  const uint32_t count = static_cast<uint32_t>(cntrList_.size());
  return count;
}
/// Look up the perf counter descriptor stored at a given position.
///
/// @param idx Position in the counter list. It is not range-checked.
/// @returns Pointer to the descriptor stored at that position.
CntrInfo* RocrPerfCntrApp::GetPerfCntr(uint32_t idx) {
  CntrInfo* const entry = cntrList_[idx];
  return entry;
}
/// Print the various fields of Perf Cntrs being programmed.
bool RocrPerfCntrApp::PrintCntrs() {
CntrInfo* info;
int size = uint32_t(cntrList_.size());
for (int idx = 0; idx < size; idx++) {
info = cntrList_[idx];
std::cout << std::endl;
std::cout << "Rocr Perf Cntr Id: " << info->cntrId << std::endl;
std::cout << "Rocr Perf Cntr Name: " << info->cntrName << std::endl;
std::cout << "Rocr Perf Cntr Blk Id: " << info->blkId << std::endl;
std::cout << "Rocr Perf Cntr Value: " << info->cntrResult << std::endl;
std::cout << "Rocr Perf Cntr Validation: " << info->cnfType << std::endl;
std::cout << std::endl;
}
return true;
}
/// Build the list of perf counters and program them on the given agent.
///
/// Steps, in order:
///   1. Append two SQ-block descriptors (block id 0x0E, i.e. 14, noted in
///      the original code as kHsaAiCounterBlockSQ): event 0x4 for waves and
///      event 0xE for work-items.
///   2. Create the PMU for the agent.
///   3. For every descriptor: resolve its counter block handle if it has
///      none, create a counter in that block, set the counter's event index
///      to the descriptor's cntrId, and enable the counter.
///
/// Failures are only detected through assert(), i.e. in builds where
/// assertions are active.
///
/// @param agent Agent on which the PMU is created.
/// @returns Status of the last runtime call that was made.
hsa_status_t RocrPerfCntrApp::Init(hsa_agent_t agent) {
  cntrList_.reserve(23);

  // Event for number of Waves
  cntrList_.push_back(new CntrInfo(0x4, "SQ_SQ_PERF_SEL_WAVES", NULL,
                                   0x0E, NULL, 0x00, 0xFFFFFFFF,
                                   CntrValCnf_Exact));

  // Event for number of Threads
  cntrList_.push_back(new CntrInfo(0xE, "SQ_SQ_PERF_SEL_ITEMS", NULL,
                                   0x0E, NULL, 0x00, 0xFFFFFFFF,
                                   CntrValCnf_Exact));

  // Create an instance of Perf Mgr
  hsa_status_t status = hsa_ext_tools_create_pmu(agent, &perfMgr_);
  assert((status == HSA_STATUS_SUCCESS) && "Error in creating Perf Cntr Mgr");

  const uint32_t total = GetNumPerfCntrs();
  uint32_t pos = 0;
  while (pos < total) {
    CntrInfo* info = GetPerfCntr(pos);
    ++pos;

    // Resolve the perf block handle only when the descriptor lacks one
    if (info->blkHndl == NULL) {
      status = hsa_ext_tools_get_counter_block_by_id(perfMgr_,
                                                     info->blkId,
                                                     &info->blkHndl);
      assert((status == HSA_STATUS_SUCCESS) &&
             "Error in getting Perf Cntr Blk Hndl");
    }

    // Create an instance of counter in the perf block
    status = hsa_ext_tools_create_counter(info->blkHndl, &info->cntrHndl);
    assert((status == HSA_STATUS_SUCCESS) &&
           "Error in creating Perf Cntr in Perf Blk");

    // Point the new counter at the event selected by the descriptor
    uint32_t cntrProp = HSA_EXT_TOOLS_COUNTER_PARAMETER_EVENT_INDEX;
    status = hsa_ext_tools_set_counter_parameter(
        info->cntrHndl, cntrProp, sizeof(uint32_t),
        static_cast<void*>(&info->cntrId));
    assert((status == HSA_STATUS_SUCCESS) &&
           "Error in updating Perf Cntr Property Event Index");

    // Enable the updated perf counter
    status = hsa_ext_tools_set_counter_enabled(info->cntrHndl, true);
    assert((status == HSA_STATUS_SUCCESS) && "Error in enabing Perf Cntr");
  }

  return status;
}
/// Install the pre- and post-dispatch callbacks on a queue and register the
/// address of this object's PMU handle as the user argument of both.
///
/// Failures are only detected through assert().
///
/// @param queue Queue whose dispatches should be bracketed by the callbacks.
void RocrPerfCntrApp::RegisterCallbacks(hsa_queue_t* queue) {
  hsa_status_t status = hsa_ext_tools_set_callback_functions(
      queue, PreDispatchCallback, PostDispatchCallback);
  assert((status == HSA_STATUS_SUCCESS) &&
         "Error in registering Pre & Post Dispatch Callbacks");

  // The same PMU handle address serves as argument for both callbacks.
  status = hsa_ext_tools_set_callback_arguments(queue, &perfMgr_, &perfMgr_);
  assert((status == HSA_STATUS_SUCCESS) &&
         "Error in registering Pre & Post Dispatch Callback Params");
}
/// Block until perf counter collection on the PMU has completed, passing a
/// timeout argument of 5000 to the runtime.
///
/// Failures are only detected through assert().
///
/// @returns Status reported by the wait call.
hsa_status_t RocrPerfCntrApp::Wait() {
  hsa_status_t status = hsa_ext_tools_pmu_wait_for_completion(perfMgr_, 5000);
  assert((status == HSA_STATUS_SUCCESS) &&
         "Error in Waiting for Perf Cntr Completion");
  return status;
}
/// Read back the result of every perf counter in the list and print it.
///
/// Each value is stored in the descriptor's cntrResult field. No comparison
/// against expectedResult / cnfType is performed here.
///
/// A failed read no longer gets masked by a later successful one: the first
/// failure status is remembered and returned, and the stale value of the
/// failing counter is not printed.
///
/// @returns HSA_STATUS_SUCCESS if every counter could be read, otherwise the
///          status of the first read that failed.
hsa_status_t RocrPerfCntrApp::Validate() {
  hsa_status_t firstFailure = HSA_STATUS_SUCCESS;

  const uint32_t size = GetNumPerfCntrs();
  for (uint32_t idx = 0; idx < size; idx++) {
    CntrInfo* info = GetPerfCntr(idx);

    const hsa_status_t status =
        hsa_ext_tools_get_counter_result(info->cntrHndl, &info->cntrResult);
    if (status != HSA_STATUS_SUCCESS) {
      if (firstFailure == HSA_STATUS_SUCCESS) {
        firstFailure = status;
      }
      std::cout << "Error in reading value of Perf Cntr at index: " << idx
                << std::endl;
      continue;
    }

    std::cout << "Value of Perf Cntr is: " << info->cntrResult << std::endl;
  }

  return firstFailure;
}
} // namespace rocrtst
+159
View File
@@ -0,0 +1,159 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
/// \file
/// Contains counter related functionality that can be used by samples and
/// tests.
#ifndef ROCRTST_COMMON_HSA_PERF_CNTRS_H_
#define ROCRTST_COMMON_HSA_PERF_CNTRS_H_
#include "hsa/hsa.h"
#include "hsa/hsa_ext_profiler.h"
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <iostream>
#include <vector>
#include <string>
namespace rocrtst {
/// Kind of check to apply to a counter value.
typedef enum CntrValCnfType {
/// No counter value validation should be performed
CntrValCnf_None,
/// Counter value should be an exact match to expectedResult
CntrValCnf_Exact,
/// Counter value should be greater than expectedResult
CntrValCnf_GreaterThan,
/// Counter value should be less than expectedResult
CntrValCnf_LessThan
} CntrValCnfType;
/// Descriptor of a single perf counter: which event in which hardware block
/// it selects, the runtime handles created for it, and its expected and
/// collected values.
typedef struct CntrInfo {
  /// Id of counter in hardware block
  uint32_t cntrId;

  /// Name of counter, always NUL-terminated (truncated if necessary)
  char cntrName[72];

  /// Handle of perf counter
  hsa_ext_tools_counter_t cntrHndl;

  /// Id of hardware block containing the counter
  uint32_t blkId;

  /// Handle of counter block
  hsa_ext_tools_counter_block_t blkHndl;

  /// Expected value of perf counter
  uint64_t expectedResult;

  /// Value of perf counter as collected
  uint64_t cntrResult;

  /// Type of validation upon completion of dispatch
  CntrValCnfType cnfType;

  /// Populate all fields of the descriptor.
  ///
  /// @param cntrId    Id of the counter (event) within its hardware block.
  /// @param cntrName  Counter name; copied into the fixed-size cntrName
  ///                  buffer. Names longer than the buffer are truncated and
  ///                  a null pointer yields an empty name.
  /// @param cntrHndl  Initial counter handle.
  /// @param blkId     Id of the hardware block containing the counter.
  /// @param blkHndl   Initial counter block handle.
  /// @param expResult Expected counter value.
  /// @param result    Initial value of the result field.
  /// @param cnfType   Validation type for the counter.
  CntrInfo(uint32_t cntrId, const char* cntrName, void* cntrHndl,
           uint32_t blkId, void* blkHndl,
           uint64_t expResult, uint64_t result, CntrValCnfType cnfType) {
    this->cntrId = cntrId;
    this->cntrHndl = cntrHndl;
    this->blkId = blkId;
    this->blkHndl = blkHndl;
    this->expectedResult = expResult;
    this->cntrResult = result;
    this->cnfType = cnfType;

    // Bounded copy: strncpy zero-fills the remainder of the buffer, and the
    // last byte is forced to NUL so the name can always be printed as a
    // C string. The previous memcpy(strlen) copied no terminator and did not
    // limit the length to the buffer size.
    strncpy(this->cntrName, (cntrName != NULL) ? cntrName : "",
            sizeof(this->cntrName) - 1);
    this->cntrName[sizeof(this->cntrName) - 1] = '\0';
  }
} CntrInfo;
/// Helper that owns a list of perf counter descriptors and a PMU handle,
/// and wraps the calls needed to program, collect and print the counters.
class RocrPerfCntrApp {
public:
// Constructor of the class. Only clears the PMU handle; the list of perf
// counters is built by Init()
RocrPerfCntrApp();
// Destructor of the class
~RocrPerfCntrApp();
// Return the number of perf counters
uint32_t GetNumPerfCntrs();
// Return the descriptor of the perf counter at the specified index.
// The index is not range-checked
CntrInfo* GetPerfCntr(uint32_t idx);
// Print the list of perf counters to stdout
bool PrintCntrs();
// Initialize the list of perf counters, create the PMU on the given agent
// and create/enable one counter per list entry
hsa_status_t Init(hsa_agent_t agent);
// Register Pre and Post dispatch callbacks on the given queue
void RegisterCallbacks(hsa_queue_t* queue);
// Wait for perf counter collection to complete
hsa_status_t Wait();
// Read back and print perf counter values
hsa_status_t Validate();
private:
// List of perf counter descriptors
std::vector<CntrInfo*> cntrList_;
// Handle of Perf Cntr Manager
hsa_ext_tools_pmu_t perfMgr_;
};
} // namespace rocrtst
#endif // ROCRTST_COMMON_HSA_PERF_CNTRS_H_
+190
View File
@@ -0,0 +1,190 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include "common/hsatimer.h"
#include <x86intrin.h>
namespace rocrtst {
// Number of nanoseconds in one second. Used to convert timespec seconds to
// nanoseconds and as the frequency assigned to newly created timers.
static const uint64_t kNanosecondsPerSecond = 1000000000;
/// Constructor. Calibrates the TSC once by calling MeasureTSCFreqHz() and
/// caches the outcome in freq_in_100mhz.
PerfTimer::PerfTimer(void)
    : freq_in_100mhz(MeasureTSCFreqHz()) {}
/// Destructor. Deletes every Timer created through CreateTimer(), from the
/// most recently created to the oldest, and empties the list.
PerfTimer::~PerfTimer() {
  for (std::vector<Timer*>::reverse_iterator it = _timers.rbegin();
       it != _timers.rend(); ++it) {
    delete *it;
  }
  _timers.clear();
}
int PerfTimer::CreateTimer(void) {
Timer* newTimer = new Timer;
newTimer->_start = 0;
newTimer->_clocks = 0;
newTimer->_freq = kNanosecondsPerSecond;
/* Push back the address of new Timer instance created */
_timers.push_back(newTimer);
return static_cast<int>(_timers.size() - 1);
}
int PerfTimer::StartTimer(int index) {
if (index >= static_cast<int>(_timers.size())) {
Error("Cannot reset timer. Invalid handle.");
return 1;
}
// General Linux timing method
#ifndef _AMD
struct timespec s;
clock_gettime(CLOCK_MONOTONIC, &s);
_timers[index]->_start = (uint64_t) s.tv_sec * kNanosecondsPerSecond
+ (uint64_t) s.tv_nsec;
#else
// AMD timing method
unsigned int unused;
_timers[index]->_start = __rdtscp(&unused);
#endif
return 0;
}
int PerfTimer::StopTimer(int index) {
uint64_t n = 0;
if (index >= static_cast<int>(_timers.size())) {
Error("Cannot reset timer. Invalid handle.");
return 1;
}
// General Linux timing method
#ifndef _AMD
struct timespec s;
clock_gettime(CLOCK_MONOTONIC, &s);
n = (uint64_t) s.tv_sec * kNanosecondsPerSecond + (uint64_t) s.tv_nsec;
#else
// AMD Linux timing
unsigned int unused;
n = __rdtscp(&unused);
#endif
n -= _timers[index]->_start;
_timers[index]->_start = 0;
#ifndef _AMD
_timers[index]->_clocks += n;
#else
// convert to ms
_timers[index]->_clocks += 1.0E-6 * 10 * n / freq_in_100mhz;
cout << "_AMD is enabled!!!" << endl;
#endif
return 0;
}
/// Report an error message by writing it, followed by a newline, to stdout
/// and flushing the stream.
///
/// @param str Message to print.
void PerfTimer::Error(std::string str) {
  std::cout << str;
  std::cout << std::endl;
}
double PerfTimer::ReadTimer(int index) {
if (index >= static_cast<int>(_timers.size())) {
Error("Cannot read timer. Invalid handle.");
return 1;
}
double reading = static_cast<double>(_timers[index]->_clocks);
reading = static_cast<double>(reading / _timers[index]->_freq);
return reading;
}
/// Reset a timer: clears both its accumulated total and its start point.
///
/// An index that does not name an existing timer (negative indices
/// included) is treated as fatal: an error is printed and the process exits
/// with status 1.
///
/// @param index Index returned by CreateTimer().
void PerfTimer::ResetTimer(int index) {
  // Reject negative handles too; they would index out of bounds.
  if (index < 0 || index >= static_cast<int>(_timers.size())) {
    Error("Invalid index value\n");
    exit(1);
  }

  // Both fields are integral tick/nanosecond counts.
  _timers[index]->_clocks = 0;
  _timers[index]->_start = 0;
}
/// Sample CLOCK_MONOTONIC_RAW and express it in microseconds.
///
/// @returns Seconds scaled to microseconds plus the nanosecond part divided
///          by 1000 (integer division).
uint64_t PerfTimer::CoarseTimestampUs() {
  struct timespec now;
  clock_gettime(CLOCK_MONOTONIC_RAW, &now);

  const uint64_t wholeSecondsUs = uint64_t(now.tv_sec) * 1000000;
  return wholeSecondsUs + now.tv_nsec / 1000;
}
/// Calibrate the TSC against the coarse monotonic clock.
///
/// Busy-waits until at least 1,000,000,000 TSC ticks have elapsed, measures
/// how long that took with CoarseTimestampUs(), and derives the tick rate.
///
/// @returns round(10 * ticks / nanoseconds), i.e. the TSC frequency
///          expressed in units of 100 MHz (despite the "Hz" in the name).
uint64_t PerfTimer::MeasureTSCFreqHz() {
  const uint64_t kTicksToSpin = 1000000000;
  unsigned int aux;

  // Coarse interval measurement around roughly one gigacycle of TSC ticks.
  const uint64_t startUs = CoarseTimestampUs();
  const uint64_t startTicks = __rdtscp(&aux);

  uint64_t endTicks = __rdtscp(&aux);
  while (endTicks - startTicks < kTicksToSpin) {
    endTicks = __rdtscp(&aux);
  }

  const uint64_t endUs = CoarseTimestampUs();

  // Ticks per nanosecond times ten, rounded to nearest by adding half of the
  // divisor before the integer division.
  const uint64_t elapsedNs = (endUs - startUs) * 1000;
  const uint64_t elapsedTicks = endTicks - startTicks;
  return (elapsedTicks * 10 + (elapsedNs / 2)) / elapsedNs;
}
} // namespace rocrtst
+106
View File
@@ -0,0 +1,106 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef ROCRTST_COMMON_HSATIMER_H_
#define ROCRTST_COMMON_HSATIMER_H_
#include <stdint.h>
#include <iostream>
#include <vector>
#include <string>
/// \file
/// Timer related class.
namespace rocrtst {
/// Collection of start/stop timers addressed by integer index.
class PerfTimer {
private:
/// State of a single timer.
struct Timer {
std::string name; /*!< Name of the timer object */
uint64_t _freq; /*!< Frequency used to scale _clocks when reading */
uint64_t _clocks; /*!< Accumulated count across start/stop pairs */
uint64_t _start; /*!< Start point of the interval in progress */
};
std::vector<Timer*> _timers; /*!< Timer objects, indexed by handle */
/// Value returned by MeasureTSCFreqHz() at construction time
double freq_in_100mhz;
public:
PerfTimer(void);
~PerfTimer(void);
/// Create a new timer.
/// \returns A new timer instance index
int CreateTimer(void);
/// Start the timer associated with the given index
/// \param[in] index Index of the timer to start
/// \returns int 0 for success, non-zero otherwise
int StartTimer(int index);
/// Stop the timer associated with the given index
/// \param[in] index Index of the timer to stop
/// \returns int 0 for success, non-zero otherwise
int StopTimer(int index);
/// Reset the timer to 0
/// \param[in] index Index of the timer to reset
/// \returns void
void ResetTimer(int index);
/// Read the time value of the timer associated with the provided index.
/// \param[in] index Index of the timer to read
/// \returns double Value of the timer
double ReadTimer(int index);
private:
/// Print an error message to stdout
void Error(std::string str);
/// Current CLOCK_MONOTONIC_RAW time in microseconds
uint64_t CoarseTimestampUs();
/// Measure the TSC tick rate against the coarse clock
uint64_t MeasureTSCFreqHz();
};
} // namespace rocrtst
#endif // ROCRTST_COMMON_HSATIMER_H_
+66
View File
@@ -0,0 +1,66 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include "common/os.h"
#include <stdlib.h>
namespace rocrtst {
/// Set an environment variable, overwriting any existing value.
///
/// If the variable cannot be set, a message is printed to stdout and the
/// process exits with status 1.
///
/// @param env_var_name  Name of the environment variable.
/// @param env_var_value Value to assign to it.
void SetEnv(const char* env_var_name, const char* env_var_value) {
  const bool succeeded = (setenv(env_var_name, env_var_value, 1) == 0);
  if (succeeded) {
    return;
  }

  printf("Set environment variable failed!\n");
  exit(1);
}
/// Look up an environment variable.
///
/// @param env_var_name Name of the environment variable.
/// @returns Whatever getenv() returns for that name.
char* GetEnv(const char* env_var_name) {
  char* const value = getenv(env_var_name);
  return value;
}
} // namespace rocrtst
+67
View File
@@ -0,0 +1,67 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
/// \file OS specific functionality
#ifndef ROCRTST_COMMON_OS_H_
#define ROCRTST_COMMON_OS_H_
#include <stdio.h>
namespace rocrtst {
/// Set environment variable. An existing value is overwritten. If the
/// variable cannot be set, a message is printed and the process exits with
/// status 1.
/// \param[in] env_var_name Environment variable to set.
/// \param[in] env_var_value Value to set environment variable to.
/// \returns void
void SetEnv(const char* env_var_name, const char* env_var_value);
/// Get environment variable.
/// \param[in] env_var_name Environment variable to get.
/// \returns Pointer to string of characters that is the value of the
/// environment variable, exactly as returned by getenv().
char* GetEnv(const char* env_var_name);
} // namespace rocrtst
#endif // ROCRTST_COMMON_OS_H_
@@ -0,0 +1,31 @@
#
# Source files of the tests that verify the rocrtst Utils library
#
set (rocrtstUtilsTestSrcs
     utils_timer_gtest.cpp
     utils_timer_test.cpp
     utils_cpp11_gtest.cpp)

#
# Header search paths: this directory, the Utils library and Google Test
#
include_directories(${CMAKE_CURRENT_SOURCE_DIR}
                    ${PROJECT_SOURCE_DIR}/utils
                    ${PROJECT_SOURCE_DIR}/gtest/include)

#
# Build the test executable from the sources listed above
#
add_executable(${ROCRTST_UTIL_TEST_NAME} ${rocrtstUtilsTestSrcs})

#
# Libraries needed to resolve the symbols of the test executable
#
target_link_libraries(${ROCRTST_UTIL_TEST_NAME}
                      ${ROCRTST_LIBS} elf c stdc++ dl pthread rt)

#
# Install build artifacts into one common location
#
install(TARGETS ${ROCRTST_UTIL_TEST_NAME}
        ARCHIVE DESTINATION ${PROJECT_BINARY_DIR}/lib
        LIBRARY DESTINATION ${PROJECT_BINARY_DIR}/lib
        RUNTIME DESTINATION ${PROJECT_BINARY_DIR}/bin)
@@ -0,0 +1,46 @@
#include<iostream>
#include<thread>
#include"gtest/gtest.h"
using std::cout;
using std::endl;
// @Brief: entry point executed by thread #1; it only announces itself on
// stdout.
static void ThreadEntry1() {
  std::cout << "The first thread is launched!" << std::endl;
}
// @Brief: entry point executed by thread #2; it only announces itself on
// stdout.
static void ThreadEntry2() {
  std::cout << "The second thread is launched!" << std::endl;
}
// @Brief: google test case for the basic C++11 thread feature.
// Two default-constructed thread objects are checked to be non-joinable,
// then each is assigned a running thread, checked to be joinable, and
// joined so the test body waits until both spawned threads finish.
TEST(rocrtstCpp11Feature, BasicThread) {
  // Default-constructed threads do not represent a thread of execution
  std::thread thread1;
  std::thread thread2;

  // At this point, they should be non-joinable. Fatal assertions are safe
  // here because no thread has been started yet.
  ASSERT_FALSE(thread1.joinable());
  ASSERT_FALSE(thread2.joinable());

  // Assign execution codes to threads
  thread1 = std::thread(ThreadEntry1);
  thread2 = std::thread(ThreadEntry2);

  // Now, the two threads should be joinable. These checks are non-fatal on
  // purpose: a fatal ASSERT_* would return from the test body while the
  // threads are still joinable, and destroying a joinable std::thread calls
  // std::terminate.
  EXPECT_TRUE(thread1.joinable());
  EXPECT_TRUE(thread2.joinable());

  // Join the two threads until they finish
  if (thread1.joinable()) {
    thread1.join();
  }
  if (thread2.joinable()) {
    thread2.join();
  }

  // When execution flow reaches here, it succeeded.
  cout << "Done!" << endl;
}
@@ -0,0 +1,32 @@
#include <iostream>
#include "gtest/gtest.h"
#include "utils_timer_test.hpp"
using namespace std;
// Fixture for the timer tests of the rocrtst Utils library. It adds no state
// of its own.
class rocrtstUtilsTimerGtest : public ::testing::Test {
 protected:
  // No argument constructor called from Google Test Framework
  rocrtstUtilsTimerGtest() {}
};
// Runs the timer test driver once and prints what it measured.
TEST_F(rocrtstUtilsTimerGtest, TestingTimer101) {
  // Create a Hsa Perf Utils Timer Test object with a loop count of 108 and
  // a sleep time argument of 3. It lives on the stack so it is destroyed
  // when the test body ends; the previous heap allocation was never freed.
  rocrtstUtilsTimerTest timer(108, 3);

  // Let the timer object collect data
  timer.run();

  // Print the statistics of timer object
  timer.print();
}
@@ -0,0 +1,45 @@
#include <iostream>
#include "hsatimer.h"
#include <unistd.h>
#include "utils_timer_test.hpp"
using namespace std;
// Destructor of the test driver; there is nothing to release.
rocrtstUtilsTimerTest::~rocrtstUtilsTimerTest() {}
// Constructor of the test driver. Stores the two settings and starts with a
// total time of zero.
//
// @param loopCnt number of times to call sleep Api
//
// @param sleepTime time to sleep per iteration, stored in sleepTime_
rocrtstUtilsTimerTest::rocrtstUtilsTimerTest(uint32_t loopCnt,
                                             uint32_t sleepTime)
    : loopCnt_(loopCnt),
      sleepTime_(sleepTime),
      total_time_(0) {}
// Execute user defined number of sleep calls and collect the
// total time taken by such calls
void rocrtstUtilsTimerTest::run() {
double time;
PerfTimer timer;
uint32_t index = timer.CreateTimer();
for (uint32_t idx; idx < loopCnt_; idx++) {
timer.StartTimer(index);
usleep(sleepTime_);
timer.StopTimer(index);
time = timer.ReadTimer(index);
total_time_ += time;
}
}
// Print the loop count and the total time collected by run() on a single
// line of stdout.
void rocrtstUtilsTimerTest::print() {
  std::cout << "Time taken by " << loopCnt_
            << " iterations of sleep is: " << total_time_ << std::endl;
}
@@ -0,0 +1,38 @@
#ifndef ROCRTST_UTILS_TIMER_TEST_H_
#define ROCRTST_UTILS_TIMER_TEST_H_
// Encapsulates Api's to access Timer service of rocrtst Utils library.
// A driver object repeatedly times a sleep call and accumulates the result.
class rocrtstUtilsTimerTest {
public:
// Destructor method of test driver
~rocrtstUtilsTimerTest();
// Constructor method of test driver
//
// @param loopCnt number of times to call sleep Api
//
// @param sleepTime time to sleep in milliseconds
rocrtstUtilsTimerTest(uint32_t loopCnt, uint32_t sleepTime);
// Execute user defined number of sleep calls and collect the
// total time taken by such calls
void run();
// Print time reported by rocrtst Utils Timer service
void print();
private:
// Number of times to invoke sleep Api
uint32_t loopCnt_;
// Time to sleep per cycle, in milliseconds
uint32_t sleepTime_;
// Total time taken by the sleep Api calls, as accumulated by run()
double total_time_;
};
#endif
+31
View File
@@ -0,0 +1,31 @@
#
# Source files of the Google Test Framework
#
set (gtFrwkSrcs
     src/gtest.cpp
     src/gtest-port.cpp
     src/gtest-printers.cpp
     src/gtest-filepath.cpp
     src/gtest-test-part.cpp
     src/gtest-typed-test.cpp
     src/gtest-death-test.cpp
     src/gtest_main.cpp)

#
# Header search paths
#
include_directories(include
                    include/gtest
                    ${CMAKE_CURRENT_SOURCE_DIR})

#
# Build Google Test Framework as a static library
#
add_library(${GOOGLE_TEST_FRWK_NAME} STATIC ${gtFrwkSrcs})

#
# Install build artifacts into one common location
#
install(TARGETS ${GOOGLE_TEST_FRWK_NAME}
        ARCHIVE DESTINATION ${PROJECT_BINARY_DIR}/lib
        LIBRARY DESTINATION ${PROJECT_BINARY_DIR}/lib
        RUNTIME DESTINATION ${PROJECT_BINARY_DIR}/bin)
@@ -0,0 +1,294 @@
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
//
// The Google C++ Testing Framework (Google Test)
//
// This header file defines the public API for death tests. It is
// #included by gtest.h so a user doesn't need to include this
// directly.
#ifndef GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
#define GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
#include "gtest/internal/gtest-death-test-internal.h"
namespace testing {
// This flag controls the style of death tests. Valid values are "threadsafe",
// meaning that the death test child process will re-execute the test binary
// from the start, running only a single death test, or "fast",
// meaning that the child process will execute the test logic immediately
// after forking.
GTEST_DECLARE_string_(death_test_style);
#if GTEST_HAS_DEATH_TEST
namespace internal {
// Returns a Boolean value indicating whether the caller is currently
// executing in the context of the death test child process. Tools such as
// Valgrind heap checkers may need this to modify their behavior in death
// tests. IMPORTANT: This is an internal utility. Using it may break the
// implementation of death tests. User code MUST NOT use it.
GTEST_API_ bool InDeathTestChild();
} // namespace internal
// The following macros are useful for writing death tests.
// Here's what happens when an ASSERT_DEATH* or EXPECT_DEATH* is
// executed:
//
// 1. It generates a warning if there is more than one active
// thread. This is because it's safe to fork() or clone() only
// when there is a single thread.
//
// 2. The parent process clone()s a sub-process and runs the death
// test in it; the sub-process exits with code 0 at the end of the
// death test, if it hasn't exited already.
//
// 3. The parent process waits for the sub-process to terminate.
//
// 4. The parent process checks the exit code and error message of
// the sub-process.
//
// Examples:
//
// ASSERT_DEATH(server.SendMessage(56, "Hello"), "Invalid port number");
// for (int i = 0; i < 5; i++) {
// EXPECT_DEATH(server.ProcessRequest(i),
// "Invalid request .* in ProcessRequest()")
// << "Failed to die on request " << i;
// }
//
// ASSERT_EXIT(server.ExitNow(), ::testing::ExitedWithCode(0), "Exiting");
//
// bool KilledBySIGHUP(int exit_code) {
// return WIFSIGNALED(exit_code) && WTERMSIG(exit_code) == SIGHUP;
// }
//
// ASSERT_EXIT(client.HangUpServer(), KilledBySIGHUP, "Hanging up!");
//
// On the regular expressions used in death tests:
//
// On POSIX-compliant systems (*nix), we use the <regex.h> library,
// which uses the POSIX extended regex syntax.
//
// On other platforms (e.g. Windows), we only support a simple regex
// syntax implemented as part of Google Test. This limited
// implementation should be enough most of the time when writing
// death tests; though it lacks many features you can find in PCRE
// or POSIX extended regex syntax. For example, we don't support
// union ("x|y"), grouping ("(xy)"), brackets ("[xy]"), and
// repetition count ("x{5,7}"), among others.
//
// Below is the syntax that we do support. We chose it to be a
// subset of both PCRE and POSIX extended regex, so it's easy to
// learn wherever you come from. In the following: 'A' denotes a
// literal character, period (.), or a single \\ escape sequence;
// 'x' and 'y' denote regular expressions; 'm' and 'n' are for
// natural numbers.
//
// c matches any literal character c
// \\d matches any decimal digit
// \\D matches any character that's not a decimal digit
// \\f matches \f
// \\n matches \n
// \\r matches \r
// \\s matches any ASCII whitespace, including \n
// \\S matches any character that's not a whitespace
// \\t matches \t
// \\v matches \v
// \\w matches any letter, _, or decimal digit
// \\W matches any character that \\w doesn't match
// \\c matches any literal character c, which must be a punctuation
// . matches any single character except \n
// A? matches 0 or 1 occurrences of A
// A* matches 0 or many occurrences of A
// A+ matches 1 or many occurrences of A
// ^ matches the beginning of a string (not that of each line)
// $ matches the end of a string (not that of each line)
// xy matches x followed by y
//
// If you accidentally use PCRE or POSIX extended regex features
// not implemented by us, you will get a run-time failure. In that
// case, please try to rewrite your regular expression within the
// above syntax.
//
// This implementation is *not* meant to be as highly tuned or robust
// as a compiled regex library, but should perform well enough for a
// death test, which already incurs significant overhead by launching
// a child process.
//
// Known caveats:
//
// A "threadsafe" style death test obtains the path to the test
// program from argv[0] and re-executes it in the sub-process. For
// simplicity, the current implementation doesn't search the PATH
// when launching the sub-process. This means that the user must
// invoke the test program via a path that contains at least one
// path separator (e.g. path/to/foo_test and
// /absolute/path/to/bar_test are fine, but foo_test is not). This
// is rarely a problem as people usually don't put the test binary
// directory in PATH.
//
// TODO(wan@google.com): make thread-safe death tests search the PATH.
// ASSERT_EXIT(statement, predicate, regex)
//
// Asserts that a given statement causes the program to exit, with an
// integer exit status that satisfies predicate, and emitting error output
// that matches regex. Expands to GTEST_DEATH_TEST_ with
// GTEST_FATAL_FAILURE_ as its last argument.
# define ASSERT_EXIT(statement, predicate, regex) \
GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_FATAL_FAILURE_)
// EXPECT_EXIT(statement, predicate, regex)
//
// Like ASSERT_EXIT, but continues on to successive tests in the
// test case, if any. Expands to GTEST_DEATH_TEST_ with
// GTEST_NONFATAL_FAILURE_ as its last argument.
# define EXPECT_EXIT(statement, predicate, regex) \
GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_NONFATAL_FAILURE_)
// ASSERT_DEATH(statement, regex)
//
// Asserts that a given statement causes the program to exit, either by
// explicitly exiting with a nonzero exit code or being killed by a
// signal, and emitting error output that matches regex. Implemented as
// ASSERT_EXIT with ::testing::internal::ExitedUnsuccessfully as the
// predicate.
# define ASSERT_DEATH(statement, regex) \
ASSERT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex)
// EXPECT_DEATH(statement, regex)
//
// Like ASSERT_DEATH, but continues on to successive tests in the
// test case, if any. Implemented as EXPECT_EXIT with
// ::testing::internal::ExitedUnsuccessfully as the predicate.
# define EXPECT_DEATH(statement, regex) \
EXPECT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex)
// Two predicate classes that can be used in {ASSERT,EXPECT}_EXIT*:
// Predicate usable in {ASSERT,EXPECT}_EXIT*: tests that an exit code
// describes a normal exit with a given exit code.
//
// Only declarations appear here; the member functions are defined outside
// this header.
class GTEST_API_ ExitedWithCode {
public:
// Takes the exit code to test for. Explicit, so an int is never silently
// converted into a predicate object.
explicit ExitedWithCode(int exit_code);
// Function-call operator that makes this class usable as the predicate
// argument: receives an exit status and returns whether it is accepted.
bool operator()(int exit_status) const;
private:
// No implementation - assignment is unsupported.
void operator=(const ExitedWithCode& other);
// Fixed at construction (const member).
const int exit_code_;
};
// KilledBySignal is only declared when not building for Windows.
# if !GTEST_OS_WINDOWS
// Predicate usable in {ASSERT,EXPECT}_EXIT*: tests that an exit code
// describes an exit due to termination by a given signal.
//
// Only declarations appear here; the member functions are defined outside
// this header.
class GTEST_API_ KilledBySignal {
public:
// Takes the signal number to test for. Explicit, so an int is never
// silently converted into a predicate object.
explicit KilledBySignal(int signum);
// Function-call operator that makes this class usable as the predicate
// argument: receives an exit status and returns whether it is accepted.
bool operator()(int exit_status) const;
private:
// Fixed at construction (const member).
const int signum_;
};
# endif // !GTEST_OS_WINDOWS
// EXPECT_DEBUG_DEATH asserts that the given statements die in debug mode.
// The death testing framework causes this to have interesting semantics,
// since the sideeffects of the call are only visible in opt mode, and not
// in debug mode.
//
// In practice, this can be used to test functions that utilize the
// LOG(DFATAL) macro using the following style:
//
// int DieInDebugOr12(int* sideeffect) {
// if (sideeffect) {
// *sideeffect = 12;
// }
// LOG(DFATAL) << "death";
// return 12;
// }
//
// TEST(TestCase, TestDieOr12WorksInDgbAndOpt) {
// int sideeffect = 0;
// // Only asserts in dbg.
// EXPECT_DEBUG_DEATH(DieInDebugOr12(&sideeffect), "death");
//
// #ifdef NDEBUG
// // opt-mode has sideeffect visible.
// EXPECT_EQ(12, sideeffect);
// #else
// // dbg-mode no visible sideeffect.
// EXPECT_EQ(0, sideeffect);
// #endif
// }
//
// This will assert that DieInDebugOr12() crashes in debug
// mode, usually due to a DCHECK or LOG(DFATAL), but returns the
// appropriate fallback value (12 in this case) in opt mode. If you
// need to test that a function has appropriate side-effects in opt
// mode, include assertions against the side-effects. A general
// pattern for this is:
//
// EXPECT_DEBUG_DEATH({
// // Side-effects here will have an effect after this statement in
// // opt mode, but none in debug mode.
// EXPECT_EQ(12, DieInDebugOr12(&sideeffect));
// }, "death");
//
// EXPECT_DEBUG_DEATH(statement, regex) / ASSERT_DEBUG_DEATH(statement, regex)
//
// The expansion depends on whether NDEBUG is defined:
//  - NDEBUG defined: both macros expand to
//    GTEST_EXECUTE_STATEMENT_(statement, regex), i.e. no death test
//    assertion macro is involved.
//  - NDEBUG not defined: the macros forward to EXPECT_DEATH and
//    ASSERT_DEATH respectively.
# ifdef NDEBUG
# define EXPECT_DEBUG_DEATH(statement, regex) \
GTEST_EXECUTE_STATEMENT_(statement, regex)
# define ASSERT_DEBUG_DEATH(statement, regex) \
GTEST_EXECUTE_STATEMENT_(statement, regex)
# else
# define EXPECT_DEBUG_DEATH(statement, regex) \
EXPECT_DEATH(statement, regex)
# define ASSERT_DEBUG_DEATH(statement, regex) \
ASSERT_DEATH(statement, regex)
# endif // NDEBUG for EXPECT_DEBUG_DEATH
#endif // GTEST_HAS_DEATH_TEST
// EXPECT_DEATH_IF_SUPPORTED(statement, regex) and
// ASSERT_DEATH_IF_SUPPORTED(statement, regex) expand to real death tests if
// death tests are supported; otherwise they just issue a warning. This is
// useful when you are combining death test assertions with normal test
// assertions in one test.
//
// When GTEST_HAS_DEATH_TEST is true the macros forward to EXPECT_DEATH and
// ASSERT_DEATH. Otherwise they expand to GTEST_UNSUPPORTED_DEATH_TEST_,
// whose third argument is empty for the EXPECT_ form and `return` for the
// ASSERT_ form.
#if GTEST_HAS_DEATH_TEST
# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \
EXPECT_DEATH(statement, regex)
# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \
ASSERT_DEATH(statement, regex)
#else
# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \
GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, )
# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \
GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, return)
#endif
} // namespace testing
#endif // GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
@@ -0,0 +1,253 @@
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
//
// The Google C++ Testing Framework (Google Test)
//
// This header file defines the Message class.
//
// IMPORTANT NOTE: Due to limitation of the C++ language, we have to
// leave some internal implementation details in this header file.
// They are clearly marked by comments like this:
//
// // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
//
// Such code is NOT meant to be used by a user directly, and is subject
// to CHANGE WITHOUT NOTICE. Therefore DO NOT DEPEND ON IT in a user
// program!
#ifndef GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
#define GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
#include <limits>
#include "gtest/internal/gtest-port.h"
// Ensures that there is at least one operator<< in the global namespace.
// Message's templated operator<< contains `using ::operator <<;`, and that
// using-declaration needs something to name. The parameter type
// testing::internal::Secret comes from gtest-port.h.
// See Message& operator<<(...) below for why.
void operator<<(const testing::internal::Secret&, int);
namespace testing {
// The Message class works like an ostream repeater.
//
// Typical usage:
//
// 1. You stream a bunch of values to a Message object.
// It will remember the text in a stringstream.
// 2. Then you stream the Message object to an ostream.
// This causes the text in the Message to be streamed
// to the ostream.
//
// For example:
//
// testing::Message foo;
// foo << 1 << " != " << 2;
// std::cout << foo;
//
// will print "1 != 2".
//
// Message is not intended to be inherited from. In particular, its
// destructor is not virtual.
//
// Note that stringstream behaves differently in gcc and in MSVC. You
// can stream a NULL char pointer to it in the former, but not in the
// latter (it causes an access violation if you do). The Message
// class hides this difference by treating a NULL char pointer as
// "(null)".
class GTEST_API_ Message {
 private:
  // The type of basic IO manipulators (endl, ends, and flush) for
  // narrow streams.
  typedef std::ostream& (*BasicNarrowIoManip)(std::ostream&);

 public:
  // Constructs an empty Message.
  Message();

  // Copy constructor.  The new object gets its own stringstream, seeded
  // with the text returned by msg.GetString().
  Message(const Message& msg) : ss_(new ::std::stringstream) {  // NOLINT
    *ss_ << msg.GetString();
  }

  // Constructs a Message from a C-string.
  //
  // A NULL pointer is recorded as "(null)".  Streaming a NULL char pointer
  // into an ostream is undefined behavior, and this class promises to hide
  // that by treating NULL as "(null)" (the pointer overload of operator<<
  // does the same).
  explicit Message(const char* str) : ss_(new ::std::stringstream) {
    *ss_ << (str == NULL ? "(null)" : str);
  }

#if GTEST_OS_SYMBIAN
  // Streams a value (either a pointer or not) to this object.
  template <typename T>
  inline Message& operator <<(const T& value) {
    StreamHelper(typename internal::is_pointer<T>::type(), value);
    return *this;
  }
#else
  // Streams a non-pointer value to this object.
  template <typename T>
  inline Message& operator <<(const T& val) {
    // Some libraries overload << for STL containers.  These
    // overloads are defined in the global namespace instead of ::std.
    //
    // C++'s symbol lookup rule (i.e. Koenig lookup) says that these
    // overloads are visible in either the std namespace or the global
    // namespace, but not other namespaces, including the testing
    // namespace which Google Test's Message class is in.
    //
    // To allow STL containers (and other types that have a << operator
    // defined in the global namespace) to be used in Google Test
    // assertions, testing::Message must access the custom << operator
    // from the global namespace.  With this using declaration,
    // overloads of << defined in the global namespace and those
    // visible via Koenig lookup are both exposed in this function.
    using ::operator <<;
    *ss_ << val;
    return *this;
  }

  // Streams a pointer value to this object.
  //
  // This function is an overload of the previous one.  When you
  // stream a pointer to a Message, this definition will be used as it
  // is more specialized.  (The C++ Standard, section
  // [temp.func.order].)  If you stream a non-pointer, then the
  // previous definition will be used.
  //
  // The reason for this overload is that streaming a NULL pointer to
  // ostream is undefined behavior.  Depending on the compiler, you
  // may get "0", "(nil)", "(null)", or an access violation.  To
  // ensure consistent result across compilers, we always treat NULL
  // as "(null)".
  template <typename T>
  inline Message& operator <<(T* const& pointer) {  // NOLINT
    if (pointer == NULL) {
      *ss_ << "(null)";
    } else {
      *ss_ << pointer;
    }
    return *this;
  }
#endif  // GTEST_OS_SYMBIAN

  // Since the basic IO manipulators are overloaded for both narrow
  // and wide streams, we have to provide this specialized definition
  // of operator <<, even though its body is the same as the
  // templatized version above.  Without this definition, streaming
  // endl or other basic IO manipulators to Message will confuse the
  // compiler.
  Message& operator <<(BasicNarrowIoManip val) {
    *ss_ << val;
    return *this;
  }

  // Instead of 1/0, we want to see true/false for bool values.
  Message& operator <<(bool b) {
    return *this << (b ? "true" : "false");
  }

  // These two overloads allow streaming a wide C string to a Message
  // using the UTF-8 encoding.
  Message& operator <<(const wchar_t* wide_c_str);
  Message& operator <<(wchar_t* wide_c_str);

#if GTEST_HAS_STD_WSTRING
  // Converts the given wide string to a narrow string using the UTF-8
  // encoding, and streams the result to this Message object.
  Message& operator <<(const ::std::wstring& wstr);
#endif  // GTEST_HAS_STD_WSTRING

#if GTEST_HAS_GLOBAL_WSTRING
  // Converts the given wide string to a narrow string using the UTF-8
  // encoding, and streams the result to this Message object.
  Message& operator <<(const ::wstring& wstr);
#endif  // GTEST_HAS_GLOBAL_WSTRING

  // Gets the text streamed to this object so far as an std::string.
  // Each '\0' character in the buffer is replaced with "\\0".
  //
  // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
  std::string GetString() const;

 private:
#if GTEST_OS_SYMBIAN
  // These are needed as the Nokia Symbian Compiler cannot decide between
  // const T& and const T* in a function template. The Nokia compiler _can_
  // decide between class template specializations for T and T*, so a
  // tr1::type_traits-like is_pointer works, and we can overload on that.
  template <typename T>
  inline void StreamHelper(internal::true_type /*is_pointer*/, T* pointer) {
    if (pointer == NULL) {
      *ss_ << "(null)";
    } else {
      *ss_ << pointer;
    }
  }
  template <typename T>
  inline void StreamHelper(internal::false_type /*is_pointer*/,
                           const T& value) {
    // See the comments in Message& operator <<(const T&) above for why
    // we need this using statement.
    using ::operator <<;
    *ss_ << value;
  }
#endif  // GTEST_OS_SYMBIAN

  // We'll hold the text streamed to this object here.
  const internal::scoped_ptr< ::std::stringstream> ss_;

  // We declare (but don't implement) this to prevent the compiler
  // from implementing the assignment operator.
  void operator=(const Message&);
};
// Writes the text accumulated in a Message (as reported by GetString())
// to the given ostream and hands the same stream back for chaining.
inline std::ostream& operator <<(std::ostream& os, const Message& sb) {
  os << sb.GetString();
  return os;
}
namespace internal {
// Converts a streamable value to an std::string. A NULL pointer is
// converted to "(null)". When the input value is a ::string,
// ::std::string, ::wstring, or ::std::wstring object, each NUL
// character in it is replaced with "\\0".
template <typename T>
std::string StreamableToString(const T& streamable) {
return (Message() << streamable).GetString();
}
} // namespace internal
} // namespace testing
#endif // GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,868 @@
// Copyright 2007, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
// Google Test - The Google C++ Testing Framework
//
// This file implements a universal value printer that can print a
// value of any type T:
//
// void ::testing::internal::UniversalPrinter<T>::Print(value, ostream_ptr);
//
// A user can teach this function how to print a class type T by
// defining either operator<<() or PrintTo() in the namespace that
// defines T. More specifically, the FIRST defined function in the
// following list will be used (assuming T is defined in namespace
// foo):
//
// 1. foo::PrintTo(const T&, ostream*)
// 2. operator<<(ostream&, const T&) defined in either foo or the
// global namespace.
//
// If none of the above is defined, it will print the debug string of
// the value if it is a protocol buffer, or print the raw bytes in the
// value otherwise.
//
// To aid debugging: when T is a reference type, the address of the
// value is also printed; when T is a (const) char pointer, both the
// pointer value and the NUL-terminated string it points to are
// printed.
//
// We also provide some convenient wrappers:
//
// // Prints a value to a string. For a (const or not) char
// // pointer, the NUL-terminated string (but not the pointer) is
// // printed.
// std::string ::testing::PrintToString(const T& value);
//
// // Prints a value tersely: for a reference type, the referenced
// // value (but not the address) is printed; for a (const or not) char
// // pointer, the NUL-terminated string (but not the pointer) is
// // printed.
// void ::testing::internal::UniversalTersePrint(const T& value, ostream*);
//
// // Prints value using the type inferred by the compiler. The difference
// // from UniversalTersePrint() is that this function prints both the
// // pointer and the NUL-terminated string for a (const or not) char pointer.
// void ::testing::internal::UniversalPrint(const T& value, ostream*);
//
// // Prints the fields of a tuple tersely to a string vector, one
// // element for each field. Tuple support must be enabled in
// // gtest-port.h.
// std::vector<string> UniversalTersePrintTupleFieldsToStrings(
// const Tuple& value);
//
// Known limitation:
//
// The print primitives print the elements of an STL-style container
// using the compiler-inferred type of *iter where iter is a
// const_iterator of the container. When const_iterator is an input
// iterator but not a forward iterator, this inferred type may not
// match value_type, and the print output may be incorrect. In
// practice, this is rarely a problem as for most containers
// const_iterator is a forward iterator. We'll fix this if there's an
// actual need for it. Note that this fix cannot rely on value_type
// being defined as many user-defined container types don't have
// value_type.
#ifndef GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
#define GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
#include <ostream> // NOLINT
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "gtest/internal/gtest-port.h"
#include "gtest/internal/gtest-internal.h"
namespace testing {
// Definitions in the 'internal' and 'internal2' name spaces are
// subject to change without notice. DO NOT USE THEM IN USER CODE!
namespace internal2 {
// Prints the given number of bytes in the given object to the given
// ostream.
//
//   obj_bytes - start of the object's storage, viewed as raw bytes.
//   count     - how many bytes to print.
//   os        - destination stream.
//
// Declaration only; the definition is not part of this header. It is the
// printer the generic TypeWithoutFormatter::PrintValue() below calls.
GTEST_API_ void PrintBytesInObjectTo(const unsigned char* obj_bytes,
size_t count,
::std::ostream* os);
// For selecting which printer to use when a given type has neither <<
// nor PrintTo(). The enumerator is passed as the second template argument
// of TypeWithoutFormatter, which is specialized on kProtobuf and
// kConvertibleToInteger and falls back to its primary template otherwise.
enum TypeKind {
kProtobuf, // a protobuf type
kConvertibleToInteger, // a type implicitly convertible to BiggestInt
// (e.g. a named or unnamed enum type)
kOtherType // anything else
};
// Last-resort printer used by the universal printer for a type T that
// offers neither operator<< nor PrintTo().  The second template argument
// is the "kind" of T (see enum TypeKind) and selects a specialization;
// this primary template is the one that remains for kOtherType.
template <typename T, TypeKind kTypeKind>
class TypeWithoutFormatter {
 public:
  // Dumps the object representation of 'value': its address is
  // reinterpreted as a byte pointer and sizeof(value) bytes are handed to
  // PrintBytesInObjectTo().
  static void PrintValue(const T& value, ::std::ostream* os) {
    const unsigned char* const raw_bytes =
        reinterpret_cast<const unsigned char*>(&value);
    PrintBytesInObjectTo(raw_bytes, sizeof(value), os);
  }
};
// We print a protobuf using its ShortDebugString() when the string
// doesn't exceed this many characters; otherwise we print it using
// DebugString() for better readability.
const size_t kProtobufOneLinerMaxLength = 50;
template <typename T>
class TypeWithoutFormatter<T, kProtobuf> {
public:
static void PrintValue(const T& value, ::std::ostream* os) {
const ::testing::internal::string short_str = value.ShortDebugString();
const ::testing::internal::string pretty_str =
short_str.length() <= kProtobufOneLinerMaxLength ?
short_str : ("\n" + value.DebugString());
*os << ("<" + pretty_str + ">");
}
};
template <typename T>
class TypeWithoutFormatter<T, kConvertibleToInteger> {
public:
// Since T has no << operator or PrintTo() but can be implicitly
// converted to BiggestInt, we print it as a BiggestInt.
//
// Most likely T is an enum type (either named or unnamed), in which
// case printing it as an integer is the desired behavior. In case
// T is not an enum, printing it as an integer is the best we can do
// given that it has no user-defined printer.
static void PrintValue(const T& value, ::std::ostream* os) {
const internal::BiggestInt kBigInt = value;
*os << kBigInt;
}
};
// Prints the given value to the given ostream. If the value is a
// protocol message, its debug string is printed; if it's an enum or
// of a type implicitly convertible to BiggestInt, it's printed as an
// integer; otherwise the bytes in the value are printed. This is
// what UniversalPrinter<T>::Print() does when it knows nothing about
// type T and T has neither << operator nor PrintTo().
//
// A user can override this behavior for a class type Foo by defining
// a << operator in the namespace where Foo is defined.
//
// We put this operator in namespace 'internal2' instead of 'internal'
// to simplify the implementation, as much code in 'internal' needs to
// use << in STL, which would conflict with our own << were it defined
// in 'internal'.
//
// Note that this operator<< takes a generic std::basic_ostream<Char,
// CharTraits> type instead of the more restricted std::ostream. If
// we define it to take an std::ostream instead, we'll get an
// "ambiguous overloads" compiler error when trying to print a type
// Foo that supports streaming to std::basic_ostream<Char,
// CharTraits>, as the compiler cannot tell whether
// operator<<(std::ostream&, const T&) or
// operator<<(std::basic_ostream<Char, CharTraits>&, const Foo&) is more
// specific.
template <typename Char, typename CharTraits, typename T>
::std::basic_ostream<Char, CharTraits>& operator<<(
::std::basic_ostream<Char, CharTraits>& os, const T& x) {
TypeWithoutFormatter < T,
(internal::IsAProtocolMessage<T>::value ? kProtobuf :
internal::ImplicitlyConvertible<const T&, internal::BiggestInt>::value ?
kConvertibleToInteger : kOtherType) >::PrintValue(x, &os);
return os;
}
} // namespace internal2
} // namespace testing
// This namespace MUST NOT BE NESTED IN ::testing, or the name look-up
// magic needed for implementing UniversalPrinter won't work.
namespace testing_internal {
// Used to print a value that is not an STL-style container when the
// user doesn't define PrintTo() for it.
//
// The whole body is a using-directive followed by a single `*os << value`;
// which operator<< that expression resolves to is decided entirely by the
// name lookup described in the comments below, so the two statements must
// stay exactly as they are.
template <typename T>
void DefaultPrintNonContainerTo(const T& value, ::std::ostream* os) {
// With the following statement, during unqualified name lookup,
// testing::internal2::operator<< appears as if it was declared in
// the nearest enclosing namespace that contains both
// ::testing_internal and ::testing::internal2, i.e. the global
// namespace. For more details, refer to the C++ Standard section
// 7.3.4-1 [namespace.udir]. This allows us to fall back onto
// testing::internal2::operator<< in case T doesn't come with a <<
// operator.
//
// We cannot write 'using ::testing::internal2::operator<<;', which
// gcc 3.3 fails to compile due to a compiler bug.
using namespace ::testing::internal2; // NOLINT
// Assuming T is defined in namespace foo, in the next statement,
// the compiler will consider all of:
//
// 1. foo::operator<< (thanks to Koenig look-up),
// 2. ::operator<< (as the current namespace is enclosed in ::),
// 3. testing::internal2::operator<< (thanks to the using statement above).
//
// The operator<< whose type matches T best will be picked.
//
// We deliberately allow #2 to be a candidate, as sometimes it's
// impossible to define #1 (e.g. when foo is ::std, defining
// anything in it is undefined behavior unless you are a compiler
// vendor.).
*os << value;
}
} // namespace testing_internal
namespace testing {
namespace internal {
// UniversalPrinter<T>::Print(value, ostream_ptr) prints the given
// value to the given ostream. The caller must ensure that
// 'ostream_ptr' is not NULL, or the behavior is undefined.
//
// We define UniversalPrinter as a class template (as opposed to a
// function template), as we need to partially specialize it for
// reference types, which cannot be done with function templates.
//
// Forward declaration only; the class template is defined later.
template <typename T>
class UniversalPrinter;
// Forward declaration of UniversalPrint(). It is needed here because the
// container overload of DefaultPrintTo() below calls
// internal::UniversalPrint() on every element.
template <typename T>
void UniversalPrint(const T& value, ::std::ostream* os);
// Fallback printer for an STL-style container for which the user has not
// defined PrintTo().
//
// Output shape: "{}" for an empty container, otherwise "{ e1, e2, ... }".
// At most kMaxCount elements are shown; if the container holds more, the
// list is cut off with ", ..." after the last element shown.
template <typename C>
void DefaultPrintTo(IsContainer /* dummy */,
                    false_type /* is not a pointer */,
                    const C& container, ::std::ostream* os) {
  const size_t kMaxCount = 32;  // Upper bound on the elements shown.
  *os << '{';
  size_t printed = 0;
  for (typename C::const_iterator cur = container.begin();
       cur != container.end();) {
    const bool is_first = (printed == 0);
    if (!is_first) {
      *os << ',';
      if (printed == kMaxCount) {
        // The limit is reached: mark the truncation and stop.
        *os << " ...";
        break;
      }
    }
    *os << ' ';
    // PrintTo(*cur, os) cannot be used here because PrintTo() does not
    // cope with *cur being a native array; UniversalPrint() does.
    internal::UniversalPrint(*cur, os);
    ++cur;
    ++printed;
  }
  // A non-empty list gets a space before the closing brace.
  if (printed != 0) {
    *os << ' ';
  }
  *os << '}';
}
// Fallback printer for a pointer that is neither a char pointer nor a
// member pointer, used when the user has not defined PrintTo() for it.
// (A member variable pointer or member function pointer doesn't really
// point to a location in the address space.  Their representation is
// implementation-defined.  Therefore they will be printed as raw bytes.)
template <typename T>
void DefaultPrintTo(IsNotContainer /* dummy */,
                    true_type /* is a pointer */,
                    T* p, ::std::ostream* os) {
  if (p == NULL) {
    *os << "NULL";
    return;
  }

  // C++ doesn't allow casting from a function pointer to any object
  // pointer, so the two cases are handled separately.
  //
  // IsTrue() silences warnings: "Condition is always true",
  // "unreachable code".
  if (!IsTrue(ImplicitlyConvertible<T*, const void*>::value)) {
    // T is a function type, so '*os << p' doesn't do what we want
    // (it just prints p as bool).  We want to print p as a const
    // void*.  However, we cannot cast it to const void* directly,
    // even using reinterpret_cast, as earlier versions of gcc
    // (e.g. 3.4.5) cannot compile the cast when p is a function
    // pointer.  Casting to UInt64 first solves the problem.
    *os << reinterpret_cast<const void*>(
        reinterpret_cast<internal::UInt64>(p));
    return;
  }

  // T is not a function type.  We just call << to print p, relying on
  // ADL to pick up user-defined << for their pointer types, if any.
  *os << p;
}
// Used to print a non-container, non-pointer value when the user
// doesn't define PrintTo() for it.
//
// This overload is selected by its first two (tag) parameters and simply
// forwards to ::testing_internal::DefaultPrintNonContainerTo(), which lives
// outside ::testing so that its operator<< lookup works as intended.
template <typename T>
void DefaultPrintTo(IsNotContainer /* dummy */,
false_type /* is not a pointer */,
const T& value, ::std::ostream* os) {
::testing_internal::DefaultPrintNonContainerTo(value, os);
}
// Prints the given value using the << operator if it has one;
// otherwise prints the bytes in it. This is what
// UniversalPrinter<T>::Print() does when PrintTo() is not specialized
// or overloaded for type T.
//
// A user can override this behavior for a class type Foo by defining
// an overload of PrintTo() in the namespace where Foo is defined. We
// give the user this option as sometimes defining a << operator for
// Foo is not desirable (e.g. the coding style may prevent doing it,
// or there is already a << operator but it doesn't do what the user
// wants).
template <typename T>
void PrintTo(const T& value, ::std::ostream* os) {
// DefaultPrintTo() is overloaded. The types of its first two
// arguments determine which version will be picked. If T is an
// STL-style container, the version for containers will be called; if
// T is a pointer, the pointer version will be called; otherwise the
// generic version will be called.
//
// Note that we check for container types here, before we check
// for protocol message types in our operator<<. The rationale is:
//
// For protocol messages, we want to give people a chance to
// override Google Mock's format by defining a PrintTo() or
// operator<<. For STL containers, other formats can be
// incompatible with Google Mock's format for the container
// elements; therefore we check for container types here to ensure
// that our format is used.
//
// The second argument of DefaultPrintTo() is needed to bypass a bug
// in Symbian's C++ compiler that prevents it from picking the right
// overload between:
//
// PrintTo(const T& x, ...);
// PrintTo(T* x, ...);
DefaultPrintTo(IsContainerTest<T>(0), is_pointer<T>(), value, os);
}
// The following list of PrintTo() overloads tells
// UniversalPrinter<T>::Print() how to print standard types (built-in
// types, strings, plain arrays, and pointers).
// Overloads for the character types.  The unsigned char and signed char
// versions are only declared here.
GTEST_API_ void PrintTo(unsigned char c, ::std::ostream* os);
GTEST_API_ void PrintTo(signed char c, ::std::ostream* os);

// A plain char is always printed through the unsigned char overload, so
// the output does not depend on whether the compiler treats char as
// signed or unsigned.
inline void PrintTo(char c, ::std::ostream* os) {
  const unsigned char as_unsigned = static_cast<unsigned char>(c);
  PrintTo(as_unsigned, os);
}
// Overloads for other simple built-in types.
inline void PrintTo(bool x, ::std::ostream* os) {
  // Booleans are written as the words "true" / "false".
  if (x) {
    *os << "true";
  } else {
    *os << "false";
  }
}
// Overload for wchar_t type.
// Prints a wchar_t as a symbol if it is printable or as its internal
// code otherwise and also as its decimal code (except for L'\0').
// The L'\0' char is printed as "L'\\0'". The decimal code is printed
// as signed integer when wchar_t is implemented by the compiler
// as a signed type and is printed as an unsigned integer when wchar_t
// is implemented as an unsigned type.
GTEST_API_ void PrintTo(wchar_t wc, ::std::ostream* os);
// Overloads for C strings.
GTEST_API_ void PrintTo(const char* s, ::std::ostream* os);
inline void PrintTo(char* s, ::std::ostream* os) {
PrintTo(ImplicitCast_<const char*>(s), os);
}
// signed/unsigned char is often used for representing binary data, so
// we print pointers to it as void* to be safe.
inline void PrintTo(const signed char* s, ::std::ostream* os) {
PrintTo(ImplicitCast_<const void*>(s), os);
}
inline void PrintTo(signed char* s, ::std::ostream* os) {
PrintTo(ImplicitCast_<const void*>(s), os);
}
inline void PrintTo(const unsigned char* s, ::std::ostream* os) {
PrintTo(ImplicitCast_<const void*>(s), os);
}
inline void PrintTo(unsigned char* s, ::std::ostream* os) {
PrintTo(ImplicitCast_<const void*>(s), os);
}
// MSVC can be configured to define wchar_t as a typedef of unsigned
// short. It defines _NATIVE_WCHAR_T_DEFINED when wchar_t is a native
// type. When wchar_t is a typedef, defining an overload for const
// wchar_t* would cause unsigned short* to be printed as a wide string,
// possibly causing invalid memory accesses.
#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED)
// Overloads for wide C strings
GTEST_API_ void PrintTo(const wchar_t* s, ::std::ostream* os);
inline void PrintTo(wchar_t* s, ::std::ostream* os) {
PrintTo(ImplicitCast_<const wchar_t*>(s), os);
}
#endif
// Overload for C arrays. Multi-dimensional arrays are printed
// properly.
// Prints the given number of elements in an array, without printing
// the curly braces.
template <typename T>
void PrintRawArrayTo(const T a[], size_t count, ::std::ostream* os) {
  // Writes a[0] .. a[count - 1] to *os, separated by ", " and without any
  // surrounding braces. Each element is printed with UniversalPrint().
  //
  // An empty range prints nothing. Without this guard a[0] would be read
  // even though it does not exist, and the loop below (which starts at 1)
  // would never reach its end condition.
  if (count == 0) {
    return;
  }
  UniversalPrint(a[0], os);
  for (size_t i = 1; i < count; ++i) {
    *os << ", ";
    UniversalPrint(a[i], os);
  }
}
// Overloads for ::string and ::std::string.
#if GTEST_HAS_GLOBAL_STRING
// Declared here only; the PrintTo() overload below forwards to it.
GTEST_API_ void PrintStringTo(const ::string& s, ::std::ostream* os);
// Prints a ::string by delegating to PrintStringTo().
inline void PrintTo(const ::string& s, ::std::ostream* os) {
PrintStringTo(s, os);
}
#endif // GTEST_HAS_GLOBAL_STRING
// Declared here only; the PrintTo() overload below forwards to it.
GTEST_API_ void PrintStringTo(const ::std::string& s, ::std::ostream* os);
// Prints a ::std::string by delegating to PrintStringTo().
inline void PrintTo(const ::std::string& s, ::std::ostream* os) {
PrintStringTo(s, os);
}
// Overloads for ::wstring and ::std::wstring.
#if GTEST_HAS_GLOBAL_WSTRING
// Declared here only; the PrintTo() overload below forwards to it.
GTEST_API_ void PrintWideStringTo(const ::wstring& s, ::std::ostream* os);
// Prints a ::wstring by delegating to PrintWideStringTo().
inline void PrintTo(const ::wstring& s, ::std::ostream* os) {
PrintWideStringTo(s, os);
}
#endif // GTEST_HAS_GLOBAL_WSTRING
#if GTEST_HAS_STD_WSTRING
// Declared here only; the PrintTo() overload below forwards to it.
GTEST_API_ void PrintWideStringTo(const ::std::wstring& s, ::std::ostream* os);
// Prints a ::std::wstring by delegating to PrintWideStringTo().
inline void PrintTo(const ::std::wstring& s, ::std::ostream* os) {
PrintWideStringTo(s, os);
}
#endif // GTEST_HAS_STD_WSTRING
#if GTEST_HAS_TR1_TUPLE
// Overload for ::std::tr1::tuple. Needed for printing function arguments,
// which are packed as tuples.
// Helper function for printing a tuple. T must be instantiated with
// a tuple type.
template <typename T>
void PrintTupleTo(const T& t, ::std::ostream* os);
// Overloaded PrintTo() for tuples of various arities. We support
// tuples of up to 10 fields. The following implementation works
// regardless of whether tr1::tuple is implemented using the
// non-standard variadic template feature or not.
// Every overload in this family has the same body: it forwards the tuple to
// PrintTupleTo(). One overload is spelled out per arity (0 through 10).
// Arity 0.
inline void PrintTo(const ::std::tr1::tuple<>& t, ::std::ostream* os) {
PrintTupleTo(t, os);
}
// Arity 1.
template <typename T1>
void PrintTo(const ::std::tr1::tuple<T1>& t, ::std::ostream* os) {
PrintTupleTo(t, os);
}
// Arity 2.
template <typename T1, typename T2>
void PrintTo(const ::std::tr1::tuple<T1, T2>& t, ::std::ostream* os) {
PrintTupleTo(t, os);
}
// Arity 3.
template <typename T1, typename T2, typename T3>
void PrintTo(const ::std::tr1::tuple<T1, T2, T3>& t, ::std::ostream* os) {
PrintTupleTo(t, os);
}
// Arity 4.
template <typename T1, typename T2, typename T3, typename T4>
void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4>& t, ::std::ostream* os) {
PrintTupleTo(t, os);
}
// Arity 5.
template <typename T1, typename T2, typename T3, typename T4, typename T5>
void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5>& t,
::std::ostream* os) {
PrintTupleTo(t, os);
}
// Arity 6.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
typename T6>
void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6>& t,
::std::ostream* os) {
PrintTupleTo(t, os);
}
// Arity 7.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
typename T6, typename T7>
void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7>& t,
::std::ostream* os) {
PrintTupleTo(t, os);
}
// Arity 8.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
typename T6, typename T7, typename T8>
void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8>& t,
::std::ostream* os) {
PrintTupleTo(t, os);
}
// Arity 9.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
typename T6, typename T7, typename T8, typename T9>
void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9>& t,
::std::ostream* os) {
PrintTupleTo(t, os);
}
// Arity 10.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
typename T6, typename T7, typename T8, typename T9, typename T10>
void PrintTo(
const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>& t,
::std::ostream* os) {
PrintTupleTo(t, os);
}
#endif // GTEST_HAS_TR1_TUPLE
// Overload for std::pair.
template <typename T1, typename T2>
void PrintTo(const ::std::pair<T1, T2>& value, ::std::ostream* os) {
*os << '(';
// We cannot use UniversalPrint(value.first, os) here, as T1 may be
// a reference type. The same for printing value.second.
UniversalPrinter<T1>::Print(value.first, os);
*os << ", ";
UniversalPrinter<T2>::Print(value.second, os);
*os << ')';
}
// Implements printing a non-reference type T by letting the compiler
// pick the right overload of PrintTo() for T.
// Primary template: prints a value of non-reference type T by calling
// PrintTo(value, os) unqualified, so overload resolution (including
// argument-dependent lookup) chooses the printer.
template <typename T>
class UniversalPrinter {
public:
// MSVC warns about adding const to a function type, so we want to
// disable the warning.
#ifdef _MSC_VER
# pragma warning(push) // Saves the current warning state.
# pragma warning(disable:4180) // Temporarily disables warning 4180.
#endif // _MSC_VER
// Note: we deliberately don't call this PrintTo(), as that name
// conflicts with ::testing::internal::PrintTo in the body of the
// function.
static void Print(const T& value, ::std::ostream* os) {
// By default, ::testing::internal::PrintTo() is used for printing
// the value.
//
// Thanks to Koenig look-up, if T is a class and has its own
// PrintTo() function defined in its namespace, that function will
// be visible here. Since it is more specific than the generic ones
// in ::testing::internal, it will be picked by the compiler in the
// following statement - exactly what we want.
PrintTo(value, os);
}
#ifdef _MSC_VER
# pragma warning(pop) // Restores the warning state.
#endif // _MSC_VER
};
// UniversalPrintArray(begin, len, os) prints an array of 'len'
// elements, starting at address 'begin'.
template <typename T>
void UniversalPrintArray(const T* begin, size_t len, ::std::ostream* os) {
  // An empty array is rendered as a bare pair of braces.
  if (len == 0) {
    *os << "{}";
    return;
  }

  // Arrays with more than kThreshold elements are abbreviated: only the
  // first and the last kChunkSize elements are printed, with an ellipsis
  // in between.
  // TODO(wan@google.com): let the user control the threshold using a flag.
  const size_t kThreshold = 18;
  const size_t kChunkSize = 8;

  *os << "{ ";
  if (len > kThreshold) {
    const T* const tail = begin + len - kChunkSize;
    PrintRawArrayTo(begin, kChunkSize, os);
    *os << ", ..., ";
    PrintRawArrayTo(tail, kChunkSize, os);
  } else {
    PrintRawArrayTo(begin, len, os);
  }
  *os << " }";
}
// This overload prints a (const) char array compactly.
GTEST_API_ void UniversalPrintArray(
const char* begin, size_t len, ::std::ostream* os);
// This overload prints a (const) wchar_t array compactly.
GTEST_API_ void UniversalPrintArray(
const wchar_t* begin, size_t len, ::std::ostream* os);
// Implements printing an array type T[N].
// Partial specialization for array types T[N]: forwards the array and its
// compile-time length N to UniversalPrintArray().
template <typename T, size_t N>
class UniversalPrinter<T[N]> {
public:
// Prints the given array, omitting some elements when there are too
// many.
static void Print(const T (&a)[N], ::std::ostream* os) {
UniversalPrintArray(a, N, os);
}
};
// Implements printing a reference type T&.
// Partial specialization for reference types T&: prints "@<address> "
// followed by the referenced value.
template <typename T>
class UniversalPrinter<T&> {
public:
// MSVC warns about adding const to a function type, so we want to
// disable the warning.
#ifdef _MSC_VER
# pragma warning(push) // Saves the current warning state.
# pragma warning(disable:4180) // Temporarily disables warning 4180.
#endif // _MSC_VER
static void Print(const T& value, ::std::ostream* os) {
// Prints the address of the value. We use reinterpret_cast here
// as static_cast doesn't compile when T is a function type.
*os << "@" << reinterpret_cast<const void*>(&value) << " ";
// Then prints the value itself.
UniversalPrint(value, os);
}
#ifdef _MSC_VER
# pragma warning(pop) // Restores the warning state.
#endif // _MSC_VER
};
// Prints a value tersely: for a reference type, the referenced value
// (but not the address) is printed; for a (const) char pointer, the
// NUL-terminated string (but not the pointer) is printed.
// Primary template: for types with no special terse form, terse printing
// is simply UniversalPrint().
template <typename T>
class UniversalTersePrinter {
public:
static void Print(const T& value, ::std::ostream* os) {
UniversalPrint(value, os);
}
};
// Reference types: forwards the referenced value to UniversalPrint()
// rather than going through UniversalPrinter<T&>, which is the printer that
// emits the address.
template <typename T>
class UniversalTersePrinter<T&> {
public:
static void Print(const T& value, ::std::ostream* os) {
UniversalPrint(value, os);
}
};
// Array types: delegates to the array specialization of UniversalPrinter.
template <typename T, size_t N>
class UniversalTersePrinter<T[N]> {
public:
static void Print(const T (&value)[N], ::std::ostream* os) {
UniversalPrinter<T[N]>::Print(value, os);
}
};
template <>
class UniversalTersePrinter<const char*> {
 public:
  // Prints the C string's contents (converted to a string object first).
  // A null pointer is printed as the text NULL instead.
  static void Print(const char* str, ::std::ostream* os) {
    if (str != NULL) {
      UniversalPrint(string(str), os);
      return;
    }
    *os << "NULL";
  }
};
// Mutable char pointers are printed exactly like const char pointers.
template <>
class UniversalTersePrinter<char*> {
public:
static void Print(char* str, ::std::ostream* os) {
// char* converts implicitly to const char*, so reuse that specialization.
UniversalTersePrinter<const char*>::Print(str, os);
}
};
#if GTEST_HAS_STD_WSTRING
template <>
class UniversalTersePrinter<const wchar_t*> {
 public:
  // Prints the wide C string's contents (converted to ::std::wstring
  // first). A null pointer is printed as the text NULL instead.
  static void Print(const wchar_t* str, ::std::ostream* os) {
    if (str != NULL) {
      UniversalPrint(::std::wstring(str), os);
      return;
    }
    *os << "NULL";
  }
};
#endif
// Mutable wchar_t pointers are printed exactly like const wchar_t pointers.
// NOTE(review): the const wchar_t* specialization is only compiled when
// GTEST_HAS_STD_WSTRING is set, while this one is unconditional. Without
// that flag the call below presumably resolves to the primary
// UniversalTersePrinter template - confirm that is intended.
template <>
class UniversalTersePrinter<wchar_t*> {
public:
static void Print(wchar_t* str, ::std::ostream* os) {
UniversalTersePrinter<const wchar_t*>::Print(str, os);
}
};
// Tersely prints 'value' to '*os', using the type T deduced by the compiler
// to select the UniversalTersePrinter specialization.
template <typename T>
void UniversalTersePrint(const T& value, ::std::ostream* os) {
UniversalTersePrinter<T>::Print(value, os);
}
// Prints a value using the type inferred by the compiler. The
// difference between this and UniversalTersePrint() is that for a
// (const) char pointer, this prints both the pointer and the
// NUL-terminated string.
template <typename T>
void UniversalPrint(const T& value, ::std::ostream* os) {
// A workaround for the bug in VC++ 7.1 that prevents us from instantiating
// UniversalPrinter with T directly.
typedef T T1;
UniversalPrinter<T1>::Print(value, os);
}
#if GTEST_HAS_TR1_TUPLE
typedef ::std::vector<string> Strings;
// This helper template allows PrintTo() for tuples and
// UniversalTersePrintTupleFieldsToStrings() to be defined by
// induction on the number of tuple fields. The idea is that
// TuplePrefixPrinter<N>::PrintPrefixTo(t, os) prints the first N
// fields in tuple t, and can be defined in terms of
// TuplePrefixPrinter<N - 1>.
// The inductive case.
template <size_t N>
struct TuplePrefixPrinter {
// Prints the first N fields of a tuple.
template <typename Tuple>
static void PrintPrefixTo(const Tuple& t, ::std::ostream* os) {
TuplePrefixPrinter < N - 1 >::PrintPrefixTo(t, os);
*os << ", ";
UniversalPrinter < typename ::std::tr1::tuple_element < N - 1, Tuple >::type >
::Print(::std::tr1::get < N - 1 > (t), os);
}
// Tersely prints the first N fields of a tuple to a string vector,
// one element for each field.
template <typename Tuple>
static void TersePrintPrefixToStrings(const Tuple& t, Strings* strings) {
TuplePrefixPrinter < N - 1 >::TersePrintPrefixToStrings(t, strings);
::std::stringstream ss;
UniversalTersePrint(::std::tr1::get < N - 1 > (t), &ss);
strings->push_back(ss.str());
}
};
// Base cases.
// N == 0 ends the recursion: there are no fields, so both functions do
// nothing.
template <>
struct TuplePrefixPrinter<0> {
template <typename Tuple>
static void PrintPrefixTo(const Tuple&, ::std::ostream*) {}
template <typename Tuple>
static void TersePrintPrefixToStrings(const Tuple&, Strings*) {}
};
// We have to specialize the entire TuplePrefixPrinter<> class
// template here, even though the definition of
// TersePrintPrefixToStrings() is the same as the generic version, as
// Embarcadero (formerly CodeGear, formerly Borland) C++ doesn't
// support specializing a method template of a class template.
template <>
struct TuplePrefixPrinter<1> {
template <typename Tuple>
static void PrintPrefixTo(const Tuple& t, ::std::ostream* os) {
UniversalPrinter<typename ::std::tr1::tuple_element<0, Tuple>::type>::
Print(::std::tr1::get<0>(t), os);
}
template <typename Tuple>
static void TersePrintPrefixToStrings(const Tuple& t, Strings* strings) {
::std::stringstream ss;
UniversalTersePrint(::std::tr1::get<0>(t), &ss);
strings->push_back(ss.str());
}
};
// Helper function for printing a tuple. T must be instantiated with
// a tuple type.
template <typename T>
void PrintTupleTo(const T& t, ::std::ostream* os) {
*os << "(";
TuplePrefixPrinter< ::std::tr1::tuple_size<T>::value>::
PrintPrefixTo(t, os);
*os << ")";
}
// Prints the fields of a tuple tersely to a string vector, one
// element for each field. See the comment before
// UniversalTersePrint() for how we define "tersely".
template <typename Tuple>
Strings UniversalTersePrintTupleFieldsToStrings(const Tuple& value) {
  // The printer is instantiated with the tuple's full size, so the result
  // holds one tersely-printed string per field, in field order.
  typedef TuplePrefixPrinter< ::std::tr1::tuple_size<Tuple>::value>
      FieldPrinter;

  Strings field_strings;
  FieldPrinter::TersePrintPrefixToStrings(value, &field_strings);
  return field_strings;
}
#endif // GTEST_HAS_TR1_TUPLE
} // namespace internal
template <typename T>
::std::string PrintToString(const T& value) {
::std::stringstream ss;
internal::UniversalTersePrinter<T>::Print(value, &ss);
return ss.str();
}
} // namespace testing
#endif // GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
@@ -0,0 +1,232 @@
// Copyright 2007, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
//
// Utilities for testing Google Test itself and code that uses Google Test
// (e.g. frameworks built on top of Google Test).
#ifndef GTEST_INCLUDE_GTEST_GTEST_SPI_H_
#define GTEST_INCLUDE_GTEST_GTEST_SPI_H_
#include "gtest/gtest.h"
namespace testing {
// This helper class can be used to mock out Google Test failure reporting
// so that we can test Google Test or code that builds on Google Test.
//
// An object of this class appends a TestPartResult object to the
// TestPartResultArray object given in the constructor whenever a Google Test
// failure is reported. It can either intercept only failures that are
// generated in the same thread that created this object or it can intercept
// all generated failures. The scope of this mock object can be controlled with
// the InterceptMode (first) argument of the two-argument constructor.
class GTEST_API_ ScopedFakeTestPartResultReporter
: public TestPartResultReporterInterface {
public:
// The two possible mocking modes of this object.
enum InterceptMode {
INTERCEPT_ONLY_CURRENT_THREAD, // Intercepts only thread local failures.
INTERCEPT_ALL_THREADS // Intercepts all failures.
};
// The c'tor sets this object as the test part result reporter used
// by Google Test. The 'result' parameter specifies where to report the
// results. This reporter will only catch failures generated in the current
// thread. DEPRECATED
explicit ScopedFakeTestPartResultReporter(TestPartResultArray* result);
// Same as above, but you can choose the interception scope of this object.
ScopedFakeTestPartResultReporter(InterceptMode intercept_mode,
TestPartResultArray* result);
// The d'tor restores the previous test part result reporter.
virtual ~ScopedFakeTestPartResultReporter();
// Appends the TestPartResult object to the TestPartResultArray
// received in the constructor.
//
// This method is from the TestPartResultReporterInterface
// interface.
virtual void ReportTestPartResult(const TestPartResult& result);
private:
// Defined outside this header; presumably shared set-up for the two
// constructors - confirm in the implementation.
void Init();
// Interception scope; const, so it cannot change after construction.
const InterceptMode intercept_mode_;
// Presumably the reporter that was active before this object took over,
// kept so the destructor can restore it - confirm in the implementation.
TestPartResultReporterInterface* old_reporter_;
// Destination array for intercepted results; the pointer itself is const.
TestPartResultArray* const result_;
GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedFakeTestPartResultReporter);
};
namespace internal {
// A helper class for implementing EXPECT_FATAL_FAILURE() and
// EXPECT_NONFATAL_FAILURE(). Its destructor verifies that the given
// TestPartResultArray contains exactly one failure that has the given
// type and contains the given substring. If that's not the case, a
// non-fatal failure will be generated.
class GTEST_API_ SingleFailureChecker {
public:
// The constructor remembers the arguments.
SingleFailureChecker(const TestPartResultArray* results,
TestPartResult::Type type,
const string& substr);
// Performs the verification described in the comment above this class.
~SingleFailureChecker();
private:
// The array to inspect; the checker only reads through this pointer.
const TestPartResultArray* const results_;
// The failure type the single recorded result must have.
const TestPartResult::Type type_;
// Own copy of the substring the failure message must contain.
const string substr_;
GTEST_DISALLOW_COPY_AND_ASSIGN_(SingleFailureChecker);
};
} // namespace internal
} // namespace testing
// A set of macros for testing Google Test assertions or code that's expected
// to generate Google Test fatal failures. It verifies that the given
// statement will cause exactly one fatal Google Test failure with 'substr'
// being part of the failure message.
//
// There are two different versions of this macro. EXPECT_FATAL_FAILURE only
// affects and considers failures generated in the current thread and
// EXPECT_FATAL_FAILURE_ON_ALL_THREADS does the same but for all threads.
//
// The verification of the assertion is done correctly even when the statement
// throws an exception or aborts the current function.
//
// Known restrictions:
// - 'statement' cannot reference local non-static variables or
// non-static members of the current object.
// - 'statement' cannot return a value.
// - You cannot stream a failure message to this macro.
//
// Note that even though the implementations of the following two
// macros are much alike, we cannot refactor them to use a common
// helper macro, due to some peculiarity in how the preprocessor
// works. The AcceptsMacroThatExpandsToUnprotectedComma test in
// gtest_unittest.cc will fail to compile if we do that.
// Implementation notes: 'statement' becomes the body of a static member
// function of a local class, which is why it cannot reference local
// non-static variables or non-static members. gtest_reporter lives in the
// inner scope, so it is destroyed before gtest_checker, whose destructor
// then inspects gtest_failures.
#define EXPECT_FATAL_FAILURE(statement, substr) \
do { \
class GTestExpectFatalFailureHelper {\
public:\
static void Execute() { statement; }\
};\
::testing::TestPartResultArray gtest_failures;\
::testing::internal::SingleFailureChecker gtest_checker(\
&gtest_failures, ::testing::TestPartResult::kFatalFailure, (substr));\
{\
::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
::testing::ScopedFakeTestPartResultReporter:: \
INTERCEPT_ONLY_CURRENT_THREAD, &gtest_failures);\
GTestExpectFatalFailureHelper::Execute();\
}\
} while (::testing::internal::AlwaysFalse())
// Same expansion as EXPECT_FATAL_FAILURE, except that the reporter is
// created with INTERCEPT_ALL_THREADS instead of
// INTERCEPT_ONLY_CURRENT_THREAD.
#define EXPECT_FATAL_FAILURE_ON_ALL_THREADS(statement, substr) \
do { \
class GTestExpectFatalFailureHelper {\
public:\
static void Execute() { statement; }\
};\
::testing::TestPartResultArray gtest_failures;\
::testing::internal::SingleFailureChecker gtest_checker(\
&gtest_failures, ::testing::TestPartResult::kFatalFailure, (substr));\
{\
::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
::testing::ScopedFakeTestPartResultReporter:: \
INTERCEPT_ALL_THREADS, &gtest_failures);\
GTestExpectFatalFailureHelper::Execute();\
}\
} while (::testing::internal::AlwaysFalse())
// A macro for testing Google Test assertions or code that's expected to
// generate Google Test non-fatal failures. It asserts that the given
// statement will cause exactly one non-fatal Google Test failure with 'substr'
// being part of the failure message.
//
// There are two different versions of this macro. EXPECT_NONFATAL_FAILURE only
// affects and considers failures generated in the current thread and
// EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS does the same but for all threads.
//
// 'statement' is allowed to reference local variables and members of
// the current object.
//
// The verification of the assertion is done correctly even when the statement
// throws an exception or aborts the current function.
//
// Known restrictions:
// - You cannot stream a failure message to this macro.
//
// Note that even though the implementations of the following two
// macros are much alike, we cannot refactor them to use a common
// helper macro, due to some peculiarity in how the preprocessor
// works. If we do that, the code won't compile when the user gives
// EXPECT_NONFATAL_FAILURE() a statement that contains a macro that
// expands to code containing an unprotected comma. The
// AcceptsMacroThatExpandsToUnprotectedComma test in gtest_unittest.cc
// catches that.
//
// For the same reason, we have to write
// if (::testing::internal::AlwaysTrue()) { statement; }
// instead of
// GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement)
// to avoid an MSVC warning on unreachable code.
// Implementation notes: 'statement' is expanded in place (not inside a
// helper class), which is what lets it reference local variables and
// members. gtest_reporter lives in the inner scope, so it is destroyed
// before gtest_checker, whose destructor then inspects gtest_failures.
#define EXPECT_NONFATAL_FAILURE(statement, substr) \
do {\
::testing::TestPartResultArray gtest_failures;\
::testing::internal::SingleFailureChecker gtest_checker(\
&gtest_failures, ::testing::TestPartResult::kNonFatalFailure, \
(substr));\
{\
::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
::testing::ScopedFakeTestPartResultReporter:: \
INTERCEPT_ONLY_CURRENT_THREAD, &gtest_failures);\
if (::testing::internal::AlwaysTrue()) { statement; }\
}\
} while (::testing::internal::AlwaysFalse())
// Same expansion as EXPECT_NONFATAL_FAILURE, except that the reporter is
// created with INTERCEPT_ALL_THREADS instead of
// INTERCEPT_ONLY_CURRENT_THREAD.
#define EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS(statement, substr) \
do {\
::testing::TestPartResultArray gtest_failures;\
::testing::internal::SingleFailureChecker gtest_checker(\
&gtest_failures, ::testing::TestPartResult::kNonFatalFailure, \
(substr));\
{\
::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
::testing::ScopedFakeTestPartResultReporter::INTERCEPT_ALL_THREADS, \
&gtest_failures);\
if (::testing::internal::AlwaysTrue()) { statement; }\
}\
} while (::testing::internal::AlwaysFalse())
#endif // GTEST_INCLUDE_GTEST_GTEST_SPI_H_
@@ -0,0 +1,197 @@
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: mheule@google.com (Markus Heule)
//
#ifndef GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
#define GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
#include <iosfwd>
#include <vector>
#include "gtest/internal/gtest-internal.h"
#include "gtest/internal/gtest-string.h"
namespace testing {
// A copyable object representing the result of a test part (i.e. an
// assertion or an explicit FAIL(), ADD_FAILURE(), or SUCCEED()).
//
// Don't inherit from TestPartResult as its destructor is not virtual.
class GTEST_API_ TestPartResult {
 public:
  // Outcome categories for a test part (i.e. an assertion or an explicit
  // SUCCEED(), FAIL(), or ADD_FAILURE()).
  enum Type {
    kSuccess,          // Succeeded.
    kNonFatalFailure,  // Failed but the test can continue.
    kFatalFailure      // Failed and the test should be terminated.
  };

  // The only constructor; there is deliberately no default constructor.
  // A NULL 'a_file_name' is stored as the empty string, which file_name()
  // later reports back as NULL.
  TestPartResult(Type a_type,
                 const char* a_file_name,
                 int a_line_number,
                 const char* a_message)
      : type_(a_type),
        file_name_(a_file_name != NULL ? a_file_name : ""),
        line_number_(a_line_number),
        summary_(ExtractSummary(a_message)),
        message_(a_message) {}

  // Outcome of the test part.
  Type type() const { return type_; }

  // Name of the source file where the test part took place, or NULL if it
  // is unknown.
  const char* file_name() const {
    if (file_name_.empty()) {
      return NULL;
    }
    return file_name_.c_str();
  }

  // Line in the source file where the test part took place, or -1 if it is
  // unknown.
  int line_number() const { return line_number_; }

  // Summary of the failure message.
  const char* summary() const { return summary_.c_str(); }

  // Full message associated with the test part.
  const char* message() const { return message_.c_str(); }

  // True iff the test part passed.
  bool passed() const { return type_ == kSuccess; }

  // True iff the test part failed (fatally or not).
  bool failed() const { return !passed(); }

  // True iff the test part failed non-fatally.
  bool nonfatally_failed() const { return type_ == kNonFatalFailure; }

  // True iff the test part failed fatally.
  bool fatally_failed() const { return type_ == kFatalFailure; }

 private:
  Type type_;

  // Produces the summary of a failure message by omitting the stack trace
  // in it.
  static std::string ExtractSummary(const char* message);

  // Source file where the test part took place; "" if unknown.
  std::string file_name_;
  // Source line where the test part took place; -1 if unknown.
  int line_number_;
  std::string summary_;  // The test failure summary.
  std::string message_;  // The test failure message.
};
// Prints a TestPartResult object.
std::ostream& operator<<(std::ostream& os, const TestPartResult& result);
// An array of TestPartResult objects.
//
// Don't inherit from TestPartResultArray as its destructor is not
// virtual.
class GTEST_API_ TestPartResultArray {
public:
// Creates an empty array.
TestPartResultArray() {}
// Appends the given TestPartResult to the array.
void Append(const TestPartResult& result);
// Returns the TestPartResult at the given index (0-based).
const TestPartResult& GetTestPartResult(int index) const;
// Returns the number of TestPartResult objects in the array.
int size() const;
private:
// Backing storage for the results.
std::vector<TestPartResult> array_;
GTEST_DISALLOW_COPY_AND_ASSIGN_(TestPartResultArray);
};
// This interface knows how to report a test part result.
class TestPartResultReporterInterface {
public:
// Virtual so that implementations can be destroyed through this interface.
virtual ~TestPartResultReporterInterface() {}
// Called with each test part result to report; implementations decide what
// to do with it.
virtual void ReportTestPartResult(const TestPartResult& result) = 0;
};
namespace internal {
// This helper class is used by {ASSERT|EXPECT}_NO_FATAL_FAILURE to check if a
// statement generates new fatal failures. To do so it registers itself as the
// current test part result reporter. Besides checking if fatal failures were
// reported, it only delegates the reporting to the former result reporter.
// The original result reporter is restored in the destructor.
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
class GTEST_API_ HasNewFatalFailureHelper
: public TestPartResultReporterInterface {
public:
HasNewFatalFailureHelper();
virtual ~HasNewFatalFailureHelper();
// Implements TestPartResultReporterInterface.
virtual void ReportTestPartResult(const TestPartResult& result);
// Returns the value of the has_new_fatal_failure_ flag.
bool has_new_fatal_failure() const {
return has_new_fatal_failure_;
}
private:
// Presumably set once a fatal failure has been reported through this
// helper - confirm in the implementation.
bool has_new_fatal_failure_;
// Presumably the reporter that was active before this helper registered
// itself, kept so it can be restored - confirm in the implementation.
TestPartResultReporterInterface* original_reporter_;
GTEST_DISALLOW_COPY_AND_ASSIGN_(HasNewFatalFailureHelper);
};
} // namespace internal
} // namespace testing
#endif // GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
@@ -0,0 +1,263 @@
// Copyright 2008 Google Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
#ifndef GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
#define GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
// This header implements typed tests and type-parameterized tests.
// Typed (aka type-driven) tests repeat the same test for types in a
// list. You must know which types you want to test with when writing
// typed tests. Here's how you do it:
#if 0
// First, define a fixture class template. It should be parameterized
// by a type. Remember to derive it from testing::Test.
template <typename T>
class FooTest : public testing::Test {
public:
...
typedef std::list<T> List;
static T shared_;
T value_;
};
// Next, associate a list of types with the test case, which will be
// repeated for each type in the list. The typedef is necessary for
// the macro to parse correctly.
typedef testing::Types<char, int, unsigned int> MyTypes;
TYPED_TEST_CASE(FooTest, MyTypes);
// If the type list contains only one type, you can write that type
// directly without Types<...>:
// TYPED_TEST_CASE(FooTest, int);
// Then, use TYPED_TEST() instead of TEST_F() to define as many typed
// tests for this test case as you want.
TYPED_TEST(FooTest, DoesBlah) {
// Inside a test, refer to TypeParam to get the type parameter.
// Since we are inside a derived class template, C++ requires us to
// visit the members of FooTest via 'this'.
TypeParam n = this->value_;
// To visit static members of the fixture, add the TestFixture::
// prefix.
n += TestFixture::shared_;
// To refer to typedefs in the fixture, add the "typename
// TestFixture::" prefix.
typename TestFixture::List values;
values.push_back(n);
...
}
TYPED_TEST(FooTest, HasPropertyA) {
...
}
#endif // 0
// Type-parameterized tests are abstract test patterns parameterized
// by a type. Compared with typed tests, type-parameterized tests
// allow you to define the test pattern without knowing what the type
// parameters are. The defined pattern can be instantiated with
// different types any number of times, in any number of translation
// units.
//
// If you are designing an interface or concept, you can define a
// suite of type-parameterized tests to verify properties that any
// valid implementation of the interface/concept should have. Then,
// each implementation can easily instantiate the test suite to verify
// that it conforms to the requirements, without having to write
// similar tests repeatedly. Here's an example:
#if 0
// First, define a fixture class template. It should be parameterized
// by a type. Remember to derive it from testing::Test.
template <typename T>
class FooTest : public testing::Test {
...
};
// Next, declare that you will define a type-parameterized test case
// (the _P suffix is for "parameterized" or "pattern", whichever you
// prefer):
TYPED_TEST_CASE_P(FooTest);
// Then, use TYPED_TEST_P() to define as many type-parameterized tests
// for this type-parameterized test case as you want.
TYPED_TEST_P(FooTest, DoesBlah) {
// Inside a test, refer to TypeParam to get the type parameter.
TypeParam n = 0;
...
}
TYPED_TEST_P(FooTest, HasPropertyA) {
...
}
// Now the tricky part: you need to register all test patterns before
// you can instantiate them. The first argument of the macro is the
// test case name; the rest are the names of the tests in this test
// case.
REGISTER_TYPED_TEST_CASE_P(FooTest,
DoesBlah, HasPropertyA);
// Finally, you are free to instantiate the pattern with the types you
// want. If you put the above code in a header file, you can #include
// it in multiple C++ source files and instantiate it multiple times.
//
// To distinguish different instances of the pattern, the first
// argument to the INSTANTIATE_* macro is a prefix that will be added
// to the actual test case name. Remember to pick unique prefixes for
// different instances.
typedef testing::Types<char, int, unsigned int> MyTypes;
INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, MyTypes);
// If the type list contains only one type, you can write that type
// directly without Types<...>:
// INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, int);
#endif // 0
#include "gtest/internal/gtest-port.h"
#include "gtest/internal/gtest-type-util.h"
// Implements typed tests.
#if GTEST_HAS_TYPED_TEST
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Expands to gtest_type_params_<TestCaseName>_, the name of the typedef
// that holds the type parameters of the given test case.
// TYPED_TEST_CASE declares a typedef with this name and TYPED_TEST reads
// it, so both must be given the same test case name.
# define GTEST_TYPE_PARAMS_(TestCaseName) gtest_type_params_##TestCaseName##_
// Associates a list of types with the typed test case CaseName by
// declaring a typedef, named GTEST_TYPE_PARAMS_(CaseName), for
// ::testing::internal::TypeList< Types >::type.  The expansion carries no
// trailing semicolon; the caller writes it after the macro invocation.
//
// The 'Types' template argument below must have spaces around it
// since some compilers may choke on '>>' when passing a template
// instance (e.g. Types<int>)
# define TYPED_TEST_CASE(CaseName, Types) \
typedef ::testing::internal::TypeList< Types >::type \
GTEST_TYPE_PARAMS_(CaseName)
// Defines the typed test TestName in the typed test case CaseName.
//
// The expansion has three parts:
//   1. A class template GTEST_TEST_CLASS_NAME_(CaseName, TestName) that
//      derives from CaseName<gtest_TypeParam_> and provides the TestFixture
//      and TypeParam typedefs used inside the test body.
//   2. A bool that is initialized with the result of
//      TypeParameterizedTest<...>::Register(), called with the type list
//      named by GTEST_TYPE_PARAMS_(CaseName).  The variable is declared
//      static so that it has internal linkage: it is never referenced by
//      name, and an external-linkage definition emitted from a macro would
//      be exported needlessly and trips warnings such as Clang's
//      -Wmissing-variable-declarations.
//   3. The out-of-class signature of TestBody(); the braces that follow the
//      macro invocation become the body of the test.
# define TYPED_TEST(CaseName, TestName) \
  template <typename gtest_TypeParam_> \
  class GTEST_TEST_CLASS_NAME_(CaseName, TestName) \
      : public CaseName<gtest_TypeParam_> { \
   private: \
    typedef CaseName<gtest_TypeParam_> TestFixture; \
    typedef gtest_TypeParam_ TypeParam; \
    virtual void TestBody(); \
  }; \
  static bool gtest_##CaseName##_##TestName##_registered_ \
      GTEST_ATTRIBUTE_UNUSED_ = \
      ::testing::internal::TypeParameterizedTest< \
          CaseName, \
          ::testing::internal::TemplateSel< \
              GTEST_TEST_CLASS_NAME_(CaseName, TestName)>, \
          GTEST_TYPE_PARAMS_(CaseName)>::Register( \
              "", #CaseName, #TestName, 0); \
  template <typename gtest_TypeParam_> \
  void GTEST_TEST_CLASS_NAME_(CaseName, TestName)<gtest_TypeParam_>::TestBody()
#endif // GTEST_HAS_TYPED_TEST
// Implements type-parameterized tests.
#if GTEST_HAS_TYPED_TEST_P
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Expands to the namespace name that the type-parameterized tests for
// the given type-parameterized test case are defined in (currently
// gtest_case_<TestCaseName>_). The exact name of the namespace is
// subject to change without notice.
# define GTEST_CASE_NAMESPACE_(TestCaseName) \
gtest_case_##TestCaseName##_
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Expands to the name of the variable used to remember the names of
// the defined tests in the given test case (currently
// gtest_typed_test_case_p_state_<TestCaseName>_).
# define GTEST_TYPED_TEST_CASE_P_STATE_(TestCaseName) \
gtest_typed_test_case_p_state_##TestCaseName##_
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE DIRECTLY.
//
// Expands to the name of the variable used to remember the names of
// the registered tests in the given test case (currently
// gtest_registered_test_names_<TestCaseName>_).
# define GTEST_REGISTERED_TEST_NAMES_(TestCaseName) \
gtest_registered_test_names_##TestCaseName##_
// The variables defined in the type-parameterized test macros are
// static as typically these macros are used in a .h file that can be
// #included in multiple translation units linked together.
//
// Declares that a type-parameterized test case named CaseName will be
// defined.  Concretely, this defines a static
// ::testing::internal::TypedTestCasePState object whose name is
// GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).  TYPED_TEST_P and
// REGISTER_TYPED_TEST_CASE_P call member functions on that object, so this
// macro has to appear before them.  The expansion carries no trailing
// semicolon; the caller writes it.
# define TYPED_TEST_CASE_P(CaseName) \
static ::testing::internal::TypedTestCasePState \
GTEST_TYPED_TEST_CASE_P_STATE_(CaseName)
// Defines the type-parameterized test TestName of the test case CaseName.
//
// Inside namespace GTEST_CASE_NAMESPACE_(CaseName) the macro declares a
// class template named TestName that derives from
// CaseName<gtest_TypeParam_> and provides the TestFixture and TypeParam
// typedefs used inside the test body.  It also defines a static bool
// initialized by calling AddTestName() on
// GTEST_TYPED_TEST_CASE_P_STATE_(CaseName) with __FILE__, __LINE__ and the
// stringified case and test names.
//
// The expansion ends with the out-of-class signature of TestBody(), so the
// braces that follow the macro invocation become the body of the test.
# define TYPED_TEST_P(CaseName, TestName) \
namespace GTEST_CASE_NAMESPACE_(CaseName) { \
template <typename gtest_TypeParam_> \
class TestName : public CaseName<gtest_TypeParam_> { \
private: \
typedef CaseName<gtest_TypeParam_> TestFixture; \
typedef gtest_TypeParam_ TypeParam; \
virtual void TestBody(); \
}; \
static bool gtest_##TestName##_defined_ GTEST_ATTRIBUTE_UNUSED_ = \
GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).AddTestName(\
__FILE__, __LINE__, #CaseName, #TestName); \
} \
template <typename gtest_TypeParam_> \
void GTEST_CASE_NAMESPACE_(CaseName)::TestName<gtest_TypeParam_>::TestBody()
// Registers the tests listed in the variadic arguments as the tests of the
// type-parameterized test case CaseName.
//
// Inside namespace GTEST_CASE_NAMESPACE_(CaseName) this declares the
// typedef gtest_AllTests_ for ::testing::internal::Templates<...>::type
// built from the listed test names.  It then defines a static
// const char* const named GTEST_REGISTERED_TEST_NAMES_(CaseName), set to the
// result of calling VerifyRegisteredTestNames() on
// GTEST_TYPED_TEST_CASE_P_STATE_(CaseName) with __FILE__, __LINE__ and the
// stringified argument list.  The expansion carries no trailing semicolon;
// the caller writes it.
# define REGISTER_TYPED_TEST_CASE_P(CaseName, ...) \
namespace GTEST_CASE_NAMESPACE_(CaseName) { \
typedef ::testing::internal::Templates<__VA_ARGS__>::type gtest_AllTests_; \
} \
static const char* const GTEST_REGISTERED_TEST_NAMES_(CaseName) = \
GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).VerifyRegisteredTestNames(\
__FILE__, __LINE__, #__VA_ARGS__)
// Instantiates the type-parameterized test case CaseName for the types in
// Types.  Prefix is stringified and passed to Register() together with the
// stringified case name and GTEST_REGISTERED_TEST_NAMES_(CaseName).
//
// The bool that receives the result of Register() is declared static, in
// line with the other variables defined by the type-parameterized test
// macros: it is never referenced by name, so it has no reason to have
// external linkage.  The expansion carries no trailing semicolon; the
// caller writes it.
//
// The 'Types' template argument below must have spaces around it
// since some compilers may choke on '>>' when passing a template
// instance (e.g. Types<int>)
# define INSTANTIATE_TYPED_TEST_CASE_P(Prefix, CaseName, Types) \
  static bool gtest_##Prefix##_##CaseName GTEST_ATTRIBUTE_UNUSED_ = \
      ::testing::internal::TypeParameterizedTestCase<CaseName, \
          GTEST_CASE_NAMESPACE_(CaseName)::gtest_AllTests_, \
          ::testing::internal::TypeList< Types >::type>::Register( \
              #Prefix, #CaseName, GTEST_REGISTERED_TEST_NAMES_(CaseName))
#endif // GTEST_HAS_TYPED_TEST_P
#endif // GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,368 @@
// Copyright 2006, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// This file is AUTOMATICALLY GENERATED on 10/31/2011 by command
// 'gen_gtest_pred_impl.py 5'. DO NOT EDIT BY HAND!
//
// Implements a family of generic predicate assertion macros.
#ifndef GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
#define GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
// Makes sure this header is not included before gtest.h.
#ifndef GTEST_INCLUDE_GTEST_GTEST_H_
# error Do not include gtest_pred_impl.h directly. Include gtest.h instead.
#endif // GTEST_INCLUDE_GTEST_GTEST_H_
// This header implements a family of generic predicate assertion
// macros:
//
// ASSERT_PRED_FORMAT1(pred_format, v1)
// ASSERT_PRED_FORMAT2(pred_format, v1, v2)
// ...
//
// where pred_format is a function or functor that takes n (in the
// case of ASSERT_PRED_FORMATn) values and their source expression
// text, and returns a testing::AssertionResult. See the definition
// of ASSERT_EQ in gtest.h for an example.
//
// If you don't care about formatting, you can use the more
// restrictive version:
//
// ASSERT_PRED1(pred, v1)
// ASSERT_PRED2(pred, v1, v2)
// ...
//
// where pred is an n-ary function or functor that returns bool,
// and the values v1, v2, ..., must support the << operator for
// streaming to std::ostream.
//
// We also define the EXPECT_* variations.
//
// For now we only support predicates whose arity is at most 5.
// Please email googletestframework@googlegroups.com if you need
// support for higher arities.
// GTEST_ASSERT_ is the basic statement to which all of the assertions
// in this file reduce. Don't use this in your code.
//
// 'expression' initializes a const ::testing::AssertionResult named
// gtest_ar, which doubles as the condition of an if statement.  When the
// result tests true the statement does nothing (empty branch); otherwise
// on_failure is invoked with gtest_ar.failure_message().
//
// The expansion deliberately ends without a semicolon, so whatever the
// caller writes after the macro invocation continues the on_failure(...)
// expression in the else branch.
#define GTEST_ASSERT_(expression, on_failure) \
GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
if (const ::testing::AssertionResult gtest_ar = (expression)) \
; \
else \
on_failure(gtest_ar.failure_message())
// Backs {EXPECT|ASSERT}_PRED1.  Not meant to be called from user code.
//
// Applies the unary predicate 'pred' to v1.  Returns AssertionSuccess()
// when the predicate holds.  Otherwise returns an AssertionFailure() whose
// message shows the call as written in the source (pred_text and e1 are the
// source text of the predicate and of its argument) followed by the actual
// value of the argument, which therefore has to be streamable.
template <typename Pred, typename T1>
AssertionResult AssertPred1Helper(const char* pred_text,
                                  const char* e1,
                                  Pred pred,
                                  const T1& v1) {
  if (pred(v1)) return AssertionSuccess();

  AssertionResult failure = AssertionFailure();
  // First line: the call as it appears in the source.
  failure << pred_text << "(" << e1 << ") evaluates to false, where";
  // Then the argument's source text next to its value.
  failure << "\n" << e1 << " evaluates to " << v1;
  return failure;
}
// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT1.
// Don't use this in your code.
//
// Calls pred_format with the source text of v1 followed by its value and
// hands the result to GTEST_ASSERT_ together with the failure handler.
#define GTEST_PRED_FORMAT1_(pred_format, v1, on_failure)\
GTEST_ASSERT_(pred_format(#v1, v1), \
on_failure)
// Internal macro for implementing {EXPECT|ASSERT}_PRED1. Don't use
// this in your code.
//
// Forwards the source text of the predicate and of its argument, then the
// predicate and the argument themselves, to ::testing::AssertPred1Helper.
#define GTEST_PRED1_(pred, v1, on_failure)\
GTEST_ASSERT_(::testing::AssertPred1Helper(#pred, \
#v1, \
pred, \
v1), on_failure)
// Unary predicate assertion macros.
// The EXPECT_* forms pass GTEST_NONFATAL_FAILURE_ as the failure handler;
// the ASSERT_* forms pass GTEST_FATAL_FAILURE_.
#define EXPECT_PRED_FORMAT1(pred_format, v1) \
GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_NONFATAL_FAILURE_)
#define EXPECT_PRED1(pred, v1) \
GTEST_PRED1_(pred, v1, GTEST_NONFATAL_FAILURE_)
#define ASSERT_PRED_FORMAT1(pred_format, v1) \
GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_FATAL_FAILURE_)
#define ASSERT_PRED1(pred, v1) \
GTEST_PRED1_(pred, v1, GTEST_FATAL_FAILURE_)
// Backs {EXPECT|ASSERT}_PRED2.  Not meant to be called from user code.
//
// Applies the binary predicate 'pred' to (v1, v2).  Returns
// AssertionSuccess() when the predicate holds.  Otherwise returns an
// AssertionFailure() whose message shows the call as written in the source
// (pred_text, e1 and e2 are the source text of the predicate and of its
// arguments) followed by the actual value of each argument, which therefore
// has to be streamable.
template <typename Pred, typename T1, typename T2>
AssertionResult AssertPred2Helper(const char* pred_text,
                                  const char* e1,
                                  const char* e2,
                                  Pred pred,
                                  const T1& v1,
                                  const T2& v2) {
  if (pred(v1, v2)) return AssertionSuccess();

  AssertionResult failure = AssertionFailure();
  // First line: the call as it appears in the source.
  failure << pred_text << "(" << e1 << ", " << e2
          << ") evaluates to false, where";
  // Then one line per argument: source text next to its value.
  failure << "\n" << e1 << " evaluates to " << v1;
  failure << "\n" << e2 << " evaluates to " << v2;
  return failure;
}
// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT2.
// Don't use this in your code.
//
// Calls pred_format with the source text of both values followed by the
// values themselves and hands the result to GTEST_ASSERT_ together with
// the failure handler.
#define GTEST_PRED_FORMAT2_(pred_format, v1, v2, on_failure)\
GTEST_ASSERT_(pred_format(#v1, #v2, v1, v2), \
on_failure)
// Internal macro for implementing {EXPECT|ASSERT}_PRED2. Don't use
// this in your code.
//
// Forwards the source text of the predicate and of its arguments, then the
// predicate and the arguments themselves, to ::testing::AssertPred2Helper.
#define GTEST_PRED2_(pred, v1, v2, on_failure)\
GTEST_ASSERT_(::testing::AssertPred2Helper(#pred, \
#v1, \
#v2, \
pred, \
v1, \
v2), on_failure)
// Binary predicate assertion macros.
// The EXPECT_* forms pass GTEST_NONFATAL_FAILURE_ as the failure handler;
// the ASSERT_* forms pass GTEST_FATAL_FAILURE_.
#define EXPECT_PRED_FORMAT2(pred_format, v1, v2) \
GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_NONFATAL_FAILURE_)
#define EXPECT_PRED2(pred, v1, v2) \
GTEST_PRED2_(pred, v1, v2, GTEST_NONFATAL_FAILURE_)
#define ASSERT_PRED_FORMAT2(pred_format, v1, v2) \
GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_FATAL_FAILURE_)
#define ASSERT_PRED2(pred, v1, v2) \
GTEST_PRED2_(pred, v1, v2, GTEST_FATAL_FAILURE_)
// Backs {EXPECT|ASSERT}_PRED3.  Not meant to be called from user code.
//
// Applies the ternary predicate 'pred' to (v1, v2, v3).  Returns
// AssertionSuccess() when the predicate holds.  Otherwise returns an
// AssertionFailure() whose message shows the call as written in the source
// (pred_text and e1..e3 are the source text of the predicate and of its
// arguments) followed by the actual value of each argument, which therefore
// has to be streamable.
template <typename Pred, typename T1, typename T2, typename T3>
AssertionResult AssertPred3Helper(const char* pred_text,
                                  const char* e1,
                                  const char* e2,
                                  const char* e3,
                                  Pred pred,
                                  const T1& v1,
                                  const T2& v2,
                                  const T3& v3) {
  if (pred(v1, v2, v3)) return AssertionSuccess();

  AssertionResult failure = AssertionFailure();
  // First line: the call as it appears in the source.
  failure << pred_text << "(" << e1 << ", " << e2 << ", " << e3
          << ") evaluates to false, where";
  // Then one line per argument: source text next to its value.
  failure << "\n" << e1 << " evaluates to " << v1;
  failure << "\n" << e2 << " evaluates to " << v2;
  failure << "\n" << e3 << " evaluates to " << v3;
  return failure;
}
// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT3.
// Don't use this in your code.
//
// Calls pred_format with the source text of the three values followed by
// the values themselves and hands the result to GTEST_ASSERT_ together
// with the failure handler.
#define GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, on_failure)\
GTEST_ASSERT_(pred_format(#v1, #v2, #v3, v1, v2, v3), \
on_failure)
// Internal macro for implementing {EXPECT|ASSERT}_PRED3. Don't use
// this in your code.
//
// Forwards the source text of the predicate and of its arguments, then the
// predicate and the arguments themselves, to ::testing::AssertPred3Helper.
#define GTEST_PRED3_(pred, v1, v2, v3, on_failure)\
GTEST_ASSERT_(::testing::AssertPred3Helper(#pred, \
#v1, \
#v2, \
#v3, \
pred, \
v1, \
v2, \
v3), on_failure)
// Ternary predicate assertion macros.
// The EXPECT_* forms pass GTEST_NONFATAL_FAILURE_ as the failure handler;
// the ASSERT_* forms pass GTEST_FATAL_FAILURE_.
#define EXPECT_PRED_FORMAT3(pred_format, v1, v2, v3) \
GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_NONFATAL_FAILURE_)
#define EXPECT_PRED3(pred, v1, v2, v3) \
GTEST_PRED3_(pred, v1, v2, v3, GTEST_NONFATAL_FAILURE_)
#define ASSERT_PRED_FORMAT3(pred_format, v1, v2, v3) \
GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_FATAL_FAILURE_)
#define ASSERT_PRED3(pred, v1, v2, v3) \
GTEST_PRED3_(pred, v1, v2, v3, GTEST_FATAL_FAILURE_)
// Backs {EXPECT|ASSERT}_PRED4.  Not meant to be called from user code.
//
// Applies the 4-ary predicate 'pred' to (v1, v2, v3, v4).  Returns
// AssertionSuccess() when the predicate holds.  Otherwise returns an
// AssertionFailure() whose message shows the call as written in the source
// (pred_text and e1..e4 are the source text of the predicate and of its
// arguments) followed by the actual value of each argument, which therefore
// has to be streamable.
template <typename Pred, typename T1, typename T2, typename T3, typename T4>
AssertionResult AssertPred4Helper(const char* pred_text,
                                  const char* e1,
                                  const char* e2,
                                  const char* e3,
                                  const char* e4,
                                  Pred pred,
                                  const T1& v1,
                                  const T2& v2,
                                  const T3& v3,
                                  const T4& v4) {
  if (pred(v1, v2, v3, v4)) return AssertionSuccess();

  AssertionResult failure = AssertionFailure();
  // First line: the call as it appears in the source.
  failure << pred_text << "(" << e1 << ", " << e2 << ", " << e3 << ", "
          << e4 << ") evaluates to false, where";
  // Then one line per argument: source text next to its value.
  failure << "\n" << e1 << " evaluates to " << v1;
  failure << "\n" << e2 << " evaluates to " << v2;
  failure << "\n" << e3 << " evaluates to " << v3;
  failure << "\n" << e4 << " evaluates to " << v4;
  return failure;
}
// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT4.
// Don't use this in your code.
//
// Calls pred_format with the source text of the four values followed by
// the values themselves and hands the result to GTEST_ASSERT_ together
// with the failure handler.
#define GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, on_failure)\
GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, v1, v2, v3, v4), \
on_failure)
// Internal macro for implementing {EXPECT|ASSERT}_PRED4. Don't use
// this in your code.
//
// Forwards the source text of the predicate and of its arguments, then the
// predicate and the arguments themselves, to ::testing::AssertPred4Helper.
#define GTEST_PRED4_(pred, v1, v2, v3, v4, on_failure)\
GTEST_ASSERT_(::testing::AssertPred4Helper(#pred, \
#v1, \
#v2, \
#v3, \
#v4, \
pred, \
v1, \
v2, \
v3, \
v4), on_failure)
// 4-ary predicate assertion macros.
// The EXPECT_* forms pass GTEST_NONFATAL_FAILURE_ as the failure handler;
// the ASSERT_* forms pass GTEST_FATAL_FAILURE_.
#define EXPECT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \
GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_)
#define EXPECT_PRED4(pred, v1, v2, v3, v4) \
GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_)
#define ASSERT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \
GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_FATAL_FAILURE_)
#define ASSERT_PRED4(pred, v1, v2, v3, v4) \
GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_FATAL_FAILURE_)
// Backs {EXPECT|ASSERT}_PRED5.  Not meant to be called from user code.
//
// Applies the 5-ary predicate 'pred' to (v1, v2, v3, v4, v5).  Returns
// AssertionSuccess() when the predicate holds.  Otherwise returns an
// AssertionFailure() whose message shows the call as written in the source
// (pred_text and e1..e5 are the source text of the predicate and of its
// arguments) followed by the actual value of each argument, which therefore
// has to be streamable.
template <typename Pred,
          typename T1, typename T2, typename T3, typename T4, typename T5>
AssertionResult AssertPred5Helper(const char* pred_text,
                                  const char* e1,
                                  const char* e2,
                                  const char* e3,
                                  const char* e4,
                                  const char* e5,
                                  Pred pred,
                                  const T1& v1,
                                  const T2& v2,
                                  const T3& v3,
                                  const T4& v4,
                                  const T5& v5) {
  if (pred(v1, v2, v3, v4, v5)) return AssertionSuccess();

  AssertionResult failure = AssertionFailure();
  // First line: the call as it appears in the source.
  failure << pred_text << "(" << e1 << ", " << e2 << ", " << e3 << ", "
          << e4 << ", " << e5 << ") evaluates to false, where";
  // Then one line per argument: source text next to its value.
  failure << "\n" << e1 << " evaluates to " << v1;
  failure << "\n" << e2 << " evaluates to " << v2;
  failure << "\n" << e3 << " evaluates to " << v3;
  failure << "\n" << e4 << " evaluates to " << v4;
  failure << "\n" << e5 << " evaluates to " << v5;
  return failure;
}
// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT5.
// Don't use this in your code.
//
// Calls pred_format with the source text of the five values followed by
// the values themselves and hands the result to GTEST_ASSERT_ together
// with the failure handler.
#define GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, on_failure)\
GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, #v5, v1, v2, v3, v4, v5), \
on_failure)
// Internal macro for implementing {EXPECT|ASSERT}_PRED5. Don't use
// this in your code.
//
// Forwards the source text of the predicate and of its arguments, then the
// predicate and the arguments themselves, to ::testing::AssertPred5Helper.
#define GTEST_PRED5_(pred, v1, v2, v3, v4, v5, on_failure)\
GTEST_ASSERT_(::testing::AssertPred5Helper(#pred, \
#v1, \
#v2, \
#v3, \
#v4, \
#v5, \
pred, \
v1, \
v2, \
v3, \
v4, \
v5), on_failure)
// 5-ary predicate assertion macros.
// The EXPECT_* forms pass GTEST_NONFATAL_FAILURE_ as the failure handler;
// the ASSERT_* forms pass GTEST_FATAL_FAILURE_.
#define EXPECT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
#define EXPECT_PRED5(pred, v1, v2, v3, v4, v5) \
GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
#define ASSERT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)
#define ASSERT_PRED5(pred, v1, v2, v3, v4, v5) \
GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)
#endif // GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
@@ -0,0 +1,58 @@
// Copyright 2006, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
//
// Google C++ Testing Framework definitions useful in production code.
#ifndef GTEST_INCLUDE_GTEST_GTEST_PROD_H_
#define GTEST_INCLUDE_GTEST_GTEST_PROD_H_
// When you need to test the private or protected members of a class,
// use the FRIEND_TEST macro to declare your tests as friends of the
// class. For example:
//
// class MyClass {
// private:
// void MyMethod();
// FRIEND_TEST(MyClassTest, MyMethod);
// };
//
// class MyClassTest : public testing::Test {
// // ...
// };
//
// TEST_F(MyClassTest, MyMethod) {
// // Can call MyClass::MyMethod() here.
// }
// Expands to a friend declaration for the class named
// <test_case_name>_<test_name>_Test.  The expansion carries no trailing
// semicolon; the caller writes it, as in the example above.
// NOTE(review): this relies on the test-definition macros generating a
// class with exactly that name; that naming scheme is defined elsewhere.
#define FRIEND_TEST(test_case_name, test_name)\
friend class test_case_name##_##test_name##_Test
#endif // GTEST_INCLUDE_GTEST_GTEST_PROD_H_
@@ -0,0 +1,330 @@
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
//
// The Google C++ Testing Framework (Google Test)
//
// This header file defines internal utilities needed for implementing
// death tests. They are subject to change without notice.
#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
#include "gtest/internal/gtest-internal.h"
#include <stdio.h>
namespace testing {
namespace internal {
GTEST_DECLARE_string_(internal_run_death_test);
// Names of the flags (needed for parsing Google Test flags).
// Each constant holds the bare flag name; comments in this header spell
// the corresponding command-line options with a leading "--gtest_"
// (e.g. --gtest_death_test_style, --gtest_internal_run_death_test).
const char kDeathTestStyleFlag[] = "death_test_style";
const char kDeathTestUseFork[] = "death_test_use_fork";
const char kInternalRunDeathTestFlag[] = "internal_run_death_test";
#if GTEST_HAS_DEATH_TEST
// DeathTest is a class that hides much of the complexity of the
// GTEST_DEATH_TEST_ macro. It is abstract; its static Create method
// returns a concrete class that depends on the prevailing death test
// style, as defined by the --gtest_death_test_style and/or
// --gtest_internal_run_death_test flags.
// In describing the results of death tests, these terms are used with
// the corresponding definitions:
//
// exit status: The integer exit information in the format specified
// by wait(2)
// exit code: The integer code passed to exit(3), _exit(2), or
// returned from main()
class GTEST_API_ DeathTest {
 public:
  // Decides what to do for the current death test and reports the decision
  // through the "test" out-parameter.
  //
  // Returns false when the appropriate action could not be determined (for
  // example, the gtest_death_test_style flag holds an invalid value);
  // LastMessage() then returns a more detailed message.  Otherwise *test
  // is set: to NULL when the death test should be skipped, or to the
  // address of a new concrete DeathTest object that controls the execution
  // of the current test.
  static bool Create(const char* statement, const RE* regex,
                     const char* file, int line, DeathTest** test);

  DeathTest();
  virtual ~DeathTest() { }

  // Helper whose destructor aborts the death test it was constructed with,
  // giving TEST_ENCOUNTERED_RETURN_STATEMENT as the reason.
  class ReturnSentinel {
   public:
    explicit ReturnSentinel(DeathTest* test) : test_(test) { }
    ~ReturnSentinel() { test_->Abort(TEST_ENCOUNTERED_RETURN_STATEMENT); }

   private:
    DeathTest* const test_;
    GTEST_DISALLOW_COPY_AND_ASSIGN_(ReturnSentinel);
  } GTEST_ATTRIBUTE_UNUSED_;

  // Roles a process may take when it encounters a death test.
  //   OVERSEE_TEST - prepare the appropriate environment for a child
  //                  process to execute the death test, then wait for it
  //                  to complete.
  //   EXECUTE_TEST - execute the death test logic immediately.
  enum TestRole { OVERSEE_TEST, EXECUTE_TEST };

  // The reasons for which a test might be aborted.
  enum AbortReason {
    TEST_ENCOUNTERED_RETURN_STATEMENT,
    TEST_THREW_EXCEPTION,
    TEST_DID_NOT_DIE
  };

  // Assumes one of the roles listed in TestRole.
  virtual TestRole AssumeRole() = 0;

  // Waits for the death test to finish and returns its status.
  virtual int Wait() = 0;

  // Returns true if the death test passed, i.e. the test process exited
  // during the test, its exit status matches a user-supplied predicate and
  // its stderr output matches a user-supplied regular expression.
  //
  // Wait and Passed are kept separate because the user-supplied predicate
  // may be a macro expression rather than a function pointer or functor.
  virtual bool Passed(bool exit_status_ok) = 0;

  // Signals that the death test did not die as expected.
  virtual void Abort(AbortReason reason) = 0;

  // Returns a human-readable message describing the outcome of the last
  // death test.
  static const char* LastMessage();

  static void set_last_death_test_message(const std::string& message);

 private:
  // Description of the outcome of the last death test.
  static std::string last_death_test_message_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(DeathTest);
};
// Abstract factory for death tests; a separate interface so that it can be
// mocked out for testing.
class DeathTestFactory {
 public:
  virtual ~DeathTestFactory() { }

  // Takes the same arguments as DeathTest::Create: the statement text, the
  // regular expression, the source location and the out-parameter that
  // receives the DeathTest pointer.
  virtual bool Create(const char* statement, const RE* regex,
                      const char* file, int line, DeathTest** test) = 0;
};
// The DeathTestFactory implementation used in normal operation.
class DefaultDeathTestFactory : public DeathTestFactory {
 public:
  // Implements DeathTestFactory::Create; defined out of line.
  virtual bool Create(const char* statement, const RE* regex,
                      const char* file, int line, DeathTest** test);
};
// Returns true if exit_status describes a process that was terminated
// by a signal, or exited normally with a nonzero exit code.
GTEST_API_ bool ExitedUnsuccessfully(int exit_status);
// Traps C++ exceptions escaping statement and reports them as test
// failures. Note that trapping SEH exceptions is not implemented here.
//
// With GTEST_HAS_EXCEPTIONS the statement runs inside a try block:
//   - an exception derived from std::exception has its what() text printed
//     to stderr, prefixed with the formatted __FILE__/__LINE__ location;
//     stderr is flushed and death_test->Abort(TEST_THREW_EXCEPTION) is
//     called;
//   - any other exception goes straight to
//     death_test->Abort(TEST_THREW_EXCEPTION).
// Without exception support the macro only executes the statement and
// death_test is unused.
# if GTEST_HAS_EXCEPTIONS
# define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \
try { \
GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
} catch (const ::std::exception& gtest_exception) { \
fprintf(\
stderr, \
"\n%s: Caught std::exception-derived exception escaping the " \
"death test statement. Exception message: %s\n", \
::testing::internal::FormatFileLocation(__FILE__, __LINE__).c_str(), \
gtest_exception.what()); \
fflush(stderr); \
death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \
} catch (...) { \
death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \
}
# else
# define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \
GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement)
# endif
// This macro is for implementing ASSERT_DEATH*, EXPECT_DEATH*,
// ASSERT_EXIT*, and EXPECT_EXIT*.
//
// Control flow of the expansion:
//   1. 'regex' is bound to a const RE reference and passed, with the
//      stringified statement and the source location, to DeathTest::Create.
//      If Create returns false, control jumps to the failure label.
//   2. If Create left the DeathTest pointer NULL, nothing else happens.
//   3. Otherwise the DeathTest object is handed to a scoped_ptr and
//      AssumeRole() selects the branch:
//        OVERSEE_TEST - evaluate predicate(Wait()) and pass the result to
//                       Passed(); jump to the failure label if Passed()
//                       returns false.
//        EXECUTE_TEST - create a ReturnSentinel, execute the statement via
//                       GTEST_EXECUTE_DEATH_TEST_STATEMENT_, and, if
//                       control gets past the statement, call
//                       Abort(TEST_DID_NOT_DIE).
//   4. The failure label sits in the else branch and invokes
//      fail(DeathTest::LastMessage()).
//
// The label name is built from __LINE__ (presumably so that uses on
// different lines of the same function do not define the same label).
// The expansion ends without a semicolon.
# define GTEST_DEATH_TEST_(statement, predicate, regex, fail) \
GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
if (::testing::internal::AlwaysTrue()) { \
const ::testing::internal::RE& gtest_regex = (regex); \
::testing::internal::DeathTest* gtest_dt; \
if (!::testing::internal::DeathTest::Create(#statement, &gtest_regex, \
__FILE__, __LINE__, &gtest_dt)) { \
goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \
} \
if (gtest_dt != NULL) { \
::testing::internal::scoped_ptr< ::testing::internal::DeathTest> \
gtest_dt_ptr(gtest_dt); \
switch (gtest_dt->AssumeRole()) { \
case ::testing::internal::DeathTest::OVERSEE_TEST: \
if (!gtest_dt->Passed(predicate(gtest_dt->Wait()))) { \
goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \
} \
break; \
case ::testing::internal::DeathTest::EXECUTE_TEST: { \
::testing::internal::DeathTest::ReturnSentinel \
gtest_sentinel(gtest_dt); \
GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, gtest_dt); \
gtest_dt->Abort(::testing::internal::DeathTest::TEST_DID_NOT_DIE); \
break; \
} \
default: \
break; \
} \
} \
} else \
GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__): \
fail(::testing::internal::DeathTest::LastMessage())
// The symbol "fail" here expands to something into which a message
// can be streamed.
// This macro is for implementing ASSERT/EXPECT_DEBUG_DEATH when compiled in
// NDEBUG mode. In this case we need the statements to be executed, the regex is
// ignored, and the macro must accept a streamed message even though the message
// is never printed.
//
// The if branch (guarded by AlwaysTrue()) executes the statement. The else
// branch consists of a ::testing::Message() expression with no terminating
// semicolon, so anything the caller streams after the macro invocation
// attaches to that expression. The 'regex' parameter does not appear in
// the expansion at all.
# define GTEST_EXECUTE_STATEMENT_(statement, regex) \
GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
if (::testing::internal::AlwaysTrue()) { \
GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
} else \
::testing::Message()
// Holds the parsed contents of the --gtest_internal_run_death_test flag,
// as it existed when RUN_ALL_TESTS was called: a file name, a line number,
// an index and a write file descriptor.
//
// The object closes the write descriptor on destruction (when it is
// non-negative) and cannot be copied or assigned.
class InternalRunDeathTestFlag {
 public:
  InternalRunDeathTestFlag(const std::string& a_file,
                           int a_line,
                           int an_index,
                           int a_write_fd)
      : file_(a_file),
        line_(a_line),
        index_(an_index),
        write_fd_(a_write_fd) {}

  ~InternalRunDeathTestFlag() {
    // A negative descriptor is left alone; anything else is closed here.
    if (write_fd_ < 0) return;
    posix::Close(write_fd_);
  }

  // Read-only accessors for the parsed fields.
  const std::string& file() const { return file_; }
  int line() const { return line_; }
  int index() const { return index_; }
  int write_fd() const { return write_fd_; }

 private:
  std::string file_;
  int line_;
  int index_;
  int write_fd_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(InternalRunDeathTestFlag);
};
// Returns a newly created InternalRunDeathTestFlag object with fields
// initialized from the GTEST_FLAG(internal_run_death_test) flag if
// the flag is specified; otherwise returns NULL.
InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag();
#else // GTEST_HAS_DEATH_TEST
// This macro is used for implementing macros such as
// EXPECT_DEATH_IF_SUPPORTED and ASSERT_DEATH_IF_SUPPORTED on systems where
// death tests are not supported. Those macros must compile on such systems
// iff EXPECT_DEATH and ASSERT_DEATH compile with the same parameters on
// systems that support death tests. This allows one to write such a macro
// on a system that does not support death tests and be sure that it will
// compile on a death-test supporting system.
//
// Parameters:
// statement - A statement that a macro such as EXPECT_DEATH would test
// for program termination. This macro has to make sure this
// statement is compiled but not executed, to ensure that
// EXPECT_DEATH_IF_SUPPORTED compiles with a certain
// parameter iff EXPECT_DEATH compiles with it.
// regex - A regex that a macro such as EXPECT_DEATH would use to test
// the output of statement. This parameter has to be
// compiled but not evaluated by this macro, to ensure that
// this macro only accepts expressions that a macro such as
// EXPECT_DEATH would accept.
// terminator - Must be an empty statement for EXPECT_DEATH_IF_SUPPORTED
// and a return statement for ASSERT_DEATH_IF_SUPPORTED.
// This ensures that ASSERT_DEATH_IF_SUPPORTED will not
// compile inside functions where ASSERT_DEATH doesn't
// compile.
//
// How the expansion works:
//   - The first branch is guarded by AlwaysTrue(); it logs a warning saying
//     that death tests are not supported on this platform and that the
//     statement cannot be verified.
//   - The branch that has an always false condition is used to ensure that
//     statement and regex are compiled (and thus syntactically correct) but
//     never executed. The unreachable code macro protects the terminator
//     statement from generating an 'unreachable code' warning in case
//     statement unconditionally returns or throws.
//   - The Message constructor at the end allows additional messages to be
//     streamed into the macro, for compile-time compatibility with
//     EXPECT_DEATH/ASSERT_DEATH.
# define GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, terminator) \
GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
if (::testing::internal::AlwaysTrue()) { \
GTEST_LOG_(WARNING) \
<< "Death tests are not supported on this platform.\n" \
<< "Statement '" #statement "' cannot be verified."; \
} else if (::testing::internal::AlwaysFalse()) { \
::testing::internal::RE::PartialMatch(".*", (regex)); \
GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
terminator; \
} else \
::testing::Message()
#endif // GTEST_HAS_DEATH_TEST
} // namespace internal
} // namespace testing
#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
@@ -0,0 +1,212 @@
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: keith.ray@gmail.com (Keith Ray)
//
// Google Test filepath utilities
//
// This header file declares classes and functions used internally by
// Google Test. They are subject to change without notice.
//
// This file is #included in <gtest/internal/gtest-internal.h>.
// Do not include this header file separately!
#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
#include "gtest/internal/gtest-string.h"
namespace testing {
namespace internal {
// FilePath - a class for file and directory pathname manipulation which
// handles platform-specific conventions (like the pathname separator).
// Used for helper functions for naming files in a directory for xml output.
// Except for Set methods, all methods are const or static, which provides an
// "immutable value object" -- useful for peace of mind.
// A FilePath with a value ending in a path separator ("like/this/") represents
// a directory, otherwise it is assumed to represent a file. In either case,
// it may or may not represent an actual file or directory in the file system.
// Names are NOT checked for syntax correctness -- no checking for illegal
// characters, malformed paths, etc.
class GTEST_API_ FilePath {
 public:
  // Creates an empty path.
  FilePath() : pathname_() { }

  // Copies the stored pathname of |rhs| as-is.
  FilePath(const FilePath& rhs) : pathname_(rhs.pathname_) { }

  // Stores |pathname| and then normalizes it (see Normalize()).
  explicit FilePath(const std::string& pathname) : pathname_(pathname) {
    Normalize();
  }

  // Assignment copies the stored pathname of |rhs|.
  FilePath& operator=(const FilePath& rhs) {
    pathname_ = rhs.pathname_;
    return *this;
  }

  // Replaces the stored pathname with the one held by |rhs|.
  void Set(const FilePath& rhs) {
    pathname_ = rhs.pathname_;
  }

  // Accessors for the stored pathname, as a std::string or a C string.
  const std::string& string() const { return pathname_; }
  const char* c_str() const { return pathname_.c_str(); }

  // Returns the current working directory, or "" if unsuccessful.
  static FilePath GetCurrentDir();

  // Builds a file name from its parts. With directory = "dir",
  // base_name = "test", number = 0 and extension = "xml" the result is
  // "dir/test.xml". A number greater than zero (e.g., 12) is appended to
  // the base name: "dir/test_12.xml".
  // On Windows platform, uses \ as the separator rather than /.
  static FilePath MakeFileName(const FilePath& directory,
                               const FilePath& base_name,
                               int number,
                               const char* extension);

  // Joins a directory and a relative path: directory = "dir" and
  // relative_path = "test.xml" give "dir/test.xml".
  // On Windows, uses \ as the separator rather than /.
  static FilePath ConcatPaths(const FilePath& directory,
                              const FilePath& relative_path);

  // Returns a pathname for a file that does not currently exist. The
  // result is directory/base_name.extension, or
  // directory/base_name_<number>.extension when
  // directory/base_name.extension already exists; the number is
  // incremented until a pathname is found that does not already exist.
  // Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'.
  // This is racy: two or more processes calling this function at the same
  // time could both pick the same filename.
  static FilePath GenerateUniqueFileName(const FilePath& directory,
                                         const FilePath& base_name,
                                         const char* extension);

  // Returns true iff the path is "".
  bool IsEmpty() const { return pathname_.empty(); }

  // Returns the name without its trailing separator character if it has
  // one; otherwise returns the name unmodified.
  // On Windows platform, uses \ as the separator, other platforms use /.
  FilePath RemoveTrailingPathSeparator() const;

  // Returns a copy of the FilePath with the directory part removed.
  // Example: FilePath("path/to/file").RemoveDirectoryName() returns
  // FilePath("file"). A path without a directory part ("just_a_file") is
  // returned unmodified. A path without a file part ("just_a_dir/") yields
  // an empty FilePath ("").
  // On Windows platform, '\' is the path separator, otherwise it is '/'.
  FilePath RemoveDirectoryName() const;

  // Returns the directory path with the filename removed.
  // Example: FilePath("path/to/file").RemoveFileName() returns "path/to/".
  // If the FilePath is "a_file" or "/a_file", RemoveFileName returns
  // FilePath("./") or, on Windows, FilePath(".\\"). A path that has no
  // file, like "just/a/dir/", is returned unmodified.
  // On Windows platform, '\' is the path separator, otherwise it is '/'.
  FilePath RemoveFileName() const;

  // Returns a copy of the FilePath with the extension removed; the
  // extension is matched case-insensitively.
  // Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns
  // FilePath("dir/file"). If the extension is not found, a copy of the
  // original FilePath is returned.
  FilePath RemoveExtension(const char* extension) const;

  // Creates directories so that path exists. Returns true if successful or
  // if the directories already exist; returns false if the directories
  // cannot be created for any reason. Also returns false if the FilePath
  // does not represent a directory (that is, it doesn't end with a path
  // separator).
  bool CreateDirectoriesRecursively() const;

  // Creates the directory so that path exists. Returns true if successful
  // or if the directory already exists; returns false if the directory
  // cannot be created for any reason, including a missing parent
  // directory. Not named "CreateDirectory" because that's a macro on
  // Windows.
  bool CreateFolder() const;

  // Returns true if FilePath describes something in the file-system,
  // either a file, directory, or whatever, and that something exists.
  bool FileOrDirectoryExists() const;

  // Returns true if pathname describes a directory in the file-system
  // that exists.
  bool DirectoryExists() const;

  // Returns true if FilePath ends with a path separator, which indicates
  // that it is intended to represent a directory; false otherwise.
  // This does NOT check that a directory (or file) actually exists.
  bool IsDirectory() const;

  // Returns true if pathname describes a root directory. (Windows has one
  // root directory per disk drive.)
  bool IsRootDirectory() const;

  // Returns true if pathname describes an absolute path.
  bool IsAbsolutePath() const;

 private:
  // Collapses runs of consecutive separators into a single separator, so
  // that "bar///foo" becomes "bar/foo". Other redundancies that might be
  // in a pathname involving "." or ".." are not eliminated.
  //
  // A pathname with multiple consecutive separators may occur either
  // through user error or as a result of some scripts or APIs that
  // generate a pathname with a trailing separator. On other platforms the
  // same API or script may NOT generate a pathname with a trailing "/".
  // Then elsewhere that pathname may have another "/" and pathname
  // components added to it, without checking for the separator already
  // being there.
  // The script language and operating system may allow paths like
  // "foo//bar" but some of the functions in FilePath will not handle that
  // correctly. In particular, RemoveTrailingPathSeparator() only removes
  // one separator, and it is called in CreateDirectoriesRecursively()
  // assuming that it will change a pathname from directory syntax
  // (trailing separator) to filename syntax.
  //
  // On Windows this method also replaces the alternate path separator '/'
  // with the primary path separator '\\', so that for example
  // "bar\\/\\foo" becomes "bar\\foo".
  void Normalize();

  // Returns a pointer to the last occurrence of a valid path separator in
  // the FilePath. On Windows, for example, both '/' and '\' are valid path
  // separators. Returns NULL if no path separator was found.
  const char* FindLastPathSeparator() const;

  std::string pathname_;
};  // class FilePath
} // namespace internal
} // namespace testing
#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,266 @@
// Copyright 2003 Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Dan Egnor (egnor@google.com)
//
// A "smart" pointer type with reference tracking. Every pointer to a
// particular object is kept on a circular linked list. When the last pointer
// to an object is destroyed or reassigned, the object is deleted.
//
// Used properly, this deletes the object when the last reference goes away.
// There are several caveats:
// - Like all reference counting schemes, cycles lead to leaks.
// - Each smart pointer is actually two pointers (the object pointer plus a
// list link), so it is twice the size of a raw pointer.
// - Every time a pointer is assigned, the entire list of pointers to that
// object is traversed. This class is therefore NOT SUITABLE when there
// will often be more than two or three pointers to a particular object.
// - References are only tracked as long as linked_ptr<> objects are copied.
// If a linked_ptr<> is converted to a raw pointer and back, BAD THINGS
// will happen (double deletion).
//
// A good use of this class is storing object references in STL containers.
// You can safely put linked_ptr<> in a vector<>.
// Other uses may not be as good.
//
// Note: If you use an incomplete type with linked_ptr<>, the class
// *containing* linked_ptr<> must have a constructor and destructor (even
// if they do nothing!).
//
// Bill Gibbons suggested we use something like this.
//
// Thread Safety:
// Unlike other linked_ptr implementations, in this implementation
// a linked_ptr object is thread-safe in the sense that:
// - it's safe to copy linked_ptr objects concurrently,
// - it's safe to copy *from* a linked_ptr and read its underlying
// raw pointer (e.g. via get()) concurrently, and
// - it's safe to write to two linked_ptrs that point to the same
// shared object concurrently.
// TODO(wan@google.com): rename this to safe_linked_ptr to avoid
// confusion with normal linked_ptr.
#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
#include <stdlib.h>
#include <assert.h>
#include "gtest/internal/gtest-port.h"
namespace testing {
namespace internal {
// Protects copying of all linked_ptr objects.
GTEST_API_ GTEST_DECLARE_STATIC_MUTEX_(g_linked_ptr_mutex);
// This is used internally by all instances of linked_ptr<>. It needs to be
// a non-template class because different types of linked_ptr<> can refer to
// the same object (linked_ptr<Superclass>(obj) vs linked_ptr<Subclass>(obj)).
// So, it needs to be possible for different types of linked_ptr to participate
// in the same circular linked list, so we need a single class type here.
//
// DO NOT USE THIS CLASS DIRECTLY YOURSELF. Use linked_ptr<T>.
class linked_ptr_internal {
 public:
  // Starts a fresh circle whose only member is this node.
  void join_new() { next_ = this; }

  // Many linked_ptr operations may change p.link_ for some linked_ptr
  // variable p in the same circle as this object. Therefore two such
  // operations must be prevented from running concurrently.
  //
  // Different types of linked_ptr objects can coexist in one circle
  // (e.g. linked_ptr<Base>, linked_ptr<Derived1>, and
  // linked_ptr<Derived2>), so a single mutex has to protect all
  // linked_ptr objects. This can create serious contention in production
  // code, but is acceptable in a testing framework.

  // Inserts this node into the circle that |ptr| belongs to. The circle
  // is walked from |ptr| until the node linking back to |ptr| is found;
  // this node is spliced in between that node and |ptr|.
  void join(linked_ptr_internal const* ptr)
      GTEST_LOCK_EXCLUDED_(g_linked_ptr_mutex) {
    MutexLock lock(&g_linked_ptr_mutex);

    linked_ptr_internal const* tail = ptr;
    for (; tail->next_ != ptr; tail = tail->next_) {
    }
    tail->next_ = this;
    next_ = ptr;
  }

  // Removes this node from whatever circle it is part of. Returns true if
  // it was the last member of the circle. Once this is done, the node may
  // join() another circle.
  bool depart()
      GTEST_LOCK_EXCLUDED_(g_linked_ptr_mutex) {
    MutexLock lock(&g_linked_ptr_mutex);

    const bool was_alone = (next_ == this);
    if (!was_alone) {
      // Locate our predecessor and make it skip over this node.
      linked_ptr_internal const* prev = next_;
      while (prev->next_ != this) {
        prev = prev->next_;
      }
      prev->next_ = next_;
    }
    return was_alone;
  }

 private:
  // Next node in the circle; mutable because join() and depart() relink
  // nodes that are reached through pointers-to-const.
  mutable linked_ptr_internal const* next_;
};
template <typename T>
class linked_ptr {
 public:
  typedef T element_type;

  // Takes over ownership of a raw pointer. This should happen as soon as
  // possible after the object is created.
  explicit linked_ptr(T* ptr = NULL) { capture(ptr); }

  // Leaves the circle; the object is deleted if this was the last member.
  ~linked_ptr() { depart(); }

  // Copying adds the new linked_ptr<> to the source's list of references.
  template <typename U> linked_ptr(linked_ptr<U> const& ptr) { copy(&ptr); }
  linked_ptr(linked_ptr const& ptr) {  // NOLINT
    assert(&ptr != this);
    copy(&ptr);
  }

  // Assignment releases the old value and acquires the new one.
  template <typename U> linked_ptr& operator=(linked_ptr<U> const& ptr) {
    depart();
    copy(&ptr);
    return *this;
  }

  linked_ptr& operator=(linked_ptr const& ptr) {
    // Self-assignment is a no-op.
    if (&ptr == this) {
      return *this;
    }
    depart();
    copy(&ptr);
    return *this;
  }

  // Smart pointer members.

  // Releases the current value and takes ownership of |ptr| instead.
  void reset(T* ptr = NULL) {
    depart();
    capture(ptr);
  }
  T* get() const { return value_; }
  T* operator->() const { return value_; }
  T& operator*() const { return *value_; }

  // Comparisons against raw pointers compare the held pointer value.
  bool operator==(T* p) const { return value_ == p; }
  bool operator!=(T* p) const { return !(value_ == p); }

  // Comparisons against other linked_ptr<> objects compare the held
  // pointer values.
  template <typename U>
  bool operator==(linked_ptr<U> const& ptr) const {
    return value_ == ptr.get();
  }
  template <typename U>
  bool operator!=(linked_ptr<U> const& ptr) const {
    return !(value_ == ptr.get());
  }

 private:
  // Other instantiations need access to link_ in copy().
  template <typename U>
  friend class linked_ptr;

  T* value_;
  linked_ptr_internal link_;

  // Leaves the current circle and deletes the object if no other
  // linked_ptr<> remains in it.
  void depart() {
    const bool last_owner = link_.depart();
    if (last_owner) {
      delete value_;
    }
  }

  // Stores |ptr| and starts a new circle containing only this object.
  void capture(T* ptr) {
    value_ = ptr;
    link_.join_new();
  }

  // Takes the pointer held by |ptr|. A non-NULL pointer makes this object
  // join |ptr|'s circle; a NULL pointer gets a circle of its own.
  template <typename U> void copy(linked_ptr<U> const* ptr) {
    value_ = ptr->get();
    if (value_ == NULL) {
      link_.join_new();
      return;
    }
    link_.join(&ptr->link_);
  }
};
// Equality with the raw pointer on the left-hand side: true when |ptr| is
// the pointer held by |x|.
template <typename T>
inline bool operator==(T* ptr, const linked_ptr<T>& x) {
  return x.get() == ptr;
}
// Inequality with the raw pointer on the left-hand side: true when |ptr|
// differs from the pointer held by |x|.
template <typename T>
inline bool operator!=(T* ptr, const linked_ptr<T>& x) {
  return !(ptr == x.get());
}
// A function to convert T* into linked_ptr<T>
// Doing e.g. make_linked_ptr(new FooBarBaz<type>(arg)) is a shorter notation
// for linked_ptr<FooBarBaz<type> >(new FooBarBaz<type>(arg))
//
// The returned linked_ptr<T> is constructed directly from ptr, so it takes
// over ownership of ptr; the template argument T is deduced from the
// pointer type.
template <typename T>
linked_ptr<T> make_linked_ptr(T* ptr) {
return linked_ptr<T>(ptr);
}
} // namespace internal
} // namespace testing
#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
@@ -0,0 +1,301 @@
$$ -*- mode: c++; -*-
$var n = 50 $$ Maximum length of Values arguments we want to support.
$var maxtuple = 10 $$ Maximum number of Combine arguments we want to support.
// Copyright 2008 Google Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: vladl@google.com (Vlad Losev)
// Type and function utilities for implementing parameterized tests.
// This file is generated by a SCRIPT. DO NOT EDIT BY HAND!
//
// Currently Google Test supports at most $n arguments in Values,
// and at most $maxtuple arguments in Combine. Please contact
// googletestframework@googlegroups.com if you need more.
// Please note that the number of arguments to Combine is limited
// by the maximum arity of the implementation of tr1::tuple which is
// currently set at $maxtuple.
#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_
// scripts/fuse_gtest.py depends on gtest's own header being #included
// *unconditionally*. Therefore these #includes cannot be moved
// inside #if GTEST_HAS_PARAM_TEST.
#include "gtest/internal/gtest-param-util.h"
#include "gtest/internal/gtest-port.h"
#if GTEST_HAS_PARAM_TEST
namespace testing {
// Forward declarations of ValuesIn(), which is implemented in
// include/gtest/gtest-param-test.h.
template <typename ForwardIterator>
internal::ParamGenerator<
typename ::testing::internal::IteratorTraits<ForwardIterator>::value_type>
ValuesIn(ForwardIterator begin, ForwardIterator end);
template <typename T, size_t N>
internal::ParamGenerator<T> ValuesIn(const T (&array)[N]);
template <class Container>
internal::ParamGenerator<typename Container::value_type> ValuesIn(
const Container& container);
namespace internal {
// Used in the Values() function to provide polymorphic capabilities.
//
// Single-value case: stores the one value and converts it, on request, into
// a generator of any parameter type T the value can be static_cast to.
template <typename T1>
class ValueArray1 {
 public:
  explicit ValueArray1(T1 v1) : v1_(v1) {}

  // Converts the stored value to T before handing it to ValuesIn(), exactly
  // like the multi-value ValueArray classes below do. Passing the range
  // (&v1_, &v1_ + 1) instead would yield a generator of T1, which is not
  // the declared return type whenever T differs from T1.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {static_cast<T>(v1_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray1& other);

  const T1 v1_;
};
$range i 2..n
$for i [[
$range j 1..i
// Multi-value counterpart of ValueArray1: stores one value per template
// parameter, each with its own type.
template <$for j, [[typename T$j]]>
class ValueArray$i {
public:
ValueArray$i($for j, [[T$j v$j]]) : $for j, [[v$(j)_(v$j)]] {}
// Converts every stored value to T with static_cast, collects the results
// in a local array and passes that array to ValuesIn().
template <typename T>
operator ParamGenerator<T>() const {
const T array[] = {$for j, [[static_cast<T>(v$(j)_)]]};
return ValuesIn(array);
}
private:
// No implementation - assignment is unsupported.
void operator=(const ValueArray$i& other);
$for j [[
const T$j v$(j)_;
]]
};
]]
# if GTEST_HAS_COMBINE
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Generates values from the Cartesian product of values produced
// by the argument generators.
//
$range i 2..maxtuple
$for i [[
$range j 1..i
$range k 2..i
template <$for j, [[typename T$j]]>
class CartesianProductGenerator$i
: public ParamGeneratorInterface< ::std::tr1::tuple<$for j, [[T$j]]> > {
public:
typedef ::std::tr1::tuple<$for j, [[T$j]]> ParamType;
// Stores a copy of every component generator.
CartesianProductGenerator$i($for j, [[const ParamGenerator<T$j>& g$j]])
: $for j, [[g$(j)_(g$j)]] {}
virtual ~CartesianProductGenerator$i() {}
// Returns a newly allocated iterator in which every component iterator
// starts at the beginning of its generator.
virtual ParamIteratorInterface<ParamType>* Begin() const {
return new Iterator(this, $for j, [[g$(j)_, g$(j)_.begin()]]);
}
// Returns a newly allocated iterator in which every component iterator
// is at the end of its generator.
virtual ParamIteratorInterface<ParamType>* End() const {
return new Iterator(this, $for j, [[g$(j)_, g$(j)_.end()]]);
}
private:
class Iterator : public ParamIteratorInterface<ParamType> {
public:
// Records the begin/end of every component generator together with the
// given starting position, then caches the tuple for that position.
Iterator(const ParamGeneratorInterface<ParamType>* base, $for j, [[
const ParamGenerator<T$j>& g$j,
const typename ParamGenerator<T$j>::iterator& current$(j)]])
: base_(base),
$for j, [[
begin$(j)_(g$j.begin()), end$(j)_(g$j.end()), current$(j)_(current$j)
]] {
ComputeCurrentValue();
}
virtual ~Iterator() {}
virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
return base_;
}
// Advance() must not be called on a past-the-end iterator, so none of
// the component iterators may be past the end of its range, either.
//
// The last component is incremented first. Walking from the last
// component towards the second one, every component that has reached
// its end is reset to its beginning and the component before it is
// incremented. The first component is never reset.
virtual void Advance() {
assert(!AtEnd());
++current$(i)_;
$for k [[
if (current$(i+2-k)_ == end$(i+2-k)_) {
current$(i+2-k)_ = begin$(i+2-k)_;
++current$(i+2-k-1)_;
}
]]
ComputeCurrentValue();
}
virtual ParamIteratorInterface<ParamType>* Clone() const {
return new Iterator(*this);
}
// Returns a pointer to the cached tuple for the current position.
virtual const ParamType* Current() const { return &current_value_; }
virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
// Having the same base generator guarantees that the other
// iterator is of the same type and we can downcast.
GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
<< "The program attempted to compare iterators "
<< "from different generators." << std::endl;
const Iterator* typed_other =
CheckedDowncastToActualType<const Iterator>(&other);
// We must report iterators equal if they both point beyond their
// respective ranges. That can happen in a variety of fashions,
// so we have to consult AtEnd().
return (AtEnd() && typed_other->AtEnd()) ||
($for j && [[
current$(j)_ == typed_other->current$(j)_
]]);
}
private:
// Copy constructor, used by Clone(). The cached tuple is recomputed
// rather than copied.
Iterator(const Iterator& other)
: base_(other.base_), $for j, [[
begin$(j)_(other.begin$(j)_),
end$(j)_(other.end$(j)_),
current$(j)_(other.current$(j)_)
]] {
ComputeCurrentValue();
}
// Rebuilds the cached tuple from the component iterators. Nothing is
// dereferenced when the iterator is past the end.
void ComputeCurrentValue() {
if (!AtEnd())
current_value_ = ParamType($for j, [[*current$(j)_]]);
}
bool AtEnd() const {
// We must report the iterator as past the end of the range when any of
// the component iterators has reached the end of its range.
return
$for j || [[
current$(j)_ == end$(j)_
]];
}
// No implementation - assignment is unsupported.
void operator=(const Iterator& other);
const ParamGeneratorInterface<ParamType>* const base_;
// begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
// current[i]_ is the actual traversing iterator.
$for j [[
const typename ParamGenerator<T$j>::iterator begin$(j)_;
const typename ParamGenerator<T$j>::iterator end$(j)_;
typename ParamGenerator<T$j>::iterator current$(j)_;
]]
ParamType current_value_;
}; // class CartesianProductGenerator$i::Iterator
// No implementation - assignment is unsupported.
void operator=(const CartesianProductGenerator$i& other);
$for j [[
const ParamGenerator<T$j> g$(j)_;
]]
}; // class CartesianProductGenerator$i
]]
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Helper classes providing Combine() with polymorphic features. They allow
// casting CartesianProductGeneratorN<T> to ParamGenerator<U> if T is
// convertible to U.
//
$range i 2..maxtuple
$for i [[
$range j 1..i
// Stores a copy of every generator argument, whatever its type, until a
// conversion to a concrete tuple generator is requested.
template <$for j, [[class Generator$j]]>
class CartesianProductHolder$i {
public:
CartesianProductHolder$i($for j, [[const Generator$j& g$j]])
: $for j, [[g$(j)_(g$j)]] {}
// Converts every stored generator to the ParamGenerator of the requested
// element type and wraps a newly allocated Cartesian product generator
// over them in the returned ParamGenerator.
template <$for j, [[typename T$j]]>
operator ParamGenerator< ::std::tr1::tuple<$for j, [[T$j]]> >() const {
return ParamGenerator< ::std::tr1::tuple<$for j, [[T$j]]> >(
new CartesianProductGenerator$i<$for j, [[T$j]]>(
$for j,[[
static_cast<ParamGenerator<T$j> >(g$(j)_)
]]));
}
private:
// No implementation - assignment is unsupported.
void operator=(const CartesianProductHolder$i& other);
$for j [[
const Generator$j g$(j)_;
]]
}; // class CartesianProductHolder$i
]]
# endif // GTEST_HAS_COMBINE
} // namespace internal
} // namespace testing
#endif // GTEST_HAS_PARAM_TEST
#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_
@@ -0,0 +1,650 @@
// Copyright 2008 Google Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: vladl@google.com (Vlad Losev)
// Type and function utilities for implementing parameterized tests.
#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
#include <iterator>
#include <utility>
#include <vector>
// scripts/fuse_gtest.py depends on gtest's own header being #included
// *unconditionally*. Therefore these #includes cannot be moved
// inside #if GTEST_HAS_PARAM_TEST.
#include "gtest/internal/gtest-internal.h"
#include "gtest/internal/gtest-linked_ptr.h"
#include "gtest/internal/gtest-port.h"
#include "gtest/gtest-printers.h"
#if GTEST_HAS_PARAM_TEST
namespace testing {
namespace internal {
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Outputs a message explaining invalid registration of different
// fixture class for the same test case. This may happen when
// TEST_P macro is used to define two tests with the same name
// but in different namespaces.
GTEST_API_ void ReportInvalidTestCaseType(const char* test_case_name,
const char* file, int line);
template <typename> class ParamGeneratorInterface;
template <typename> class ParamGenerator;
// Interface for iterating over elements provided by an implementation
// of ParamGeneratorInterface<T>.
template <typename T>
class ParamIteratorInterface {
public:
virtual ~ParamIteratorInterface() {}
// A pointer to the base generator instance.
// Used only for the purposes of iterator comparison
// to make sure that two iterators belong to the same generator.
virtual const ParamGeneratorInterface<T>* BaseGenerator() const = 0;
// Advances iterator to point to the next element
// provided by the generator. The caller is responsible
// for not calling Advance() on an iterator equal to
// BaseGenerator()->End().
// Used for implementing ParamIterator<T>::operator++().
virtual void Advance() = 0;
// Clones the iterator object. Used for implementing copy semantics
// of ParamIterator<T>.
virtual ParamIteratorInterface* Clone() const = 0;
// Dereferences the current iterator and provides (read-only) access
// to the pointed value. It is the caller's responsibility not to call
// Current() on an iterator equal to BaseGenerator()->End().
// Used for implementing ParamIterator<T>::operator*() and operator->().
virtual const T* Current() const = 0;
// Determines whether the given iterator and other point to the same
// element in the sequence generated by the generator.
// Used for implementing ParamIterator<T>::operator==().
virtual bool Equals(const ParamIteratorInterface& other) const = 0;
};
// Class iterating over elements provided by an implementation of
// ParamGeneratorInterface<T>. It wraps ParamIteratorInterface<T>
// and implements the const forward iterator concept.
template <typename T>
class ParamIterator {
public:
typedef T value_type;
typedef const T& reference;
typedef ptrdiff_t difference_type;
// ParamIterator assumes ownership of the impl_ pointer.
ParamIterator(const ParamIterator& other) : impl_(other.impl_->Clone()) {}
ParamIterator& operator=(const ParamIterator& other) {
if (this != &other) {
impl_.reset(other.impl_->Clone());
}
return *this;
}
const T& operator*() const {
return *impl_->Current();
}
const T* operator->() const {
return impl_->Current();
}
// Prefix version of operator++.
ParamIterator& operator++() {
impl_->Advance();
return *this;
}
// Postfix version of operator++.
ParamIterator operator++(int /*unused*/) {
ParamIteratorInterface<T>* clone = impl_->Clone();
impl_->Advance();
return ParamIterator(clone);
}
bool operator==(const ParamIterator& other) const {
return impl_.get() == other.impl_.get() || impl_->Equals(*other.impl_);
}
bool operator!=(const ParamIterator& other) const {
return !(*this == other);
}
private:
friend class ParamGenerator<T>;
explicit ParamIterator(ParamIteratorInterface<T>* impl) : impl_(impl) {}
scoped_ptr<ParamIteratorInterface<T> > impl_;
};
// ParamGeneratorInterface<T> is the binary interface to access generators
// defined in other translation units.
// Abstract source of a sequence of parameter values of type T.
template <typename T>
class ParamGeneratorInterface {
public:
typedef T ParamType;
virtual ~ParamGeneratorInterface() {}
// Generator interface definition.
// Begin() returns an iterator positioned at the first generated value and
// End() one positioned past the last value. ParamGenerator<T>::begin() and
// end() wrap the returned pointer in a ParamIterator<T>, which owns it.
virtual ParamIteratorInterface<T>* Begin() const = 0;
virtual ParamIteratorInterface<T>* End() const = 0;
};
// Wraps ParamGeneratorInterface<T> and provides general generator syntax
// compatible with the STL Container concept.
// This class implements copy initialization semantics and the contained
// ParamGeneratorInterface<T> instance is shared among all copies
// of the original object. This is possible because that instance is immutable.
template<typename T>
class ParamGenerator {
 public:
  typedef ParamIterator<T> iterator;

  // Wraps impl in a linked_ptr; copies of this generator share that instance.
  explicit ParamGenerator(ParamGeneratorInterface<T>* impl) : impl_(impl) {}
  ParamGenerator(const ParamGenerator& other) : impl_(other.impl_) {}

  // After assignment this generator shares other's implementation.
  ParamGenerator& operator=(const ParamGenerator& other) {
    impl_ = other.impl_;
    return *this;
  }

  // Iterator positioned at the first generated value.
  iterator begin() const {
    ParamIteratorInterface<T>* const first = impl_->Begin();
    return iterator(first);
  }

  // Iterator positioned past the last generated value.
  iterator end() const {
    ParamIteratorInterface<T>* const past_last = impl_->End();
    return iterator(past_last);
  }

 private:
  linked_ptr<const ParamGeneratorInterface<T> > impl_;
};
// Generates values from a range of two comparable values. Can be used to
// generate sequences of user-defined types that implement operator+() and
// operator<().
// This class is used in the Range() function.
template <typename T, typename IncrementT>
class RangeGenerator : public ParamGeneratorInterface<T> {
 public:
  // Describes the sequence begin, begin + step, begin + 2 * step, ... which
  // stops before the first value that is not less than end.
  RangeGenerator(T begin, T end, IncrementT step)
      : begin_(begin),
        end_(end),
        step_(step),
        end_index_(CalculateEndIndex(begin, end, step)) {}
  virtual ~RangeGenerator() {}

  virtual ParamIteratorInterface<T>* Begin() const {
    return new Iterator(this, begin_, 0, step_);
  }
  virtual ParamIteratorInterface<T>* End() const {
    return new Iterator(this, end_, end_index_, step_);
  }

 private:
  // Iterator over the range. Besides the current value it tracks the 0-based
  // position of that value; only the position takes part in comparisons.
  class Iterator : public ParamIteratorInterface<T> {
   public:
    Iterator(const ParamGeneratorInterface<T>* base, T value, int index,
             IncrementT step)
        : base_(base), value_(value), index_(index), step_(step) {}
    virtual ~Iterator() {}

    virtual const ParamGeneratorInterface<T>* BaseGenerator() const {
      return base_;
    }

    // Steps to the next value using operator+ only.
    virtual void Advance() {
      value_ = value_ + step_;
      ++index_;
    }

    virtual ParamIteratorInterface<T>* Clone() const {
      return new Iterator(*this);
    }

    virtual const T* Current() const { return &value_; }

    virtual bool Equals(const ParamIteratorInterface<T>& other) const {
      // Iterators of one generator necessarily share the concrete type, which
      // is what makes the downcast below legitimate.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* const typed_other =
          CheckedDowncastToActualType<const Iterator>(&other);
      return index_ == typed_other->index_;
    }

   private:
    // Used by Clone() only.
    Iterator(const Iterator& other)
        : ParamIteratorInterface<T>(),
          base_(other.base_),
          value_(other.value_),
          index_(other.index_),
          step_(other.step_) {}

    // Declared but not defined: assignment is unsupported.
    void operator=(const Iterator& other);

    const ParamGeneratorInterface<T>* const base_;
    T value_;
    int index_;
    const IncrementT step_;
  };  // class RangeGenerator::Iterator

  // Counts the values in the sequence by walking it from begin while the
  // current value is less than end; the count is the index of End().
  static int CalculateEndIndex(const T& begin,
                               const T& end,
                               const IncrementT& step) {
    int count = 0;
    T cursor = begin;
    while (cursor < end) {
      ++count;
      cursor = cursor + step;
    }
    return count;
  }

  // Declared but not defined: assignment is unsupported.
  void operator=(const RangeGenerator& other);

  const T begin_;
  const T end_;
  const IncrementT step_;
  // Index of the End() iterator. Elements of the generated sequence are
  // numbered from 0 so that iterators can be compared by position.
  const int end_index_;
};  // class RangeGenerator
// Generates values from a pair of STL-style iterators. Used in the
// ValuesIn() function. The elements are copied from the source range
// since the source can be located on the stack, and the generator
// is likely to persist beyond that stack frame.
template <typename T>
class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface<T> {
 public:
  // Copies the elements of [begin, end) into an internal container.
  template <typename ForwardIterator>
  ValuesInIteratorRangeGenerator(ForwardIterator begin, ForwardIterator end)
      : container_(begin, end) {}
  virtual ~ValuesInIteratorRangeGenerator() {}

  virtual ParamIteratorInterface<T>* Begin() const {
    return new Iterator(this, container_.begin());
  }
  virtual ParamIteratorInterface<T>* End() const {
    return new Iterator(this, container_.end());
  }

 private:
  typedef typename ::std::vector<T> ContainerType;

  // Iterator over the copied elements.
  class Iterator : public ParamIteratorInterface<T> {
   public:
    Iterator(const ParamGeneratorInterface<T>* base,
             typename ContainerType::const_iterator iterator)
        : base_(base), iterator_(iterator) {}
    virtual ~Iterator() {}

    virtual const ParamGeneratorInterface<T>* BaseGenerator() const {
      return base_;
    }

    // Moves to the next element and drops the cached copy of the old one.
    virtual void Advance() {
      ++iterator_;
      value_.reset();
    }

    virtual ParamIteratorInterface<T>* Clone() const {
      return new Iterator(*this);
    }

    // Returns a pointer to a cached copy of *iterator_. A cache is needed
    // because *iterator_ can return a temporary object (and of a type other
    // than T), so a plain "return &*iterator_;" doesn't work.
    // The cache is filled here rather than in Advance() because Advance()
    // can move iterator_ beyond the end of the range and that cannot be
    // detected. The client code, on the other hand, is responsible for not
    // calling Current() on an out-of-range iterator.
    virtual const T* Current() const {
      if (value_.get() != NULL) {
        return value_.get();
      }
      value_.reset(new T(*iterator_));
      return value_.get();
    }

    virtual bool Equals(const ParamIteratorInterface<T>& other) const {
      // Iterators of one generator necessarily share the concrete type, which
      // is what makes the downcast below legitimate.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* const typed_other =
          CheckedDowncastToActualType<const Iterator>(&other);
      return iterator_ == typed_other->iterator_;
    }

   private:
    // Used by Clone() only. The cached value is not copied; the copy fills
    // its own cache on the first call to Current().
    Iterator(const Iterator& other)
        // The explicit base constructor call suppresses a false warning
        // emitted by gcc when supplied with the -Wextra option.
        : ParamIteratorInterface<T>(),
          base_(other.base_),
          iterator_(other.iterator_) {}

    const ParamGeneratorInterface<T>* const base_;
    typename ContainerType::const_iterator iterator_;
    // Cached copy of *iterator_, kept so that the wrapping iterator's
    // operator->() has something to point at. It is mutable because it is
    // filled from the const method Current(); scoped_ptr ties its lifetime
    // to the lifetime of this iterator.
    mutable scoped_ptr<const T> value_;
  };  // class ValuesInIteratorRangeGenerator::Iterator

  // Declared but not defined: assignment is unsupported.
  void operator=(const ValuesInIteratorRangeGenerator& other);

  const ContainerType container_;
};  // class ValuesInIteratorRangeGenerator
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Stores a parameter value and later creates tests parameterized with that
// value.
template <class TestClass>
class ParameterizedTestFactory : public TestFactoryBase {
 public:
  typedef typename TestClass::ParamType ParamType;

  // Keeps a private copy of the parameter for the tests created later.
  explicit ParameterizedTestFactory(ParamType parameter)
      : parameter_(parameter) {}

  // Hands the address of the stored parameter to TestClass::SetParam() and
  // only then constructs the test object.
  virtual Test* CreateTest() {
    const ParamType* const stored_param = &parameter_;
    TestClass::SetParam(stored_param);
    return new TestClass();
  }

 private:
  const ParamType parameter_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestFactory);
};
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// TestMetaFactoryBase is a base class for meta-factories that create
// test factories for passing into MakeAndRegisterTestInfo function.
template <class ParamType>
class TestMetaFactoryBase {
public:
virtual ~TestMetaFactoryBase() {}
// Returns a new factory for tests parameterized with the given value.
// ParameterizedTestCaseInfo::RegisterTests() passes the result directly to
// MakeAndRegisterTestInfo().
virtual TestFactoryBase* CreateTestFactory(ParamType parameter) = 0;
};
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// TestMetaFactory creates test factories for passing into
// MakeAndRegisterTestInfo function. Since MakeAndRegisterTestInfo receives
// ownership of test factory pointer, same factory object cannot be passed
// into that method twice. But ParameterizedTestCaseInfo is going to call
// it for each Test/Parameter value combination. Thus it needs meta factory
// creator class.
template <class TestCase>
class TestMetaFactory
    : public TestMetaFactoryBase<typename TestCase::ParamType> {
 public:
  typedef typename TestCase::ParamType ParamType;

  TestMetaFactory() {}

  // Builds a fresh ParameterizedTestFactory bound to the given value; every
  // call yields a distinct factory object.
  virtual TestFactoryBase* CreateTestFactory(ParamType parameter) {
    TestFactoryBase* const factory =
        new ParameterizedTestFactory<TestCase>(parameter);
    return factory;
  }

 private:
  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestMetaFactory);
};
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// ParameterizedTestCaseInfoBase is a generic interface
// to ParameterizedTestCaseInfo classes. ParameterizedTestCaseInfoBase
// accumulates test information provided by TEST_P macro invocations
// and generators provided by INSTANTIATE_TEST_CASE_P macro invocations
// and uses that information to register all resulting test instances
// in RegisterTests method. The ParameterizedTestCaseRegistry class holds
// a collection of pointers to the ParameterizedTestCaseInfo objects
// and calls RegisterTests() on each of them when asked.
class ParameterizedTestCaseInfoBase {
public:
virtual ~ParameterizedTestCaseInfoBase() {}
// Base part of test case name for display purposes.
virtual const string& GetTestCaseName() const = 0;
// Test case id to verify identity.
virtual TypeId GetTestCaseTypeId() const = 0;
// UnitTest class invokes this method to register tests in this
// test case right before running them in RUN_ALL_TESTS macro.
// This method should not be called more than once on any single
// instance of a ParameterizedTestCaseInfoBase derived class.
virtual void RegisterTests() = 0;
protected:
// Protected: only derived classes can construct the base.
ParameterizedTestCaseInfoBase() {}
private:
GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfoBase);
};
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// ParameterizedTestCaseInfo accumulates tests obtained from TEST_P
// macro invocations for a particular test case and generators
// obtained from INSTANTIATE_TEST_CASE_P macro invocations for that
// test case. It registers tests with all values generated by all
// generators when asked.
template <class TestCase>
class ParameterizedTestCaseInfo : public ParameterizedTestCaseInfoBase {
public:
// ParamType and GeneratorCreationFunc are private types but are required
// for declarations of public methods AddTestPattern() and
// AddTestCaseInstantiation().
typedef typename TestCase::ParamType ParamType;
// A function that returns an instance of appropriate generator type.
typedef ParamGenerator<ParamType>(GeneratorCreationFunc)();
explicit ParameterizedTestCaseInfo(const char* name)
: test_case_name_(name) {}
// Test case base name for display purposes.
virtual const string& GetTestCaseName() const {
return test_case_name_;
}
// Test case id to verify identity.
virtual TypeId GetTestCaseTypeId() const {
return GetTypeId<TestCase>();
}
// TEST_P macro uses AddTestPattern() to record information
// about a single test in a LocalTestInfo structure.
// test_case_name is the base name of the test case (without invocation
// prefix). test_base_name is the name of an individual test without
// parameter index. For the test SequenceA/FooTest.DoBar/1 FooTest is
// test case base name and DoBar is test base name.
void AddTestPattern(const char* test_case_name,
const char* test_base_name,
TestMetaFactoryBase<ParamType>* meta_factory) {
tests_.push_back(linked_ptr<TestInfo>(new TestInfo(test_case_name,
test_base_name,
meta_factory)));
}
// INSTANTIATE_TEST_CASE_P macro uses AddGenerator() to record information
// about a generator.
int AddTestCaseInstantiation(const string& instantiation_name,
GeneratorCreationFunc* func,
const char* /* file */,
int /* line */) {
instantiations_.push_back(::std::make_pair(instantiation_name, func));
return 0; // Return value used only to run this method in namespace scope.
}
// UnitTest class invokes this method to register tests in this test case
// test cases right before running tests in RUN_ALL_TESTS macro.
// This method should not be called more then once on any single
// instance of a ParameterizedTestCaseInfoBase derived class.
// UnitTest has a guard to prevent from calling this method more then once.
virtual void RegisterTests() {
for (typename TestInfoContainer::iterator test_it = tests_.begin();
test_it != tests_.end(); ++test_it) {
linked_ptr<TestInfo> test_info = *test_it;
for (typename InstantiationContainer::iterator gen_it =
instantiations_.begin(); gen_it != instantiations_.end();
++gen_it) {
const string& instantiation_name = gen_it->first;
ParamGenerator<ParamType> generator((*gen_it->second)());
string test_case_name;
if ( !instantiation_name.empty() ) {
test_case_name = instantiation_name + "/";
}
test_case_name += test_info->test_case_base_name;
int i = 0;
for (typename ParamGenerator<ParamType>::iterator param_it =
generator.begin();
param_it != generator.end(); ++param_it, ++i) {
Message test_name_stream;
test_name_stream << test_info->test_base_name << "/" << i;
MakeAndRegisterTestInfo(
test_case_name.c_str(),
test_name_stream.GetString().c_str(),
NULL, // No type parameter.
PrintToString(*param_it).c_str(),
GetTestCaseTypeId(),
TestCase::SetUpTestCase,
TestCase::TearDownTestCase,
test_info->test_meta_factory->CreateTestFactory(*param_it));
} // for param_it
} // for gen_it
} // for test_it
} // RegisterTests
private:
// LocalTestInfo structure keeps information about a single test registered
// with TEST_P macro.
struct TestInfo {
TestInfo(const char* a_test_case_base_name,
const char* a_test_base_name,
TestMetaFactoryBase<ParamType>* a_test_meta_factory) :
test_case_base_name(a_test_case_base_name),
test_base_name(a_test_base_name),
test_meta_factory(a_test_meta_factory) {}
const string test_case_base_name;
const string test_base_name;
const scoped_ptr<TestMetaFactoryBase<ParamType> > test_meta_factory;
};
typedef ::std::vector<linked_ptr<TestInfo> > TestInfoContainer;
// Keeps pairs of <Instantiation name, Sequence generator creation function>
// received from INSTANTIATE_TEST_CASE_P macros.
typedef ::std::vector<std::pair<string, GeneratorCreationFunc*> >
InstantiationContainer;
const string test_case_name_;
TestInfoContainer tests_;
InstantiationContainer instantiations_;
GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfo);
}; // class ParameterizedTestCaseInfo
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// ParameterizedTestCaseRegistry contains a map of ParameterizedTestCaseInfoBase
// classes accessed by test case names. TEST_P and INSTANTIATE_TEST_CASE_P
// macros use it to locate their corresponding ParameterizedTestCaseInfo
// descriptors.
class ParameterizedTestCaseRegistry {
 public:
  ParameterizedTestCaseRegistry() {}

  // The registry owns every info object it has created.
  ~ParameterizedTestCaseRegistry() {
    TestCaseInfoContainer::iterator it = test_case_infos_.begin();
    while (it != test_case_infos_.end()) {
      delete *it;
      ++it;
    }
  }

  // Looks up or creates and returns a structure containing information about
  // tests and instantiations of a particular test case. file and line are
  // used only for the diagnostic issued when the name is already registered
  // for a different fixture type.
  template <class TestCase>
  ParameterizedTestCaseInfo<TestCase>* GetTestCasePatternHolder(
      const char* test_case_name,
      const char* file,
      int line) {
    ParameterizedTestCaseInfo<TestCase>* typed_test_info = NULL;
    for (TestCaseInfoContainer::iterator it = test_case_infos_.begin();
         it != test_case_infos_.end(); ++it) {
      ParameterizedTestCaseInfoBase* const candidate = *it;
      if (!(candidate->GetTestCaseName() == test_case_name)) {
        continue;
      }
      // Only the first entry with a matching name is examined.
      if (candidate->GetTestCaseTypeId() != GetTypeId<TestCase>()) {
        // Complain about incorrect usage of Google Test facilities
        // and terminate the program since we cannot guarantee correct
        // test case setup and tear-down in this case.
        ReportInvalidTestCaseType(test_case_name, file, line);
        posix::Abort();
      } else {
        // The type ids match, so the object found is of the type we are
        // looking for and can be downcast without further checks.
        typed_test_info = CheckedDowncastToActualType<
            ParameterizedTestCaseInfo<TestCase> >(candidate);
      }
      break;
    }
    if (typed_test_info != NULL) {
      return typed_test_info;
    }
    // Not registered yet: create the holder and remember it.
    typed_test_info = new ParameterizedTestCaseInfo<TestCase>(test_case_name);
    test_case_infos_.push_back(typed_test_info);
    return typed_test_info;
  }

  // Asks every known test case, in creation order, to register its tests.
  void RegisterTests() {
    TestCaseInfoContainer::iterator it = test_case_infos_.begin();
    while (it != test_case_infos_.end()) {
      (*it)->RegisterTests();
      ++it;
    }
  }

 private:
  typedef ::std::vector<ParameterizedTestCaseInfoBase*> TestCaseInfoContainer;

  TestCaseInfoContainer test_case_infos_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseRegistry);
};
} // namespace internal
} // namespace testing
#endif // GTEST_HAS_PARAM_TEST
#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,167 @@
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
//
// The Google C++ Testing Framework (Google Test)
//
// This header file declares the String class and functions used internally by
// Google Test. They are subject to change without notice. They should not be
// used by code external to Google Test.
//
// This header file is #included by <gtest/internal/gtest-internal.h>.
// It should not be #included by other files.
#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
#ifdef __BORLANDC__
// string.h is not guaranteed to provide strcpy on C++ Builder.
# include <mem.h>
#endif
#include <string.h>
#include <string>
#include "gtest/internal/gtest-port.h"
namespace testing {
namespace internal {
// String - a non-instantiable class holding static string utilities. Every
// member is static and the only constructor is private.
class GTEST_API_ String {
public:
// Static utility methods
// Clones a 0-terminated C string, allocating memory using new. The
// caller is responsible for deleting the return value using
// delete[]. Returns the cloned string, or NULL if the input is
// NULL.
//
// This is different from strdup() in string.h, which allocates
// memory using malloc().
static const char* CloneCString(const char* c_str);
#if GTEST_OS_WINDOWS_MOBILE
// Windows CE does not have the 'ANSI' versions of Win32 APIs. To be
// able to pass strings to Win32 APIs on CE we need to convert them
// to 'Unicode', UTF-16.
// Creates a UTF-16 wide string from the given ANSI string, allocating
// memory using new. The caller is responsible for deleting the return
// value using delete[]. Returns the wide string, or NULL if the
// input is NULL.
//
// The wide string is created using the ANSI codepage (CP_ACP) to
// match the behaviour of the ANSI versions of Win32 calls and the
// C runtime.
static LPCWSTR AnsiToUtf16(const char* c_str);
// Creates an ANSI string from the given wide string, allocating
// memory using new. The caller is responsible for deleting the return
// value using delete[]. Returns the ANSI string, or NULL if the
// input is NULL.
//
// The returned string is created using the ANSI codepage (CP_ACP) to
// match the behaviour of the ANSI versions of Win32 calls and the
// C runtime.
static const char* Utf16ToAnsi(LPCWSTR utf16_str);
#endif
// Compares two C strings. Returns true iff they have the same content.
//
// Unlike strcmp(), this function can handle NULL argument(s). A
// NULL C string is considered different to any non-NULL C string,
// including the empty string.
static bool CStringEquals(const char* lhs, const char* rhs);
// Converts a wide C string to an std::string using the UTF-8 encoding.
// NULL will be converted to "(null)". If an error occurred during
// the conversion, "(failed to convert from wide string)" is
// returned.
static std::string ShowWideCString(const wchar_t* wide_c_str);
// Compares two wide C strings. Returns true iff they have the same
// content.
//
// Unlike wcscmp(), this function can handle NULL argument(s). A
// NULL wide C string is considered different to any non-NULL wide C
// string, including the empty string.
static bool WideCStringEquals(const wchar_t* lhs, const wchar_t* rhs);
// Compares two C strings, ignoring case. Returns true iff they
// have the same content.
//
// Unlike strcasecmp(), this function can handle NULL argument(s).
// A NULL C string is considered different to any non-NULL C string,
// including the empty string.
static bool CaseInsensitiveCStringEquals(const char* lhs,
const char* rhs);
// Compares two wide C strings, ignoring case. Returns true iff they
// have the same content.
//
// Unlike wcscasecmp(), this function can handle NULL argument(s).
// A NULL wide C string is considered different to any non-NULL wide C
// string, including the empty string.
// NB: The implementations on different platforms slightly differ.
// On Windows, this method uses _wcsicmp which compares according to LC_CTYPE
// environment variable. On GNU platform this method uses wcscasecmp
// which compares according to LC_CTYPE category of the current locale.
// On MacOS X, it uses towlower, which also uses LC_CTYPE category of the
// current locale.
static bool CaseInsensitiveWideCStringEquals(const wchar_t* lhs,
const wchar_t* rhs);
// Returns true iff the given string ends with the given suffix, ignoring
// case. Any string is considered to end with an empty suffix.
static bool EndsWithCaseInsensitive(
const std::string& str, const std::string& suffix);
// Formats an int value as "%02d".
static std::string FormatIntWidth2(int value); // "%02d" for width == 2
// Formats an int value as "%X".
static std::string FormatHexInt(int value);
// Formats a byte as "%02X".
static std::string FormatByte(unsigned char value);
private:
String(); // Not meant to be instantiated.
}; // class String
// Gets the content of the stringstream's buffer as an std::string. Each '\0'
// character in the buffer is replaced with "\\0".
GTEST_API_ std::string StringStreamToString(::std::stringstream* stream);
} // namespace internal
} // namespace testing
#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,339 @@
$$ -*- mode: c++; -*-
$var n = 10 $$ Maximum number of tuple fields we want to support.
$$ This meta comment fixes auto-indentation in Emacs. }}
// Copyright 2009 Google Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
// Implements a subset of TR1 tuple needed by Google Test and Google Mock.
#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
#include <utility> // For ::std::pair.
// The compiler used in Symbian has a bug that prevents us from declaring the
// tuple template as a friend (it complains that tuple is redefined). This
// hack bypasses the bug by declaring the members that should otherwise be
// private as public.
// Sun Studio versions < 12 also have the above bug.
#if defined(__SYMBIAN32__) || (defined(__SUNPRO_CC) && __SUNPRO_CC < 0x590)
# define GTEST_DECLARE_TUPLE_AS_FRIEND_ public:
#else
# define GTEST_DECLARE_TUPLE_AS_FRIEND_ \
template <GTEST_$(n)_TYPENAMES_(U)> friend class tuple; \
private:
#endif
$range i 0..n-1
$range j 0..n
$range k 1..n
// GTEST_n_TUPLE_(T) is the type of an n-tuple.
#define GTEST_0_TUPLE_(T) tuple<>
$for k [[
$range m 0..k-1
$range m2 k..n-1
#define GTEST_$(k)_TUPLE_(T) tuple<$for m, [[T##$m]]$for m2 [[, void]]>
]]
// GTEST_n_TYPENAMES_(T) declares a list of n typenames.
$for j [[
$range m 0..j-1
#define GTEST_$(j)_TYPENAMES_(T) $for m, [[typename T##$m]]
]]
// In theory, defining stuff in the ::std namespace is undefined
// behavior. We can do this as we are playing the role of a standard
// library vendor.
namespace std {
namespace tr1 {
template <$for i, [[typename T$i = void]]>
class tuple;
// Anything in namespace gtest_internal is Google Test's INTERNAL
// IMPLEMENTATION DETAIL and MUST NOT BE USED DIRECTLY in user code.
namespace gtest_internal {
// ByRef<T>::type is T if T is a reference; otherwise it's const T&.
// Primary template: a non-reference T maps to const T&.
template <typename T>
struct ByRef { typedef const T& type; }; // NOLINT
// Partial specialization: a reference type T& is kept unchanged.
template <typename T>
struct ByRef<T&> { typedef T& type; }; // NOLINT
// A handy wrapper for ByRef.
#define GTEST_BY_REF_(T) typename ::std::tr1::gtest_internal::ByRef<T>::type
// AddRef<T>::type is T if T is a reference; otherwise it's T&. This
// is the same as tr1::add_reference<T>::type.
// Primary template: a non-reference T maps to T&.
template <typename T>
struct AddRef { typedef T& type; }; // NOLINT
// Partial specialization: a reference type T& is kept unchanged.
template <typename T>
struct AddRef<T&> { typedef T& type; }; // NOLINT
// A handy wrapper for AddRef.
#define GTEST_ADD_REF_(T) typename ::std::tr1::gtest_internal::AddRef<T>::type
// A helper for implementing get<k>().
template <int k> class Get;
// A helper for implementing tuple_element<k, T>. kIndexValid is true
// iff k < the number of fields in tuple type T.
// The primary template is only declared. Pump emits one specialization per
// field index, each with kIndexValid == true, so TupleElement has no
// definition for an invalid index.
template <bool kIndexValid, int kIndex, class Tuple>
struct TupleElement;
$for i [[
template <GTEST_$(n)_TYPENAMES_(T)>
struct TupleElement<true, $i, GTEST_$(n)_TUPLE_(T) > {
typedef T$i type;
};
]]
} // namespace gtest_internal
// The empty tuple. It has no fields, so copy construction and assignment
// have nothing to copy.
template <>
class tuple<> {
public:
tuple() {}
tuple(const tuple& /* t */) {}
tuple& operator=(const tuple& /* t */) { return *this; }
};
// The Pump loop below emits one tuple class for every arity k from 1 to n.
// For k < n the class is a partial specialization spelled GTEST_k_TUPLE_(T),
// in which the unused trailing type arguments are void; for k == n it is the
// definition of the primary template. Field number m is stored in the member
// named f<m>_. Each class provides default construction, construction from
// one value per field, copy construction, and construction and assignment
// from a same-arity tuple with other field types (all routed through
// CopyFrom() for assignment). Only the 2-tuple additionally accepts a
// ::std::pair.
$for k [[
$range m 0..k-1
template <GTEST_$(k)_TYPENAMES_(T)>
class $if k < n [[GTEST_$(k)_TUPLE_(T)]] $else [[tuple]] {
public:
template <int k> friend class gtest_internal::Get;
tuple() : $for m, [[f$(m)_()]] {}
explicit tuple($for m, [[GTEST_BY_REF_(T$m) f$m]]) : [[]]
$for m, [[f$(m)_(f$m)]] {}
tuple(const tuple& t) : $for m, [[f$(m)_(t.f$(m)_)]] {}
template <GTEST_$(k)_TYPENAMES_(U)>
tuple(const GTEST_$(k)_TUPLE_(U)& t) : $for m, [[f$(m)_(t.f$(m)_)]] {}
$if k == 2 [[
template <typename U0, typename U1>
tuple(const ::std::pair<U0, U1>& p) : f0_(p.first), f1_(p.second) {}
]]
tuple& operator=(const tuple& t) { return CopyFrom(t); }
template <GTEST_$(k)_TYPENAMES_(U)>
tuple& operator=(const GTEST_$(k)_TUPLE_(U)& t) {
return CopyFrom(t);
}
$if k == 2 [[
template <typename U0, typename U1>
tuple& operator=(const ::std::pair<U0, U1>& p) {
f0_ = p.first;
f1_ = p.second;
return *this;
}
]]
GTEST_DECLARE_TUPLE_AS_FRIEND_
template <GTEST_$(k)_TYPENAMES_(U)>
tuple& CopyFrom(const GTEST_$(k)_TUPLE_(U)& t) {
$for m [[
f$(m)_ = t.f$(m)_;
]]
return *this;
}
$for m [[
T$m f$(m)_;
]]
};
]]
// 6.1.3.2 Tuple creation functions.
// Known limitations: we don't support passing an
// std::tr1::reference_wrapper<T> to make_tuple(). And we don't
// implement tie().
// make_tuple() with no arguments returns the empty tuple. The Pump loop
// emits one overload per arity from 1 to n; each takes its arguments by
// const reference and returns a tuple holding one field per argument.
inline tuple<> make_tuple() { return tuple<>(); }
$for k [[
$range m 0..k-1
template <GTEST_$(k)_TYPENAMES_(T)>
inline GTEST_$(k)_TUPLE_(T) make_tuple($for m, [[const T$m& f$m]]) {
return GTEST_$(k)_TUPLE_(T)($for m, [[f$m]]);
}
]]
// 6.1.3.3 Tuple helper classes.
// tuple_size<Tuple>::value is the number of fields of the tuple type. The
// primary template is only declared; the Pump loop emits one specialization
// per arity from 0 to n.
template <typename Tuple> struct tuple_size;
$for j [[
template <GTEST_$(j)_TYPENAMES_(T)>
struct tuple_size<GTEST_$(j)_TUPLE_(T) > {
static const int value = $j;
};
]]
// tuple_element<k, Tuple>::type is the type of field k of Tuple. The lookup
// is delegated to gtest_internal::TupleElement, whose first argument records
// whether k is less than the number of fields of Tuple.
template <int k, class Tuple>
struct tuple_element {
typedef typename gtest_internal::TupleElement<
k < (tuple_size<Tuple>::value), k, Tuple>::type type;
};
#define GTEST_TUPLE_ELEMENT_(k, Tuple) typename tuple_element<k, Tuple >::type
// 6.1.3.4 Element access.
namespace gtest_internal {
// The Pump loop emits one Get specialization per field index. Field()
// returns a reference to that field of a non-const tuple and ConstField()
// returns the field of a const tuple. The tuple classes declare Get as a
// friend, which is what allows the direct access to their field members here.
$for i [[
template <>
class Get<$i> {
public:
template <class Tuple>
static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_($i, Tuple))
Field(Tuple& t) { return t.f$(i)_; } // NOLINT
template <class Tuple>
static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_($i, Tuple))
ConstField(const Tuple& t) { return t.f$(i)_; }
};
]]
} // namespace gtest_internal
// get<k>(t) for a non-const tuple: forwards to gtest_internal::Get<k>::Field()
// and returns a reference to field k.
template <int k, GTEST_$(n)_TYPENAMES_(T)>
GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_$(n)_TUPLE_(T)))
get(GTEST_$(n)_TUPLE_(T)& t) {
return gtest_internal::Get<k>::Field(t);
}
// get<k>(t) for a const tuple: forwards to
// gtest_internal::Get<k>::ConstField().
template <int k, GTEST_$(n)_TYPENAMES_(T)>
GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_$(n)_TUPLE_(T)))
get(const GTEST_$(n)_TUPLE_(T)& t) {
return gtest_internal::Get<k>::ConstField(t);
}
// 6.1.3.5 Relational operators
// We only implement == and !=, as we don't have a need for the rest yet.
namespace gtest_internal {
// SameSizeTuplePrefixComparator<k, k>::Eq(t1, t2) returns true if the
// first k fields of t1 equal the first k fields of t2.
// SameSizeTuplePrefixComparator<k1, k2> is only declared, never defined,
// for k1 != k2, so using it with tuples of different sizes is a compiler
// error.
template <int kSize1, int kSize2>
struct SameSizeTuplePrefixComparator;
// Base case of the recursion: two empty prefixes always compare equal.
template <>
struct SameSizeTuplePrefixComparator<0, 0> {
template <class Tuple1, class Tuple2>
static bool Eq(const Tuple1& /* t1 */, const Tuple2& /* t2 */) {
return true;
}
};
// Recursive case: the first k fields are equal when the first k - 1 fields
// are equal and field k - 1 compares equal with operator==.  The && short-
// circuits, so fields after the first mismatch are not compared.
template <int k>
struct SameSizeTuplePrefixComparator<k, k> {
template <class Tuple1, class Tuple2>
static bool Eq(const Tuple1& t1, const Tuple2& t2) {
return SameSizeTuplePrefixComparator<k - 1, k - 1>::Eq(t1, t2) &&
::std::tr1::get<k - 1>(t1) == ::std::tr1::get<k - 1>(t2);
}
};
} // namespace gtest_internal
// Field-wise equality of two tuples (whose field types may differ).  The
// comparison is delegated to SameSizeTuplePrefixComparator instantiated with
// both tuples' sizes, which is only defined when the two sizes match.
template <GTEST_$(n)_TYPENAMES_(T), GTEST_$(n)_TYPENAMES_(U)>
inline bool operator==(const GTEST_$(n)_TUPLE_(T)& t,
const GTEST_$(n)_TUPLE_(U)& u) {
return gtest_internal::SameSizeTuplePrefixComparator<
tuple_size<GTEST_$(n)_TUPLE_(T) >::value,
tuple_size<GTEST_$(n)_TUPLE_(U) >::value>::Eq(t, u);
}
// Inequality of two tuples, defined as the negation of operator==.
template <GTEST_$(n)_TYPENAMES_(T), GTEST_$(n)_TYPENAMES_(U)>
inline bool operator!=(const GTEST_$(n)_TUPLE_(T)& t,
const GTEST_$(n)_TUPLE_(U)& u) { return !(t == u); }
// 6.1.4 Pairs.
// Unimplemented.
} // namespace tr1
} // namespace std
$for j [[
#undef GTEST_$(j)_TUPLE_
]]
$for j [[
#undef GTEST_$(j)_TYPENAMES_
]]
#undef GTEST_DECLARE_TUPLE_AS_FRIEND_
#undef GTEST_BY_REF_
#undef GTEST_ADD_REF_
#undef GTEST_TUPLE_ELEMENT_
#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,297 @@
$$ -*- mode: c++; -*-
$var n = 50 $$ Maximum length of type lists we want to support.
// Copyright 2008 Google Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
// Type utilities needed for implementing typed and type-parameterized
// tests. This file is generated by a SCRIPT. DO NOT EDIT BY HAND!
//
// Currently we support at most $n types in a list, and at most $n
// type-parameterized tests in one type-parameterized test case.
// Please contact googletestframework@googlegroups.com if you need
// more.
#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
#include "gtest/internal/gtest-port.h"
// #ifdef __GNUC__ is too general here. It is possible to use gcc without using
// libstdc++ (which is where cxxabi.h comes from).
# if GTEST_HAS_CXXABI_H_
# include <cxxabi.h>
# elif defined(__HP_aCC)
# include <acxx_demangle.h>
# endif // GTEST_HAS_CXXABI_H_
namespace testing {
namespace internal {
// GetTypeName<T>() produces a human-readable name for the type T.
//   - Without RTTI the fixed placeholder "<type>" is returned.
//   - With RTTI but no demangler, the raw typeid(T).name() is returned.
//   - Otherwise the name is demangled, falling back to the raw name when
//     demangling reports a non-zero status.
// NB: Google Mock uses this function as well, so it must stay outside the
// typed-test-only section below.
template <typename T>
std::string GetTypeName() {
# if !GTEST_HAS_RTTI

  return "<type>";

# else

  const char* const raw_name = typeid(T).name();

#  if !(GTEST_HAS_CXXABI_H_ || defined(__HP_aCC))

  return raw_name;

#  else

  // gcc's typeid(T).name() yields a mangled name, so it has to be
  // demangled before it is fit for display.
  int demangle_status = 0;
#   if GTEST_HAS_CXXABI_H_
  using abi::__cxa_demangle;
#   endif  // GTEST_HAS_CXXABI_H_
  char* const demangled = __cxa_demangle(raw_name, 0, 0, &demangle_status);
  const std::string result(demangle_status == 0 ? demangled : raw_name);
  // The demangler hands back a malloc'ed buffer that the caller must free.
  free(demangled);
  return result;

#  endif  // !(GTEST_HAS_CXXABI_H_ || __HP_aCC)

# endif  // !GTEST_HAS_RTTI
}
#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
// AssertTypeEq<T1, T2>::type is defined iff T1 and T2 are the same
// type. This can be used as a compile-time assertion to ensure that
// two types are equal.
// The primary template is declared but left undefined, so naming
// AssertTypeEq<T1, T2>::type with two different types fails to compile.
template <typename T1, typename T2>
struct AssertTypeEq;
// Specialization chosen when both template arguments are the same type.
template <typename T>
struct AssertTypeEq<T, T> {
typedef bool type;
};
// A unique type used as the default value for the arguments of class
// template Types. This allows us to simulate variadic templates
// (e.g. Types<int>, Types<int, double>, etc.), which C++ doesn't
// support directly.
struct None {};
// The following family of struct and struct templates are used to
// represent type lists. In particular, TypesN<T1, T2, ..., TN>
// represents a type list with N types (T1, T2, ..., and TN) in it.
// Except for Types0, every struct in the family has two member types:
// Head for the first type in the list, and Tail for the rest of the
// list.
// The empty type list.  It has neither a Head nor a Tail member.
struct Types0 {};
// Type lists of length 1, 2, 3, and so on.
// Types1 is written by hand because its Tail is the non-template Types0.
template <typename T1>
struct Types1 {
typedef T1 Head;
typedef Types0 Tail;
};
// Types2 .. Types$n are generated: Head is T1 and Tail is the list of the
// remaining i - 1 types (T2 .. Ti).
$range i 2..n
$for i [[
$range j 1..i
$range k 2..i
template <$for j, [[typename T$j]]>
struct Types$i {
typedef T1 Head;
typedef Types$(i-1)<$for k, [[T$k]]> Tail;
};
]]
} // namespace internal
// We don't want to require the users to write TypesN<...> directly,
// as that would require them to count the length. Types<...> is much
// easier to write, but generates horrible messages when there is a
// compiler error, as gcc insists on printing out each template
// argument, even if it has the default value (this means Types<int>
// will appear as Types<int, None, None, ..., None> in the compiler
// errors).
//
// Our solution is to combine the best part of the two approaches: a
// user would write Types<T1, ..., TN>, and Google Test will translate
// that to TypesN<T1, ..., TN> internally to make error messages
// readable. The translation is done by the 'type' member of the
// Types template.
// Primary template: every one of the $n parameters defaults to
// internal::None.  When all $n arguments are real types the list maps to
// internal::Types$n.
$range i 1..n
template <$for i, [[typename T$i = internal::None]]>
struct Types {
typedef internal::Types$n<$for i, [[T$i]]> type;
};
// Full specialization: all arguments are None, i.e. the empty list.
template <>
struct Types<$for i, [[internal::None]]> {
typedef internal::Types0 type;
};
// Partial specializations: i leading types followed by n - i None
// arguments map to the internal list of exactly i types.
$range i 1..n-1
$for i [[
$range j 1..i
$range k i+1..n
template <$for j, [[typename T$j]]>
struct Types<$for j, [[T$j]]$for k[[, internal::None]]> {
typedef internal::Types$i<$for j, [[T$j]]> type;
};
]]
namespace internal {
# define GTEST_TEMPLATE_ template <typename T> class
// The template "selector" struct TemplateSel<Tmpl> is used to
// represent Tmpl, which must be a class template with one type
// parameter, as a type. TemplateSel<Tmpl>::Bind<T>::type is defined
// as the type Tmpl<T>. This allows us to actually instantiate the
// template "selected" by TemplateSel<Tmpl>.
//
// This trick is necessary for simulating typedef for class templates,
// which C++ doesn't support directly.
// Wraps the single-parameter class template Tmpl in an ordinary type so it
// can be stored in type lists.
template <GTEST_TEMPLATE_ Tmpl>
struct TemplateSel {
// Bind<T>::type is the instantiation Tmpl<T>.
template <typename T>
struct Bind {
typedef Tmpl<T> type;
};
};
# define GTEST_BIND_(TmplSel, T) \
TmplSel::template Bind<T>::type
// A unique struct template used as the default value for the
// arguments of class template Templates. This allows us to simulate
// variadic templates (e.g. Templates<int>, Templates<int, double>,
// and etc), which C++ doesn't support directly.
// Empty placeholder template; it is the default argument for every
// parameter of the Templates template defined later in this file.
template <typename T>
struct NoneT {};
// The following family of struct and struct templates are used to
// represent template lists. In particular, TemplatesN<T1, T2, ...,
// TN> represents a list of N templates (T1, T2, ..., and TN). Except
// for Templates0, every struct in the family has two member types:
// Head for the selector of the first template in the list, and Tail
// for the rest of the list.
// The empty template list.  It has neither a Head nor a Tail member.
struct Templates0 {};
// Template lists of length 1, 2, 3, and so on.
// Templates1 is written by hand because its Tail is the non-template
// Templates0.
template <GTEST_TEMPLATE_ T1>
struct Templates1 {
typedef TemplateSel<T1> Head;
typedef Templates0 Tail;
};
// Templates2 .. Templates$n are generated: Head is the selector for T1 and
// Tail is the list of the remaining i - 1 templates (T2 .. Ti).
$range i 2..n
$for i [[
$range j 1..i
$range k 2..i
template <$for j, [[GTEST_TEMPLATE_ T$j]]>
struct Templates$i {
typedef TemplateSel<T1> Head;
typedef Templates$(i-1)<$for k, [[T$k]]> Tail;
};
]]
// We don't want to require the users to write TemplatesN<...> directly,
// as that would require them to count the length. Templates<...> is much
// easier to write, but generates horrible messages when there is a
// compiler error, as gcc insists on printing out each template
// argument, even if it has the default value (this means Templates<list>
// will appear as Templates<list, NoneT, NoneT, ..., NoneT> in the compiler
// errors).
//
// Our solution is to combine the best part of the two approaches: a
// user would write Templates<T1, ..., TN>, and Google Test will translate
// that to TemplatesN<T1, ..., TN> internally to make error messages
// readable. The translation is done by the 'type' member of the
// Templates template.
// Primary template: every one of the $n parameters defaults to NoneT.  When
// all $n arguments are real templates the list maps to Templates$n.
$range i 1..n
template <$for i, [[GTEST_TEMPLATE_ T$i = NoneT]]>
struct Templates {
typedef Templates$n<$for i, [[T$i]]> type;
};
// Full specialization: all arguments are NoneT, i.e. the empty list.
template <>
struct Templates<$for i, [[NoneT]]> {
typedef Templates0 type;
};
// Partial specializations: i leading templates followed by n - i NoneT
// arguments map to the list of exactly i templates.
$range i 1..n-1
$for i [[
$range j 1..i
$range k i+1..n
template <$for j, [[GTEST_TEMPLATE_ T$j]]>
struct Templates<$for j, [[T$j]]$for k[[, NoneT]]> {
typedef Templates$i<$for j, [[T$j]]> type;
};
]]
// The TypeList template makes it possible to use either a single type
// or a Types<...> list in TYPED_TEST_CASE() and
// INSTANTIATE_TYPED_TEST_CASE_P().
// Primary template: a single type T becomes the one-element list Types1<T>.
template <typename T>
struct TypeList {
typedef Types1<T> type;
};
// Specialization for Types<...>: reuses the list that Types<...>::type
// already computed.
$range i 1..n
template <$for i, [[typename T$i]]>
struct TypeList<Types<$for i, [[T$i]]> > {
typedef typename Types<$for i, [[T$i]]>::type type;
};
#endif // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
} // namespace internal
} // namespace testing
#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
+51
View File
@@ -0,0 +1,51 @@
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: mheule@google.com (Markus Heule)
//
// Google C++ Testing Framework (Google Test)
//
// Sometimes it's desirable to build Google Test by compiling a single file.
// This file serves this purpose.
// This line ensures that gtest.h can be compiled on its own, even
// when it's fused.
#include "gtest/gtest.h"
// The following lines pull in the real gtest *.cc files.
/**
#include "src/gtest.cc"
#include "src/gtest-death-test.cc"
#include "src/gtest-filepath.cc"
#include "src/gtest-port.cc"
#include "src/gtest-printers.cc"
#include "src/gtest-test-part.cc"
#include "src/gtest-typed-test.cc"
**/
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,409 @@
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: keith.ray@gmail.com (Keith Ray)
#include "gtest/gtest-message.h"
#include "gtest/internal/gtest-filepath.h"
#include "gtest/internal/gtest-port.h"
#include <stdlib.h>
#if GTEST_OS_WINDOWS_MOBILE
# include <windows.h>
#elif GTEST_OS_WINDOWS
# include <direct.h>
# include <io.h>
#elif GTEST_OS_SYMBIAN
// Symbian OpenC has PATH_MAX in sys/syslimits.h
# include <sys/syslimits.h>
#else
# include <limits.h>
# include <climits> // Some Linux distributions define PATH_MAX here.
#endif // GTEST_OS_WINDOWS_MOBILE
#if GTEST_OS_WINDOWS
# define GTEST_PATH_MAX_ _MAX_PATH
#elif defined(PATH_MAX)
# define GTEST_PATH_MAX_ PATH_MAX
#elif defined(_XOPEN_PATH_MAX)
# define GTEST_PATH_MAX_ _XOPEN_PATH_MAX
#else
# define GTEST_PATH_MAX_ _POSIX_PATH_MAX
#endif // GTEST_OS_WINDOWS
#include "gtest/internal/gtest-string.h"
namespace testing {
namespace internal {
#if GTEST_OS_WINDOWS
// On Windows, '\\' is the standard path separator, but many tools and the
// Windows API also accept '/' as an alternate path separator. Unless otherwise
// noted, a file path can contain either kind of path separators, or a mixture
// of them.
const char kPathSeparator = '\\';
const char kAlternatePathSeparator = '/';
const char kPathSeparatorString[] = "\\";
const char kAlternatePathSeparatorString[] = "/";
# if GTEST_OS_WINDOWS_MOBILE
// Windows CE doesn't have a current directory. You should not use
// the current directory in tests on Windows CE, but this at least
// provides a reasonable fallback.
const char kCurrentDirectoryString[] = "\\";
// Windows CE doesn't define INVALID_FILE_ATTRIBUTES
const DWORD kInvalidFileAttributes = 0xffffffff;
# else
const char kCurrentDirectoryString[] = ".\\";
# endif // GTEST_OS_WINDOWS_MOBILE
#else
const char kPathSeparator = '/';
const char kPathSeparatorString[] = "/";
const char kCurrentDirectoryString[] = "./";
#endif // GTEST_OS_WINDOWS
// Reports whether c is accepted as a path separator.  kPathSeparator always
// qualifies; kAlternatePathSeparator qualifies only when
// GTEST_HAS_ALT_PATH_SEP_ is set.
static bool IsPathSeparator(char c) {
  if (c == kPathSeparator) {
    return true;
  }
#if GTEST_HAS_ALT_PATH_SEP_
  return c == kAlternatePathSeparator;
#else
  return false;
#endif
}
// Yields the process's current working directory as a FilePath, or an empty
// FilePath when the directory cannot be obtained.
FilePath FilePath::GetCurrentDir() {
#if GTEST_OS_WINDOWS_MOBILE
  // Windows CE has no notion of a current directory, so a reasonable
  // stand-in is returned instead.
  return FilePath(kCurrentDirectoryString);
#else
  char buffer[GTEST_PATH_MAX_ + 1] = { '\0' };
# if GTEST_OS_WINDOWS
  const char* const outcome = _getcwd(buffer, sizeof(buffer));
# else
  const char* const outcome = getcwd(buffer, sizeof(buffer));
# endif  // GTEST_OS_WINDOWS
  if (outcome == NULL) {
    return FilePath("");
  }
  return FilePath(buffer);
#endif  // GTEST_OS_WINDOWS_MOBILE
}
// Produces a copy of this FilePath with the trailing ".<extension>" dropped.
// The extension is matched without regard to case, e.g.
// FilePath("dir/file.exe").RemoveExtension("EXE") gives FilePath("dir/file").
// When the path does not end in that extension, an unchanged copy is
// returned.
FilePath FilePath::RemoveExtension(const char* extension) const {
  const std::string suffix = std::string(".") + extension;
  if (!String::EndsWithCaseInsensitive(pathname_, suffix)) {
    return *this;
  }
  const size_t kept_length = pathname_.length() - suffix.length();
  return FilePath(pathname_.substr(0, kept_length));
}
// Locates the last occurrence of a valid path separator in the FilePath and
// returns a pointer to it, or NULL when the path contains no separator.
// On Windows, for example, both '/' and '\' count as separators.
const char* FilePath::FindLastPathSeparator() const {
  const char* const primary = strrchr(c_str(), kPathSeparator);
#if GTEST_HAS_ALT_PATH_SEP_
  const char* const alternate = strrchr(c_str(), kAlternatePathSeparator);
  if (alternate == NULL) {
    return primary;
  }
  // Comparing two pointers of which only one is NULL is undefined, so the
  // NULL test on 'primary' has to come before the ordering test.
  if (primary == NULL || alternate > primary) {
    return alternate;
  }
#endif
  return primary;
}
// Strips the directory part and returns what follows the last separator.
//   FilePath("path/to/file") -> FilePath("file")
//   FilePath("just_a_file")  -> unchanged (there is no separator)
//   FilePath("just_a_dir/")  -> FilePath("") (nothing follows the separator)
// On Windows platform, '\' is the path separator, otherwise it is '/'.
FilePath FilePath::RemoveDirectoryName() const {
  const char* const separator = FindLastPathSeparator();
  if (separator == NULL) {
    return *this;
  }
  return FilePath(separator + 1);
}
// RemoveFileName keeps everything up to and including the last path
// separator and drops whatever follows it.
//   FilePath("path/to/file") -> "path/to/"
//   FilePath("just/a/dir/")  -> unchanged (nothing follows the separator)
// A path that contains no separator at all, such as "a_file", yields
// kCurrentDirectoryString ("./", or ".\\" on Windows).
// On Windows platform, '\' is the path separator, otherwise it is '/'.
FilePath FilePath::RemoveFileName() const {
  const char* const separator = FindLastPathSeparator();
  if (separator == NULL) {
    return FilePath(kCurrentDirectoryString);
  }
  const char* const start = c_str();
  return FilePath(std::string(start, separator + 1 - start));
}
// Builds the name of a file inside a directory, as used for xml output.
// With directory = "dir", base_name = "test", extension = "xml":
//   number == 0  -> "dir/test.xml"
//   number == 12 -> "dir/test_12.xml" (any non-zero number is appended)
// On Windows platform, uses \ as the separator rather than /.
FilePath FilePath::MakeFileName(const FilePath& directory,
                                const FilePath& base_name,
                                int number,
                                const char* extension) {
  std::string leaf = base_name.string();
  if (number != 0) {
    leaf += "_" + StreamableToString(number);
  }
  leaf += ".";
  leaf += extension;
  return ConcatPaths(directory, FilePath(leaf));
}
// Joins a directory and a relative path with exactly one kPathSeparator
// between them: directory = "dir", relative_path = "test.xml" gives
// "dir/test.xml".  An empty directory yields relative_path unchanged.
// On Windows, uses \ as the separator rather than /.
FilePath FilePath::ConcatPaths(const FilePath& directory,
                               const FilePath& relative_path) {
  if (!directory.IsEmpty()) {
    // Drop a trailing separator first so the join never doubles it.
    const FilePath trimmed(directory.RemoveTrailingPathSeparator());
    std::string joined = trimmed.string();
    joined += kPathSeparator;
    joined += relative_path.string();
    return FilePath(joined);
  }
  return relative_path;
}
// Returns true if pathname describes something findable in the file-system,
// either a file, directory, or whatever.
bool FilePath::FileOrDirectoryExists() const {
#if GTEST_OS_WINDOWS_MOBILE
LPCWSTR unicode = String::AnsiToUtf16(pathname_.c_str());
const DWORD attributes = GetFileAttributes(unicode);
delete [] unicode;
return attributes != kInvalidFileAttributes;
#else
posix::StatStruct file_stat;
return posix::Stat(pathname_.c_str(), &file_stat) == 0;
#endif // GTEST_OS_WINDOWS_MOBILE
}
// Returns true if pathname describes a directory in the file-system
// that exists.
bool FilePath::DirectoryExists() const {
bool result = false;
#if GTEST_OS_WINDOWS
// Don't strip off trailing separator if path is a root directory on
// Windows (like "C:\\").
const FilePath& path(IsRootDirectory() ? *this :
RemoveTrailingPathSeparator());
#else
const FilePath& path(*this);
#endif
#if GTEST_OS_WINDOWS_MOBILE
LPCWSTR unicode = String::AnsiToUtf16(path.c_str());
const DWORD attributes = GetFileAttributes(unicode);
delete [] unicode;
if ((attributes != kInvalidFileAttributes) &&
(attributes & FILE_ATTRIBUTE_DIRECTORY)) {
result = true;
}
#else
posix::StatStruct file_stat;
result = posix::Stat(path.c_str(), &file_stat) == 0 &&
posix::IsDir(file_stat);
#endif // GTEST_OS_WINDOWS_MOBILE
return result;
}
// Reports whether the pathname denotes a root directory.  (Windows has one
// root directory per disk drive.)
bool FilePath::IsRootDirectory() const {
#if GTEST_OS_WINDOWS
  // TODO(wan@google.com): on Windows a network share like
  // \\server\share can be a root directory, although it cannot be the
  // current directory.  Handle this properly.
  if (pathname_.length() != 3) {
    return false;
  }
  return IsAbsolutePath();
#else
  if (pathname_.length() != 1) {
    return false;
  }
  return IsPathSeparator(pathname_.c_str()[0]);
#endif
}
// Reports whether the pathname is absolute.  On Windows that means an ASCII
// drive letter, a ':' and a path separator; elsewhere it means the path
// starts with a path separator.
bool FilePath::IsAbsolutePath() const {
#if GTEST_OS_WINDOWS
  if (pathname_.length() < 3) {
    return false;
  }
  const char* const text = pathname_.c_str();
  const char drive = text[0];
  const bool drive_is_letter = (drive >= 'a' && drive <= 'z') ||
                               (drive >= 'A' && drive <= 'Z');
  return drive_is_letter && text[1] == ':' && IsPathSeparator(text[2]);
#else
  const char* const text = pathname_.c_str();
  return IsPathSeparator(text[0]);
#endif
}
// Finds a pathname that does not exist yet.  The first candidate is
// directory/base_name.extension; while the candidate already exists, the
// next one tried is directory/base_name_<number>.extension with number
// counting up from 1.
// Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'.
// There could be a race condition if two or more processes are calling this
// function at the same time -- they could both pick the same filename.
FilePath FilePath::GenerateUniqueFileName(const FilePath& directory,
                                          const FilePath& base_name,
                                          const char* extension) {
  FilePath candidate;
  for (int number = 0; ; ++number) {
    candidate.Set(MakeFileName(directory, base_name, number, extension));
    if (!candidate.FileOrDirectoryExists()) {
      break;
    }
  }
  return candidate;
}
// Reports whether the FilePath ends with a path separator, which marks it as
// intended to name a directory.  An empty path is never a directory.
// This does NOT check that a directory (or file) actually exists.
bool FilePath::IsDirectory() const {
  if (pathname_.empty()) {
    return false;
  }
  const char last_char = pathname_.c_str()[pathname_.length() - 1];
  return IsPathSeparator(last_char);
}
// Create directories so that path exists. Returns true if successful or if
// the directories already exist; returns false if unable to create directories
// for any reason.
bool FilePath::CreateDirectoriesRecursively() const {
if (!this->IsDirectory()) {
return false;
}
if (pathname_.length() == 0 || this->DirectoryExists()) {
return true;
}
const FilePath parent(this->RemoveTrailingPathSeparator().RemoveFileName());
return parent.CreateDirectoriesRecursively() && this->CreateFolder();
}
// Creates the single directory named by this path.  Returns true when the
// directory was created or already exists; returns false when it could not
// be created for any reason, including a missing parent directory.
// Not named "CreateDirectory" because that's a macro on Windows.
bool FilePath::CreateFolder() const {
#if GTEST_OS_WINDOWS_MOBILE
  FilePath trimmed(this->RemoveTrailingPathSeparator());
  LPCWSTR wide_path = String::AnsiToUtf16(trimmed.c_str());
  const bool created = CreateDirectory(wide_path, NULL) != 0;
  delete [] wide_path;
#elif GTEST_OS_WINDOWS
  const bool created = _mkdir(pathname_.c_str()) != -1;
#else
  const bool created = mkdir(pathname_.c_str(), 0777) != -1;
#endif  // GTEST_OS_WINDOWS_MOBILE

  if (created) {
    return true;  // No error.
  }
  // An error is OK if the directory exists.
  return this->DirectoryExists();
}
// Returns the path without its final character when that character is a
// path separator; otherwise returns an unchanged copy.
// On Windows platform, uses \ as the separator, other platforms use /.
FilePath FilePath::RemoveTrailingPathSeparator() const {
  if (!IsDirectory()) {
    return *this;
  }
  return FilePath(pathname_.substr(0, pathname_.length() - 1));
}
// Removes any redundant separators that might be in the pathname.
// For example, "bar///foo" becomes "bar/foo". Does not eliminate other
// redundancies that might be in a pathname involving "." or "..".
// TODO(wan@google.com): handle Windows network shares (e.g. \\server\share).
void FilePath::Normalize() {
if (pathname_.c_str() == NULL) {
pathname_ = "";
return;
}
const char* src = pathname_.c_str();
char* const dest = new char[pathname_.length() + 1];
char* dest_ptr = dest;
memset(dest_ptr, 0, pathname_.length() + 1);
while (*src != '\0') {
*dest_ptr = *src;
if (!IsPathSeparator(*src)) {
src++;
}
else {
#if GTEST_HAS_ALT_PATH_SEP_
if (*dest_ptr == kAlternatePathSeparator) {
*dest_ptr = kPathSeparator;
}
#endif
while (IsPathSeparator(*src)) {
src++;
}
}
dest_ptr++;
}
*dest_ptr = '\0';
pathname_ = dest;
delete[] dest;
}
} // namespace internal
} // namespace testing
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,884 @@
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
#include "gtest/internal/gtest-port.h"
#include <limits.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#if GTEST_OS_WINDOWS_MOBILE
# include <windows.h> // For TerminateProcess()
#elif GTEST_OS_WINDOWS
# include <io.h>
# include <sys/stat.h>
#else
# include <unistd.h>
#endif // GTEST_OS_WINDOWS_MOBILE
#if GTEST_OS_MAC
# include <mach/mach_init.h>
# include <mach/task.h>
# include <mach/vm_map.h>
#endif // GTEST_OS_MAC
#if GTEST_OS_QNX
# include <devctl.h>
# include <sys/procfs.h>
#endif // GTEST_OS_QNX
#include "gtest/gtest-spi.h"
#include "gtest/gtest-message.h"
#include "gtest/internal/gtest-internal.h"
#include "gtest/internal/gtest-string.h"
// Indicates that this translation unit is part of Google Test's
// implementation. It must come before gtest-internal-inl.h is
// included, or there will be a compiler error. This trick is to
// prevent a user from accidentally including gtest-internal-inl.h in
// his code.
#define GTEST_IMPLEMENTATION_ 1
#include "src/gtest-internal-inl.h"
#undef GTEST_IMPLEMENTATION_
namespace testing {
namespace internal {
#if defined(_MSC_VER) || defined(__BORLANDC__)
// MSVC and C++Builder do not provide a definition of STDERR_FILENO.
const int kStdOutFileno = 1;
const int kStdErrFileno = 2;
#else
const int kStdOutFileno = STDOUT_FILENO;
const int kStdErrFileno = STDERR_FILENO;
#endif // _MSC_VER
#if GTEST_OS_MAC
// Mac implementation: asks the kernel for this task's thread list and
// returns its length, or 0 when the count cannot be determined.
size_t GetThreadCount() {
  const task_t self_task = mach_task_self();
  mach_msg_type_number_t count;
  thread_act_array_t threads;
  if (task_threads(self_task, &threads, &count) != KERN_SUCCESS) {
    return 0;
  }
  // task_threads allocates resources in the thread list and we need to free
  // them to avoid leaks.
  vm_deallocate(self_task,
                reinterpret_cast<vm_address_t>(threads),
                sizeof(thread_t) * count);
  return static_cast<size_t>(count);
}
#elif GTEST_OS_QNX
// QNX implementation: queries /proc/self/as with DCMD_PROC_INFO and returns
// the reported num_threads, or 0 when the count cannot be determined.
size_t GetThreadCount() {
  const int proc_fd = open("/proc/self/as", O_RDONLY);
  if (proc_fd < 0) {
    return 0;
  }
  procfs_info info;
  const int rc =
      devctl(proc_fd, DCMD_PROC_INFO, &info, sizeof(info), NULL);
  close(proc_fd);
  if (rc != EOK) {
    return 0;
  }
  return static_cast<size_t>(info.num_threads);
}
#else
// Fallback implementation: there's no portable way to detect the number of
// threads, so 0 is always returned to signal "cannot detect".
size_t GetThreadCount() {
  const size_t kUnknownThreadCount = 0;
  return kUnknownThreadCount;
}
#endif // GTEST_OS_MAC
#if GTEST_USES_POSIX_RE
// Implements RE.  Currently only needed for death tests.
//
// Releases both compiled regexes (only when the RE is valid) and the copy of
// the pattern string.
RE::~RE() {
  // regfree'ing an invalid regex might crash because the content of the
  // regex is undefined.  Since the two regexes are essentially the same, one
  // cannot be valid (or invalid) without the other being so too.
  const bool compiled = is_valid_;
  if (compiled) {
    regfree(&full_regex_);
    regfree(&partial_regex_);
  }
  free(const_cast<char*>(pattern_));
}
// Returns true iff regular expression re matches the entire str.
bool RE::FullMatch(const char* str, const RE& re) {
if (!re.is_valid_) {
return false;
}
regmatch_t match;
return regexec(&re.full_regex_, str, 1, &match, 0) == 0;
}
// Returns true iff regular expression re matches a substring of str
// (including str itself).  An RE that failed to compile never matches.
bool RE::PartialMatch(const char* str, const RE& re) {
  regmatch_t unused_match;
  // Short-circuiting keeps regexec() away from an uncompiled regex.
  return re.is_valid_ &&
         regexec(&re.partial_regex_, str, 1, &unused_match, 0) == 0;
}
// Initializes an RE from its string representation.
//
// Two POSIX Extended regexes are compiled from the pattern:
//   - full_regex_, the pattern wrapped as "^(...)$", used by FullMatch();
//   - partial_regex_, the pattern itself, used by PartialMatch().
// is_valid_ ends up true only if both compile.  Otherwise a non-fatal
// test failure is generated and no compiled regex is left allocated.
void RE::Init(const char* regex) {
  pattern_ = posix::StrDup(regex);

  // Reserves enough bytes to hold the regular expression used for a
  // full match: the pattern plus the "^(" / ")$" wrapper and the
  // terminating '\0'.
  const size_t full_regex_len = strlen(regex) + 10;
  char* const full_pattern = new char[full_regex_len];

  snprintf(full_pattern, full_regex_len, "^(%s)$", regex);
  is_valid_ = regcomp(&full_regex_, full_pattern, REG_EXTENDED) == 0;

  // partial_regex_ is compiled only if full_regex_ compiled.  That is
  // safe because the destructor calls regfree() on the two regexes only
  // when is_valid_ is true, so an uncompiled regex is never freed.
  //
  // Some implementation of POSIX regex (e.g. on at least some
  // versions of Cygwin) doesn't accept the empty string as a valid
  // regex.  We change it to an equivalent form "()" to be safe.
  if (is_valid_) {
    const char* const partial_regex = (*regex == '\0') ? "()" : regex;
    is_valid_ = regcomp(&partial_regex_, partial_regex, REG_EXTENDED) == 0;
    if (!is_valid_) {
      // full_regex_ compiled but partial_regex_ did not.  Since
      // is_valid_ is now false the destructor will not free either
      // regex, so full_regex_ must be released here to avoid a leak.
      regfree(&full_regex_);
    }
  }
  EXPECT_TRUE(is_valid_)
      << "Regular expression \"" << regex
      << "\" is not a valid POSIX Extended regular expression.";

  delete[] full_pattern;
}
#elif GTEST_USES_SIMPLE_RE
// Returns true iff ch is one of the characters of str.  The
// terminating '\0' of str is not considered a member of the set.
bool IsInSet(char ch, const char* str) {
  if (ch == '\0') {
    // strchr() would find the terminator, so reject it explicitly.
    return false;
  }
  return strchr(str, ch) != NULL;
}
// Returns true iff ch belongs to the given classification. Unlike
// similar functions in <ctype.h>, these aren't affected by the
// current locale.
bool IsAsciiDigit(char ch) {
  // Plain range test on the character code; no locale is consulted.
  return ch >= '0' && ch <= '9';
}
bool IsAsciiPunct(char ch) {
  // The punctuation characters recognized by the simple regex engine.
  const char* const kPunctuation = "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~";
  return IsInSet(ch, kPunctuation);
}
bool IsRepeat(char ch) {
  // The repetition meta characters supported by the simple regex engine.
  const char* const kRepeatChars = "?*+";
  return IsInSet(ch, kRepeatChars);
}
bool IsAsciiWhiteSpace(char ch) {
  // Space, form feed, newline, carriage return, tab and vertical tab.
  const char* const kWhiteSpace = " \f\n\r\t\v";
  return IsInSet(ch, kWhiteSpace);
}
bool IsAsciiWordChar(char ch) {
  // A word character is an ASCII letter, an ASCII digit or '_'.
  if (ch == '_') {
    return true;
  }
  const bool is_lower = ch >= 'a' && ch <= 'z';
  const bool is_upper = ch >= 'A' && ch <= 'Z';
  const bool is_digit = ch >= '0' && ch <= '9';
  return is_lower || is_upper || is_digit;
}
// Returns true iff "\\c" is a supported escape sequence: either an
// escaped punctuation character or one of the letter escapes below.
bool IsValidEscape(char c) {
  if (IsAsciiPunct(c)) {
    return true;
  }
  return IsInSet(c, "dDfnrsStvwW");
}
// Returns true iff the given atom (specified by escaped and
// pattern_char) matches ch.  The result is undefined if the atom is
// invalid.
bool AtomMatchesChar(bool escaped, char pattern_char, char ch) {
  if (!escaped) {
    // A bare '.' matches anything but a newline; every other bare
    // character only matches itself.
    if (pattern_char == '.' && ch != '\n') {
      return true;
    }
    return pattern_char == ch;
  }

  // "\\p" where p is pattern_char.
  switch (pattern_char) {
    case 'd': return IsAsciiDigit(ch);
    case 'D': return !IsAsciiDigit(ch);
    case 'f': return ch == '\f';
    case 'n': return ch == '\n';
    case 'r': return ch == '\r';
    case 's': return IsAsciiWhiteSpace(ch);
    case 'S': return !IsAsciiWhiteSpace(ch);
    case 't': return ch == '\t';
    case 'v': return ch == '\v';
    case 'w': return IsAsciiWordChar(ch);
    case 'W': return !IsAsciiWordChar(ch);
    default: break;
  }
  // Any other escape is an escaped punctuation character, which matches
  // that character literally.
  return IsAsciiPunct(pattern_char) && pattern_char == ch;
}
// Helper function used by ValidateRegex() to format error messages.
std::string FormatRegexSyntaxError(const char* regex, int index) {
return (Message() << "Syntax error at index " << index
<< " in simple regular expression \"" << regex << "\": ").GetString();
}
// Generates non-fatal failures and returns false if regex is invalid;
// otherwise returns true.
//
// Except for a NULL regex and a trailing backslash, which stop
// validation immediately, every syntax error found is reported so that
// a single call surfaces all problems in the pattern.
bool ValidateRegex(const char* regex) {
if (regex == NULL) {
// TODO(wan@google.com): fix the source file location in the
// assertion failures to match where the regex is used in user
// code.
ADD_FAILURE() << "NULL is not a valid simple regular expression.";
return false;
}
// Stays true until the first syntax error is reported.
bool is_valid = true;
// True iff ?, *, or + can follow the previous atom.
bool prev_repeatable = false;
for (int i = 0; regex[i]; i++) {
if (regex[i] == '\\') { // An escape sequence
// Step onto the escaped character; i - 1 below is the index of the
// backslash itself.
i++;
if (regex[i] == '\0') {
ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1)
<< "'\\' cannot appear at the end.";
// Nothing follows the backslash, so there is nothing left to check.
return false;
}
if (!IsValidEscape(regex[i])) {
ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1)
<< "invalid escape sequence \"\\" << regex[i] << "\".";
is_valid = false;
}
// An escape sequence is always treated as a repeatable atom.
prev_repeatable = true;
}
else { // Not an escape sequence.
const char ch = regex[i];
if (ch == '^' && i > 0) {
ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
<< "'^' can only appear at the beginning.";
is_valid = false;
}
else if (ch == '$' && regex[i + 1] != '\0') {
ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
<< "'$' can only appear at the end.";
is_valid = false;
}
else if (IsInSet(ch, "()[]{}|")) {
ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
<< "'" << ch << "' is unsupported.";
is_valid = false;
}
else if (IsRepeat(ch) && !prev_repeatable) {
ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
<< "'" << ch << "' can only follow a repeatable token.";
is_valid = false;
}
// Anchors and repetition characters cannot themselves be repeated.
prev_repeatable = !IsInSet(ch, "^$?*+");
}
}
return is_valid;
}
// Matches a repeated regex atom followed by a valid simple regular
// expression.  The atom is c if escaped is false, or \c otherwise;
// repeat is the repetition meta character (?, *, or +) applied to it
// and regex is the remainder of the pattern.  The behavior is
// undefined if str contains too many characters to be indexable by
// size_t, in which case the test will probably time out anyway.  We
// are fine with this limitation as std::string has it too.
bool MatchRepetitionAndRegexAtHead(
    bool escaped, char c, char repeat, const char* regex,
    const char* str) {
  // '+' requires at least one occurrence; '?' and '*' accept none.
  const size_t min_count = (repeat == '+') ? 1 : 0;
  // '?' allows at most one occurrence; '*' and '+' are effectively
  // unbounded.  We cannot call numeric_limits::max() as it conflicts
  // with the max() macro on Windows.
  const size_t max_count = (repeat == '?') ? 1 :
                           static_cast<size_t>(-1) - 1;

  size_t matched = 0;
  while (matched <= max_count) {
    // Invariant: the atom matches each of the first `matched`
    // characters of str.
    if (matched >= min_count && MatchRegexAtHead(regex, str + matched)) {
      // Enough repetitions at the head and the tail matches too.  We
      // only care about *whether* the pattern matches, not *how*, so
      // there is no need to look for a greedy match.
      return true;
    }
    const char next = str[matched];
    if (next == '\0' || !AtomMatchesChar(escaped, c, next)) {
      return false;
    }
    ++matched;
  }
  return false;
}
// Returns true iff regex matches a prefix of str.  regex must be a
// valid simple regular expression and not start with "^", or the
// result is undefined.
bool MatchRegexAtHead(const char* regex, const char* str) {
  switch (*regex) {
    case '\0':
      // An empty regex matches a prefix of anything.
      return true;
    case '$':
      // "$" only matches the end of a string.  Note that regex being
      // valid guarantees that there's nothing after "$" in it.
      return *str == '\0';
    default:
      break;
  }

  // Is the first thing in regex an escape sequence?  If so the atom
  // character is the one following the backslash.
  const bool escaped = (*regex == '\\');
  const char* const atom = escaped ? regex + 1 : regex;

  if (IsRepeat(atom[1])) {
    // MatchRepetitionAndRegexAtHead() calls MatchRegexAtHead(), so
    // here's an indirect recursion.  It terminates as the regex gets
    // shorter in each recursion.
    return MatchRepetitionAndRegexAtHead(
        escaped, atom[0], atom[1], atom + 2, str);
  }

  // regex isn't empty, isn't "$", and doesn't start with a repetition.
  // Match the first atom against the first character of str, then
  // recurse on what remains of both.
  if (*str == '\0') {
    return false;
  }
  if (!AtomMatchesChar(escaped, *atom, *str)) {
    return false;
  }
  return MatchRegexAtHead(atom + 1, str + 1);
}
// Returns true iff regex matches any substring of str.  regex must be
// a valid simple regular expression, or the result is undefined.
// A NULL regex or a NULL str never matches.
//
// The algorithm is recursive, but the recursion depth doesn't exceed
// the regex length, so we won't need to worry about running out of
// stack space normally.  In rare cases the time complexity can be
// exponential with respect to the regex length + the string length,
// but usually it's much faster (often close to linear).
bool MatchRegexAnywhere(const char* regex, const char* str) {
  if (regex == NULL || str == NULL) {
    return false;
  }
  if (*regex == '^') {
    // An anchored pattern can only match at the very start of str.
    return MatchRegexAtHead(regex + 1, str);
  }
  // A successful match can begin anywhere in str, including at the
  // terminating '\0' (so that patterns matching the empty string work).
  for (const char* start = str; ; ++start) {
    if (MatchRegexAtHead(regex, start)) {
      return true;
    }
    if (*start == '\0') {
      return false;
    }
  }
}
// Implements the RE class.
RE::~RE() {
  // Both strings are released with free(); either may be NULL, which
  // free() accepts.  The casts only strip const.
  free(const_cast<char*>(full_pattern_));
  free(const_cast<char*>(pattern_));
}
// Returns true iff regular expression re matches the entire str.
// An invalid RE never matches.
bool RE::FullMatch(const char* str, const RE& re) {
  if (!re.is_valid_) {
    return false;
  }
  // full_pattern_ is the pattern anchored with '^' and '$'.
  return MatchRegexAnywhere(re.full_pattern_, str);
}
// Returns true iff regular expression re matches a substring of str
// (including str itself).  An invalid RE never matches.
bool RE::PartialMatch(const char* str, const RE& re) {
  if (!re.is_valid_) {
    return false;
  }
  return MatchRegexAnywhere(re.pattern_, str);
}
// Initializes an RE from its string representation.  pattern_ keeps a
// copy of regex; full_pattern_ is the same pattern anchored with '^'
// and '$' and is only built when the regex is valid.
void RE::Init(const char* regex) {
  pattern_ = NULL;
  full_pattern_ = NULL;
  if (regex != NULL) {
    pattern_ = posix::StrDup(regex);
  }

  is_valid_ = ValidateRegex(regex);
  if (!is_valid_) {
    // No need to calculate the full pattern when the regex is invalid.
    return;
  }

  const size_t regex_len = strlen(regex);
  // Reserves enough bytes to hold the regular expression used for a
  // full match: we need space to prepend a '^', append a '$', and
  // terminate the string with '\0'.
  char* const storage = static_cast<char*>(malloc(regex_len + 3));
  full_pattern_ = storage;

  char* cursor = storage;
  if (regex[0] != '^') {
    *cursor++ = '^';  // Makes sure full_pattern_ starts with '^'.
  }

  // We don't use snprintf or strncpy, as they trigger a warning when
  // compiled with VC++ 8.0.
  memcpy(cursor, regex, regex_len);
  cursor += regex_len;

  const bool ends_with_dollar =
      regex_len != 0 && regex[regex_len - 1] == '$';
  if (!ends_with_dollar) {
    *cursor++ = '$';  // Makes sure full_pattern_ ends with '$'.
  }
  *cursor = '\0';
}
#endif // GTEST_USES_POSIX_RE
// Placeholder substituted for the file name by FormatFileLocation() and
// FormatCompilerIndependentFileLocation() when they are given a NULL file.
const char kUnknownFile[] = "unknown file";
// Formats a source file path and a line number as they would appear
// in an error message from the compiler used to compile this code:
// "file(line):" under MSVC and "file:line:" elsewhere.  A NULL file is
// shown as kUnknownFile, and a negative line yields just "file:".
GTEST_API_ ::std::string FormatFileLocation(const char* file, int line) {
  const std::string file_name(file != NULL ? file : kUnknownFile);

  if (line < 0) {
    return file_name + ":";
  }
  const std::string line_text = StreamableToString(line);
#ifdef _MSC_VER
  return file_name + "(" + line_text + "):";
#else
  return file_name + ":" + line_text + ":";
#endif  // _MSC_VER
}
// Formats a file location for compiler-independent XML output.
// Although this function is not platform dependent, we put it next to
// FormatFileLocation in order to contrast the two functions.
// Note that FormatCompilerIndependentFileLocation() does NOT append colon
// to the file location it produces, unlike FormatFileLocation().
// A NULL file is shown as kUnknownFile; a negative line is omitted.
GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(
    const char* file, int line) {
  const std::string file_name(file != NULL ? file : kUnknownFile);
  if (line >= 0) {
    return file_name + ":" + StreamableToString(line);
  }
  return file_name;
}
GTestLog::GTestLog(GTestLogSeverity severity, const char* file, int line)
    : severity_(severity) {
  // Pick the tag shown in front of the message; anything that is not
  // INFO, WARNING or ERROR is reported as FATAL.
  const char* marker = "[ FATAL ]";
  if (severity == GTEST_INFO) {
    marker = "[ INFO ]";
  } else if (severity == GTEST_WARNING) {
    marker = "[WARNING]";
  } else if (severity == GTEST_ERROR) {
    marker = "[ ERROR ]";
  }
  // Start the log entry on a fresh line: "<marker> <location>: ".
  GetStream() << ::std::endl << marker << " "
              << FormatFileLocation(file, line).c_str() << ": ";
}
// Terminates the log entry with a newline (flushing the stream) and,
// if severity is GTEST_FATAL, aborts the program.
GTestLog::~GTestLog() {
  GetStream() << ::std::endl;
  if (severity_ != GTEST_FATAL) {
    return;
  }
  // Make sure everything written to stderr is out before aborting.
  fflush(stderr);
  posix::Abort();
}
// Disable Microsoft deprecation warnings for POSIX functions called from
// this class (creat, dup, dup2, and close)
#ifdef _MSC_VER
# pragma warning(push)
# pragma warning(disable: 4996)
#endif // _MSC_VER
#if GTEST_HAS_STREAM_REDIRECTION
// Object that captures an output stream (stdout/stderr).
class CapturedStream {
public:
// The ctor redirects the stream to a temporary file.
explicit CapturedStream(int fd) : fd_(fd), uncaptured_fd_(dup(fd)) {
# if GTEST_OS_WINDOWS
char temp_dir_path[MAX_PATH + 1] = { '\0' }; // NOLINT
char temp_file_path[MAX_PATH + 1] = { '\0' }; // NOLINT
::GetTempPathA(sizeof(temp_dir_path), temp_dir_path);
const UINT success = ::GetTempFileNameA(temp_dir_path,
"gtest_redir",
0, // Generate unique file name.
temp_file_path);
GTEST_CHECK_(success != 0)
<< "Unable to create a temporary file in " << temp_dir_path;
const int captured_fd = creat(temp_file_path, _S_IREAD | _S_IWRITE);
GTEST_CHECK_(captured_fd != -1) << "Unable to open temporary file "
<< temp_file_path;
filename_ = temp_file_path;
# else
// There's no guarantee that a test has write access to the current
// directory, so we create the temporary file in the /tmp directory
// instead. We use /tmp on most systems, and /sdcard on Android.
// That's because Android doesn't have /tmp.
# if GTEST_OS_LINUX_ANDROID
// Note: Android applications are expected to call the framework's
// Context.getExternalStorageDirectory() method through JNI to get
// the location of the world-writable SD Card directory. However,
// this requires a Context handle, which cannot be retrieved
// globally from native code. Doing so also precludes running the
// code as part of a regular standalone executable, which doesn't
// run in a Dalvik process (e.g. when running it through 'adb shell').
//
// The location /sdcard is directly accessible from native code
// and is the only location (unofficially) supported by the Android
// team. It's generally a symlink to the real SD Card mount point
// which can be /mnt/sdcard, /mnt/sdcard0, /system/media/sdcard, or
// other OEM-customized locations. Never rely on these, and always
// use /sdcard.
char name_template[] = "/sdcard/gtest_captured_stream.XXXXXX";
# else
char name_template[] = "/tmp/captured_stream.XXXXXX";
# endif // GTEST_OS_LINUX_ANDROID
const int captured_fd = mkstemp(name_template);
filename_ = name_template;
# endif // GTEST_OS_WINDOWS
fflush(NULL);
dup2(captured_fd, fd_);
close(captured_fd);
}
~CapturedStream() {
remove(filename_.c_str());
}
std::string GetCapturedString() {
if (uncaptured_fd_ != -1) {
// Restores the original stream.
fflush(NULL);
dup2(uncaptured_fd_, fd_);
close(uncaptured_fd_);
uncaptured_fd_ = -1;
}
FILE* const file = posix::FOpen(filename_.c_str(), "r");
const std::string content = ReadEntireFile(file);
posix::FClose(file);
return content;
}
private:
// Reads the entire content of a file as an std::string.
static std::string ReadEntireFile(FILE* file);
// Returns the size (in bytes) of a file.
static size_t GetFileSize(FILE* file);
const int fd_; // A stream to capture.
int uncaptured_fd_;
// Name of the temporary file holding the stderr output.
::std::string filename_;
GTEST_DISALLOW_COPY_AND_ASSIGN_(CapturedStream);
};
// Returns the size (in bytes) of a file, or 0 if the size cannot be
// determined.  On success the file position is left at the end of the
// file.
size_t CapturedStream::GetFileSize(FILE* file) {
  if (fseek(file, 0, SEEK_END) != 0) {
    return 0;
  }
  // ftell() reports failure as -1; converting that directly to size_t
  // would yield an enormous bogus size, so map it to 0 instead.
  const long end_offset = ftell(file);  // NOLINT
  if (end_offset < 0) {
    return 0;
  }
  return static_cast<size_t>(end_offset);
}
// Reads the entire content of a file as a string.  The file is read
// from its beginning regardless of the current file position.
std::string CapturedStream::ReadEntireFile(FILE* file) {
  const size_t total = GetFileSize(file);
  char* const data = new char[total];

  fseek(file, 0, SEEK_SET);

  // Keeps reading the file until we cannot read further or the
  // pre-determined file size is reached.
  size_t filled = 0;  // # of bytes read so far
  for (;;) {
    const size_t chunk = fread(data + filled, 1, total - filled, file);
    filled += chunk;
    if (chunk == 0 || filled >= total) {
      break;
    }
  }

  const std::string content(data, filled);
  delete[] data;
  return content;
}
# ifdef _MSC_VER
# pragma warning(pop)
# endif // _MSC_VER
// The active capturer for each standard stream, or NULL when that
// stream is not being captured.  Set by CaptureStream() and reset to
// NULL by GetCapturedStream().
static CapturedStream* g_captured_stderr = NULL;
static CapturedStream* g_captured_stdout = NULL;
// Starts capturing an output stream (stdout/stderr).  fd is the file
// descriptor to redirect, stream_name is used only in the error
// message, and *stream receives the newly created capturer.  Logs a
// FATAL error if *stream already holds a capturer.
void CaptureStream(int fd, const char* stream_name, CapturedStream** stream) {
  const bool already_capturing = (*stream != NULL);
  if (already_capturing) {
    GTEST_LOG_(FATAL) << "Only one " << stream_name
                      << " capturer can exist at a time.";
  }
  *stream = new CapturedStream(fd);
}
// Stops capturing the output stream and returns the captured string.
// The capturer is destroyed and *captured_stream is reset to NULL.
std::string GetCapturedStream(CapturedStream** captured_stream) {
  CapturedStream* const capturer = *captured_stream;
  const std::string content = capturer->GetCapturedString();

  *captured_stream = NULL;
  delete capturer;

  return content;
}
// Starts capturing stdout.  The capturer is stored in g_captured_stdout;
// CaptureStream() logs a FATAL error if one is already stored there.
void CaptureStdout() {
CaptureStream(kStdOutFileno, "stdout", &g_captured_stdout);
}
// Starts capturing stderr.  The capturer is stored in g_captured_stderr;
// CaptureStream() logs a FATAL error if one is already stored there.
void CaptureStderr() {
CaptureStream(kStdErrFileno, "stderr", &g_captured_stderr);
}
// Stops capturing stdout and returns the captured string.
// g_captured_stdout is dereferenced without a NULL check, so
// CaptureStdout() must have been called first.
std::string GetCapturedStdout() {
return GetCapturedStream(&g_captured_stdout);
}
// Stops capturing stderr and returns the captured string.
// g_captured_stderr is dereferenced without a NULL check, so
// CaptureStderr() must have been called first.
std::string GetCapturedStderr() {
return GetCapturedStream(&g_captured_stderr);
}
#endif // GTEST_HAS_STREAM_REDIRECTION
#if GTEST_HAS_DEATH_TEST
// A copy of all command line arguments. Set by InitGoogleTest().
::std::vector<testing::internal::string> g_argvs;
// Replacement argument vector installed through SetInjectableArgvs(), or
// NULL when none is installed.  When non-NULL, GetInjectableArgvs()
// returns it instead of g_argvs.
static const ::std::vector<testing::internal::string>* g_injected_test_argvs =
NULL; // Owned.
// Installs argvs as the argument vector returned by GetInjectableArgvs()
// and takes ownership of it.  The previously installed vector, if it is
// a different object, is deleted.  Passing NULL removes the injection.
void SetInjectableArgvs(const ::std::vector<testing::internal::string>* argvs) {
  const ::std::vector<testing::internal::string>* const previous =
      g_injected_test_argvs;
  g_injected_test_argvs = argvs;
  // Re-installing the same vector must not destroy it.
  if (previous != argvs) {
    delete previous;
  }
}
// Returns the injected argument vector if one has been installed with
// SetInjectableArgvs(); otherwise returns the real command line, g_argvs.
const ::std::vector<testing::internal::string>& GetInjectableArgvs() {
  if (g_injected_test_argvs == NULL) {
    return g_argvs;
  }
  return *g_injected_test_argvs;
}
#endif // GTEST_HAS_DEATH_TEST
#if GTEST_OS_WINDOWS_MOBILE
namespace posix {
// Windows Mobile implementation of posix::Abort(): calls DebugBreak()
// and then terminates the current process with exit code 1.
void Abort() {
DebugBreak();
TerminateProcess(GetCurrentProcess(), 1);
}
} // namespace posix
#endif // GTEST_OS_WINDOWS_MOBILE
// Returns the name of the environment variable corresponding to the
// given flag: GTEST_FLAG_PREFIX_ followed by the flag name, converted
// to upper case.  For example, FlagToEnvVar("foo") will return
// "GTEST_FOO" in the open-source version.
static std::string FlagToEnvVar(const char* flag) {
  const std::string full_flag =
      (Message() << GTEST_FLAG_PREFIX_ << flag).GetString();

  Message env_var;
  for (std::string::const_iterator it = full_flag.begin();
       it != full_flag.end(); ++it) {
    env_var << ToUpper(*it);
  }
  return env_var.GetString();
}
// Parses 'str' for a 32-bit signed integer. If successful, writes
// the result to *value and returns true; otherwise leaves *value
// unchanged and returns false.
bool ParseInt32(const Message& src_text, const char* str, Int32* value) {
// Parses the environment variable as a decimal integer.
char* end = NULL;
const long long_value = strtol(str, &end, 10); // NOLINT
// Has strtol() consumed all characters in the string?
if (*end != '\0') {
// No - an invalid character was encountered.
Message msg;
msg << "WARNING: " << src_text
<< " is expected to be a 32-bit integer, but actually"
<< " has value \"" << str << "\".\n";
printf("%s", msg.GetString().c_str());
fflush(stdout);
return false;
}
// Is the parsed value in the range of an Int32?
const Int32 result = static_cast<Int32>(long_value);
if (long_value == LONG_MAX || long_value == LONG_MIN ||
// The parsed value overflows as a long. (strtol() returns
// LONG_MAX or LONG_MIN when the input overflows.)
result != long_value
// The parsed value overflows as an Int32.
) {
Message msg;
msg << "WARNING: " << src_text
<< " is expected to be a 32-bit integer, but actually"
<< " has value " << str << ", which overflows.\n";
printf("%s", msg.GetString().c_str());
fflush(stdout);
return false;
}
*value = result;
return true;
}
// Reads and returns the Boolean environment variable corresponding to
// the given flag; if it's not set, returns default_value.
//
// The value is considered true iff it's not "0".
bool BoolFromGTestEnv(const char* flag, bool default_value) {
  const std::string env_var = FlagToEnvVar(flag);
  const char* const raw_value = posix::GetEnv(env_var.c_str());
  if (raw_value == NULL) {
    return default_value;
  }
  return strcmp(raw_value, "0") != 0;
}
// Reads and returns a 32-bit integer stored in the environment
// variable corresponding to the given flag; if it isn't set or
// doesn't represent a valid 32-bit integer, returns default_value.
// In the latter case a notice is also printed to stdout.
Int32 Int32FromGTestEnv(const char* flag, Int32 default_value) {
  const std::string env_var = FlagToEnvVar(flag);
  const char* const string_value = posix::GetEnv(env_var.c_str());
  if (string_value == NULL) {
    // The environment variable is not set.
    return default_value;
  }

  Int32 parsed = default_value;
  if (ParseInt32(Message() << "Environment variable " << env_var,
                 string_value, &parsed)) {
    return parsed;
  }

  // ParseInt32() has already printed why the value was rejected.
  printf("The default value %s is used.\n",
         (Message() << default_value).GetString().c_str());
  fflush(stdout);
  return default_value;
}
// Reads and returns the string environment variable corresponding to
// the given flag; if it's not set, returns default_value.
const char* StringFromGTestEnv(const char* flag, const char* default_value) {
  const std::string env_var = FlagToEnvVar(flag);
  const char* const value = posix::GetEnv(env_var.c_str());
  if (value != NULL) {
    return value;
  }
  return default_value;
}
} // namespace internal
} // namespace testing
@@ -0,0 +1,395 @@
// Copyright 2007, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
// Google Test - The Google C++ Testing Framework
//
// This file implements a universal value printer that can print a
// value of any type T:
//
// void ::testing::internal::UniversalPrinter<T>::Print(value, ostream_ptr);
//
// It uses the << operator when possible, and prints the bytes in the
// object otherwise. A user can override its behavior for a class
// type Foo by defining either operator<<(::std::ostream&, const Foo&)
// or void PrintTo(const Foo&, ::std::ostream*) in the namespace that
// defines Foo.
#include "gtest/gtest-printers.h"
#include <ctype.h>
#include <stdio.h>
#include <ostream> // NOLINT
#include <string>
#include "gtest/internal/gtest-port.h"
namespace testing {
namespace {
using ::std::ostream;
// Prints a segment of bytes in the given object: count bytes of
// obj_bytes beginning at index start, each as two upper-case hex digits.
void PrintByteSegmentInObjectTo(const unsigned char* obj_bytes, size_t start,
                                size_t count, ostream* os) {
  char hex[5] = "";
  const size_t end = start + count;
  for (size_t pos = start; pos != end; ++pos) {
    if (pos != start) {
      // Organizes the bytes into groups of 2 for easy parsing by
      // human: a space before even offsets, a dash before odd ones.
      *os << (((pos % 2) == 0) ? ' ' : '-');
    }
    GTEST_SNPRINTF_(hex, sizeof(hex), "%02X", obj_bytes[pos]);
    *os << hex;
  }
}
// Prints the bytes in the given value to the given ostream, in the
// form "N-byte object <...>".
void PrintBytesInObjectToImpl(const unsigned char* obj_bytes, size_t count,
                              ostream* os) {
  // Tells the user how big the object is.
  *os << count << "-byte object <";

  const size_t kThreshold = 132;
  const size_t kChunkSize = 64;
  // If the object size is bigger than kThreshold, we'll have to omit
  // some details by printing only the first and the last kChunkSize
  // bytes.
  // TODO(wan): let the user control the threshold using a flag.
  if (count >= kThreshold) {
    PrintByteSegmentInObjectTo(obj_bytes, 0, kChunkSize, os);
    *os << " ... ";
    // Rounds up to 2-byte boundary.
    const size_t resume_pos = (count - kChunkSize + 1) / 2 * 2;
    PrintByteSegmentInObjectTo(obj_bytes, resume_pos, count - resume_pos, os);
  } else {
    PrintByteSegmentInObjectTo(obj_bytes, 0, count, os);
  }

  *os << ">";
}
} // namespace
namespace internal2 {
// Delegates to PrintBytesInObjectToImpl() to print the bytes in the
// given object. The delegation simplifies the implementation, which
// uses the << operator and thus is easier done outside of the
// ::testing::internal namespace, which contains a << operator that
// sometimes conflicts with the one in STL.
//
// obj_bytes points to the object's storage, count is its size in bytes,
// and os is the stream that receives the output.
void PrintBytesInObjectTo(const unsigned char* obj_bytes, size_t count,
ostream* os) {
PrintBytesInObjectToImpl(obj_bytes, count, os);
}
} // namespace internal2
namespace internal {
// Depending on the value of a char (or wchar_t), we print it in one
// of three formats:
// - as is if it's a printable ASCII (e.g. 'a', '2', ' '),
// - as a hexadecimal escape sequence (e.g. '\x7F'), or
// - as a special escape sequence (e.g. '\r', '\n').
enum CharFormat {
// Printed unchanged.
kAsIs,
// Printed as a "\x.." hexadecimal escape.
kHexEscape,
// Printed as a named escape such as "\n" or "\\".
kSpecialEscape
};
// Returns true if c is a printable ASCII character, i.e. lies in the
// range from ' ' (0x20) to '~' (0x7E).  We test the value of c directly
// instead of calling isprint(), which is buggy on Windows Mobile.
inline bool IsPrintableAscii(wchar_t c) {
  const wchar_t kFirstPrintable = 0x20;
  const wchar_t kLastPrintable = 0x7E;
  return c >= kFirstPrintable && c <= kLastPrintable;
}
// Prints a wide or narrow char c as a character literal without the
// quotes, escaping it when necessary; returns how c was formatted.
// The template argument UnsignedChar is the unsigned version of Char,
// which is the type of c.
template <typename UnsignedChar, typename Char>
static CharFormat PrintAsCharLiteralTo(Char c, ostream* os) {
  // Characters that have a dedicated escape sequence.
  const char* special = NULL;
  switch (static_cast<wchar_t>(c)) {
    case L'\0': special = "\\0";  break;
    case L'\'': special = "\\'";  break;
    case L'\\': special = "\\\\"; break;
    case L'\a': special = "\\a";  break;
    case L'\b': special = "\\b";  break;
    case L'\f': special = "\\f";  break;
    case L'\n': special = "\\n";  break;
    case L'\r': special = "\\r";  break;
    case L'\t': special = "\\t";  break;
    case L'\v': special = "\\v";  break;
    default: break;
  }
  if (special != NULL) {
    *os << special;
    return kSpecialEscape;
  }

  if (IsPrintableAscii(c)) {
    *os << static_cast<char>(c);
    return kAsIs;
  }

  // Everything else is shown as a hexadecimal escape of its unsigned
  // value.
  *os << "\\x" + String::FormatHexInt(static_cast<UnsignedChar>(c));
  return kHexEscape;
}
// Prints a wchar_t c as if it's part of a string literal, escaping it when
// necessary; returns how c was formatted.  Compared with a character
// literal, a single quote needs no escape but a double quote does.
static CharFormat PrintAsStringLiteralTo(wchar_t c, ostream* os) {
  if (c == L'\'') {
    *os << "'";
    return kAsIs;
  }
  if (c == L'"') {
    *os << "\\\"";
    return kSpecialEscape;
  }
  return PrintAsCharLiteralTo<wchar_t>(c, os);
}
// Prints a char c as if it's part of a string literal, escaping it when
// necessary; returns how c was formatted.
// c is converted through unsigned char before widening, so the wchar_t
// overload always receives a non-negative value.
static CharFormat PrintAsStringLiteralTo(char c, ostream* os) {
return PrintAsStringLiteralTo(
static_cast<wchar_t>(static_cast<unsigned char>(c)), os);
}
// Prints a wide or narrow character c and its code. '\0' is printed
// as "'\\0'", other unprintable characters are also properly escaped
// using the standard C++ escape sequence. The template argument
// UnsignedChar is the unsigned version of Char, which is the type of c.
template <typename UnsignedChar, typename Char>
void PrintCharAndCodeTo(Char c, ostream* os) {
// First, print c as a literal in the most readable form we can find.
*os << ((sizeof(c) > 1) ? "L'" : "'");
const CharFormat format = PrintAsCharLiteralTo<UnsignedChar>(c, os);
*os << "'";
// To aid user debugging, we also print c's code in decimal, unless
// it's 0 (in which case c was printed as '\\0', making the code
// obvious).
if (c == 0) {
return;
}
*os << " (" << static_cast<int>(c);
// For more convenience, we print c's code again in hexidecimal,
// unless c was already printed in the form '\x##' or the code is in
// [1, 9].
if (format == kHexEscape || (1 <= c && c <= 9)) {
// Do nothing.
}
else {
*os << ", 0x" << String::FormatHexInt(static_cast<UnsignedChar>(c));
}
*os << ")";
}
// Prints an unsigned char as a quoted character literal followed by its
// numeric code, via PrintCharAndCodeTo().
void PrintTo(unsigned char c, ::std::ostream* os) {
PrintCharAndCodeTo<unsigned char>(c, os);
}
// Prints a signed char as a quoted character literal followed by its
// numeric code, via PrintCharAndCodeTo().  unsigned char is supplied as
// the UnsignedChar type that PrintCharAndCodeTo() casts to for hex output.
void PrintTo(signed char c, ::std::ostream* os) {
PrintCharAndCodeTo<unsigned char>(c, os);
}
// Prints a wchar_t as a symbol if it is printable or as its internal
// code otherwise and also as its code. L'\0' is printed as "L'\\0'".
// wchar_t itself is supplied as the UnsignedChar type that
// PrintCharAndCodeTo() casts to for hex output.
void PrintTo(wchar_t wc, ostream* os) {
PrintCharAndCodeTo<wchar_t>(wc, os);
}
// Prints the given array of characters to the ostream. CharType must be either
// char or wchar_t.
// The array starts at begin, the length is len, it may include '\0' characters
// and may not be NUL-terminated.
template <typename CharType>
static void PrintCharsAsStringTo(
const CharType* begin, size_t len, ostream* os) {
const char* const kQuoteBegin = sizeof(CharType) == 1 ? "\"" : "L\"";
*os << kQuoteBegin;
bool is_previous_hex = false;
for (size_t index = 0; index < len; ++index) {
const CharType cur = begin[index];
if (is_previous_hex && IsXDigit(cur)) {
// Previous character is of '\x..' form and this character can be
// interpreted as another hexadecimal digit in its number. Break string to
// disambiguate.
*os << "\" " << kQuoteBegin;
}
is_previous_hex = PrintAsStringLiteralTo(cur, os) == kHexEscape;
}
*os << "\"";
}
// Prints a (const) char/wchar_t array of 'len' elements, starting at address
// 'begin'.  CharType must be either char or wchar_t.
template <typename CharType>
static void UniversalPrintCharArray(
    const CharType* begin, size_t len, ostream* os) {
  const bool nul_terminated = len > 0 && begin[len - 1] == '\0';

  if (!nul_terminated) {
    // The last element in the array is not '\0', e.g.
    //    const char kFoo[] = { 'f', 'o', 'o' };
    // so we must print the entire array.  We also print a message to
    // indicate that the array is not NUL-terminated.
    PrintCharsAsStringTo(begin, len, os);
    *os << " (no terminating NUL)";
    return;
  }

  // The code
  //   const char kFoo[] = "foo";
  // generates an array of 4, not 3, elements, with the last one being '\0'.
  //
  // Therefore when printing a char array, we don't print the last element
  // if it's '\0', such that the output matches the string literal as it's
  // written in the source code.
  PrintCharsAsStringTo(begin, len - 1, os);
}
// Prints a (const) char array of 'len' elements, starting at address 'begin'.
// Forwards to UniversalPrintCharArray(), which prints the array as a
// quoted string.
void UniversalPrintArray(const char* begin, size_t len, ostream* os) {
UniversalPrintCharArray(begin, len, os);
}
// Prints a (const) wchar_t array of 'len' elements, starting at address
// 'begin'.
// Forwards to UniversalPrintCharArray(), which prints the array as a
// quoted wide string.
void UniversalPrintArray(const wchar_t* begin, size_t len, ostream* os) {
UniversalPrintCharArray(begin, len, os);
}
// Prints the given C string to the ostream: "NULL" for a null pointer,
// otherwise the pointer value followed by the quoted, escaped string it
// points to.
void PrintTo(const char* s, ostream* os) {
  if (s == NULL) {
    *os << "NULL";
    return;
  }
  *os << ImplicitCast_<const void*>(s) << " pointing to ";
  PrintCharsAsStringTo(s, strlen(s), os);
}
// MSVC compiler can be configured to define wchar_t as a typedef
// of unsigned short. Defining an overload for const wchar_t* in that case
// would cause pointers to unsigned shorts be printed as wide strings,
// possibly accessing more memory than intended and causing invalid
// memory accesses. MSVC defines _NATIVE_WCHAR_T_DEFINED symbol when
// wchar_t is implemented as a native type.
#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED)
// Prints the given wide C string to the ostream: "NULL" for a null
// pointer, otherwise the pointer value followed by the quoted, escaped
// wide string it points to.
void PrintTo(const wchar_t* s, ostream* os) {
  if (s == NULL) {
    *os << "NULL";
    return;
  }
  *os << ImplicitCast_<const void*>(s) << " pointing to ";
  PrintCharsAsStringTo(s, wcslen(s), os);
}
#endif // wchar_t is native
// Prints a ::string object.
#if GTEST_HAS_GLOBAL_STRING
// Prints s as a quoted, escaped string literal.  data()/size() are
// passed so that the whole string is printed, not just up to a '\0'.
void PrintStringTo(const ::string& s, ostream* os) {
PrintCharsAsStringTo(s.data(), s.size(), os);
}
#endif // GTEST_HAS_GLOBAL_STRING
// Prints a ::std::string as a quoted, escaped string literal.
// data()/size() are passed so that the whole string is printed, not
// just up to a '\0'.
void PrintStringTo(const ::std::string& s, ostream* os) {
PrintCharsAsStringTo(s.data(), s.size(), os);
}
// Prints a ::wstring object.
#if GTEST_HAS_GLOBAL_WSTRING
// Prints s as a quoted, escaped wide string literal.  data()/size() are
// passed so that the whole string is printed, not just up to a L'\0'.
void PrintWideStringTo(const ::wstring& s, ostream* os) {
PrintCharsAsStringTo(s.data(), s.size(), os);
}
#endif // GTEST_HAS_GLOBAL_WSTRING
#if GTEST_HAS_STD_WSTRING
// Prints a ::std::wstring object by handing its data() pointer and size() to
// PrintCharsAsStringTo(); the length is passed explicitly rather than derived
// from a terminating NUL.
void PrintWideStringTo(const ::std::wstring& s, ostream* os) {
PrintCharsAsStringTo(s.data(), s.size(), os);
}
#endif // GTEST_HAS_STD_WSTRING
} // namespace internal
} // namespace testing
@@ -0,0 +1,112 @@
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: mheule@google.com (Markus Heule)
//
// The Google C++ Testing Framework (Google Test)
#include "gtest/gtest-test-part.h"
// Indicates that this translation unit is part of Google Test's
// implementation. It must come before gtest-internal-inl.h is
// included, or there will be a compiler error. This trick is to
// prevent a user from accidentally including gtest-internal-inl.h in
// his code.
#define GTEST_IMPLEMENTATION_ 1
#include "src/gtest-internal-inl.h"
#undef GTEST_IMPLEMENTATION_
namespace testing {
using internal::GetUnitTestImpl;
// Gets the summary of the failure message by omitting the stack trace
// in it: everything before the first occurrence of
// internal::kStackTraceMarker is returned. When the marker does not occur,
// the whole message is returned.
std::string TestPartResult::ExtractSummary(const char* message) {
  const char* const marker_pos = strstr(message, internal::kStackTraceMarker);
  if (marker_pos == NULL) {
    return message;
  }
  // Copy only the [message, marker_pos) prefix.
  return std::string(message, marker_pos);
}
// Prints a TestPartResult object as
//   <file>:<line>: <kind>:
//   <message>
// where <kind> is "Success", "Fatal failure" or "Non-fatal failure". The
// stream is flushed by the trailing std::endl.
std::ostream& operator<<(std::ostream& os, const TestPartResult& result) {
  // Any type other than kSuccess / kFatalFailure is reported as non-fatal.
  const char* type_label = "Non-fatal failure";
  if (result.type() == TestPartResult::kSuccess) {
    type_label = "Success";
  } else if (result.type() == TestPartResult::kFatalFailure) {
    type_label = "Fatal failure";
  }

  os << result.file_name() << ":" << result.line_number() << ": ";
  os << type_label << ":\n";
  os << result.message() << std::endl;
  return os;
}
// Appends a TestPartResult to the array. The result is added at the end of
// array_ via push_back().
void TestPartResultArray::Append(const TestPartResult& result) {
array_.push_back(result);
}
// Returns the TestPartResult at the given index (0-based).
// An index outside [0, size()) is treated as fatal: a diagnostic is printed
// to stdout and the program is aborted through internal::posix::Abort().
const TestPartResult& TestPartResultArray::GetTestPartResult(int index) const {
  const bool in_range = (index >= 0) && (index < size());
  if (!in_range) {
    printf("\nInvalid index (%d) into TestPartResultArray.\n", index);
    internal::posix::Abort();
  }

  return array_[index];
}
// Returns the number of TestPartResult objects in the array. The container's
// size is converted to int with static_cast.
int TestPartResultArray::size() const {
return static_cast<int>(array_.size());
}
namespace internal {
// Starts with "no fatal failure seen", remembers the test part result
// reporter currently installed for this thread, and then installs this
// helper as the reporter for the current thread.
HasNewFatalFailureHelper::HasNewFatalFailureHelper()
: has_new_fatal_failure_(false),
original_reporter_(GetUnitTestImpl()->
GetTestPartResultReporterForCurrentThread()) {
GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread(this);
}
// Re-installs the reporter that was saved by the constructor as the test part
// result reporter for the current thread.
HasNewFatalFailureHelper::~HasNewFatalFailureHelper() {
GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread(
original_reporter_);
}
// Records whether any reported result was a fatal failure, then passes the
// result on to the reporter that was installed before this helper.
void HasNewFatalFailureHelper::ReportTestPartResult(
const TestPartResult& result) {
if (result.fatally_failed()) {
// The flag is only ever set here, never cleared.
has_new_fatal_failure_ = true;
}
original_reporter_->ReportTestPartResult(result);
}
} // namespace internal
} // namespace testing
@@ -0,0 +1,117 @@
// Copyright 2008 Google Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
#include "gtest/gtest-typed-test.h"
#include "gtest/gtest.h"
namespace testing {
namespace internal {
#if GTEST_HAS_TYPED_TEST_P
// Skips to the first non-space char in str. Returns an empty string if str
// contains only whitespace characters.
// The result points into the caller's buffer; nothing is copied.
static const char* SkipSpaces(const char* str) {
  const char* cursor = str;
  for (; IsSpace(*cursor); ++cursor) {
    // Advance past each character that IsSpace() accepts.
  }
  return cursor;
}
// Verifies that registered_tests match the test names in
// defined_test_names_; returns registered_tests if successful, or
// aborts the program otherwise.
//
// registered_tests is walked as a comma-separated list. Three kinds of
// problems are collected: a name listed more than once, a listed name that is
// not in defined_test_names_, and a defined name that was never listed. If
// any problem was found, the collected text is written to stderr prefixed by
// the formatted file/line location, and posix::Abort() is called.
const char* TypedTestCasePState::VerifyRegisteredTestNames(
    const char* file, int line, const char* registered_tests) {
  typedef ::std::set<const char*>::const_iterator DefinedTestIter;
  registered_ = true;

  // Skip initial whitespace in registered_tests since some
  // preprocessors prefix stringized literals with whitespace.
  registered_tests = SkipSpaces(registered_tests);

  Message errors;
  ::std::set<std::string> tests;  // Names accepted so far.

  const char* names = registered_tests;
  while (names != NULL) {
    const std::string name = GetPrefixUntilComma(names);

    if (tests.count(name) == 0) {
      // First time this name is listed: look it up among the defined tests.
      bool found = false;
      DefinedTestIter it = defined_test_names_.begin();
      while (!found && it != defined_test_names_.end()) {
        found = (name == *it);
        ++it;
      }

      if (found) {
        tests.insert(name);
      } else {
        errors << "No test named " << name
               << " can be found in this test case.\n";
      }
    } else {
      errors << "Test " << name << " is listed more than once.\n";
    }

    names = SkipComma(names);
  }

  // Every defined test must have been listed.
  DefinedTestIter defined = defined_test_names_.begin();
  while (defined != defined_test_names_.end()) {
    if (tests.count(*defined) == 0) {
      errors << "You forgot to list test " << *defined << ".\n";
    }
    ++defined;
  }

  const std::string& errors_str = errors.GetString();
  if (!errors_str.empty()) {
    fprintf(stderr, "%s %s", FormatFileLocation(file, line).c_str(),
            errors_str.c_str());
    fflush(stderr);
    posix::Abort();
  }

  return registered_tests;
}
#endif // GTEST_HAS_TYPED_TEST_P
} // namespace internal
} // namespace testing
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,50 @@
// Copyright 2006, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdio.h>
// Command-line arguments that remain after Google Test has consumed its own
// flags, published as globals so that the tests can read them.
// hsa_arg_cnt is the number of valid entries in hsa_arg_list.
int hsa_arg_cnt;
char* hsa_arg_list[32];

#include "gtest/gtest.h"

// Test program entry point: initializes Google Test, publishes the remaining
// command-line arguments through hsa_arg_cnt / hsa_arg_list and runs every
// registered test. Returns the value of RUN_ALL_TESTS().
GTEST_API_ int main(int argc, char** argv) {
  printf("Running main() from gtest_main.cc\n");
  testing::InitGoogleTest(&argc, argv);

  // Copy cmdline args for Hsa into a globally visible symbol. The destination
  // has a fixed capacity, so the copy is clamped to it; without the clamp a
  // command line with more entries than the array holds would write past the
  // end of hsa_arg_list.
  const int max_args =
      static_cast<int>(sizeof(hsa_arg_list) / sizeof(hsa_arg_list[0]));
  hsa_arg_cnt = (argc < max_args) ? argc : max_args;

  if (argc > max_args) {
    fprintf(stderr,
            "Warning: only the first %d of %d command line arguments are "
            "available to tests\n", max_args, argc);
  }

  for (int jdx = 0; jdx < hsa_arg_cnt; jdx++) {
    hsa_arg_list[jdx] = argv[jdx];
  }

  return RUN_ALL_TESTS();
}
+254
View File
@@ -0,0 +1,254 @@
#
# Minimum version of cmake required to process this file
#
cmake_minimum_required(VERSION 2.8.0)
#
# GCC 4.8 or higher compiler required.
#
#
# Setup build environment
#
# 1) Set env. variable specifying the location of ROCR header files
#
# export ROCR_DIR="Root for RocR install"
#
# 2) Set env. variable ROCRTST_BLD_TYPE to either "Debug" or "Release".
# If not set, the default value of "Debug" is bound.
#
# export ROCRTST_BLD_TYPE=Debug or ROCRTST_BLD_TYPE=Release
#
# 3) Set env. variable ROCRTST_BLD_BITS to either "32" or "64"
# If not set, the default value of "64" is bound.
#
# export ROCRTST_BLD_BITS=32 or ROCRTST_BLD_BITS=64
#
# 4) Set env. variable TARGET_DEVICE to indicate gpu type (e.g., gfx803,
# gfx900, ...)
#
# 5) Set env. variables OPENCL_DIR and OPENCL_VER to the OpenCL install
# root and OpenCL version, respectively.
#
# Building rocrtst Suite
#
# 1) Create build folder e.g. "rocrtst/build" - any name will do
# 2) Cd into build folder
# 3) Run "cmake .."
# 4) Run "make"
#
#############################
# COMMON AREA
#############################
#
# Currently support for Windows platform is not present.
# On Windows a message is printed and processing of this file stops here.
#
if(WIN32)
message("This sample is not supported on Windows platform")
return()
endif()
#
# Process environment variables relating to Build type, size and RT version
#
# ROCRTST_BLD_TYPE is compared case-insensitively (it is lower-cased first):
# "release" selects a Release build, anything else (including unset) selects
# Debug.
string(TOLOWER "$ENV{ROCRTST_BLD_TYPE}" tmp)
if("${tmp}" STREQUAL release)
set(BUILD_TYPE "Release")
set(ISDEBUG 0)
else()
set(BUILD_TYPE "Debug")
set(ISDEBUG 1)
endif()
# ROCRTST_BLD_BITS equal to "32" selects a 32-bit build; anything else
# (including unset) selects 64-bit. ONLY64STR is the suffix appended to the
# project name and to the ROCR library names further down.
if("$ENV{ROCRTST_BLD_BITS}" STREQUAL 32)
set (ONLY64STR "")
set (IS64BIT 0)
else()
set (ONLY64STR "64")
set (IS64BIT 1)
endif()
# Both ROCR locations are derived from the single ROCR_DIR environment
# variable.
set(ROCR_INC_DIR $ENV{ROCR_DIR}/hsa/include)
set(ROCR_LIB_DIR $ENV{ROCR_DIR}/lib)
#
# Determine ROCR Header files are present.
# The diagnostics below name ROCR_DIR because that is the variable the user
# actually has to set; ROCR_INC_DIR and ROCR_LIB_DIR are internal to this file.
#
if(NOT EXISTS ${ROCR_INC_DIR}/hsa/hsa.h)
  message("ERROR: ROCR header ${ROCR_INC_DIR}/hsa/hsa.h not found. Is env. variable ROCR_DIR set correctly?")
  return()
endif()
# Determine ROCR Library files are present. The library name carries a "64"
# suffix for 64-bit builds.
#
if (${IS64BIT} EQUAL 0)
  if(NOT EXISTS ${ROCR_LIB_DIR}/libhsa-runtime.so)
    message("ERROR: ROCR library ${ROCR_LIB_DIR}/libhsa-runtime.so not found. Is env. variable ROCR_DIR set correctly?")
    return()
  endif()
else()
  if(NOT EXISTS ${ROCR_LIB_DIR}/libhsa-runtime64.so)
    message("ERROR: ROCR library ${ROCR_LIB_DIR}/libhsa-runtime64.so not found. Is env. variable ROCR_DIR set correctly?")
    return()
  endif()
endif()
# OpenCL toolchain used to compile the kernels. When OPENCL_DIR is set, clang
# is expected at <OPENCL_DIR>/bin/x86_64/clang; a missing clang stops
# processing of this file. OPENCL_VER falls back to "2.0" when the environment
# variable is not set. When OPENCL_DIR itself is not set only a warning is
# printed, and CLANG, OPENCL_DIR and OPENCL_VER stay undefined.
if (DEFINED ENV{OPENCL_DIR})
set(CLANG $ENV{OPENCL_DIR}/bin/x86_64/clang)
set(OPENCL_DIR $ENV{OPENCL_DIR})
if (NOT EXISTS ${CLANG})
message("ERROR: path to clang (${CLANG}) is not valid. Is env. variable OPENCL_DIR correct?")
return()
endif()
if (DEFINED ENV{OPENCL_VER})
set(OPENCL_VER $ENV{OPENCL_VER})
else()
message("OPENCL_VER environment variable is not set. Using default")
set(OPENCL_VER "2.0")
endif()
else()
message("WARNING: OPENCL_DIR environment variable is not set. Kernels will not be built.")
endif()
# TARGET_DEVICE is mandatory: without it a message is printed and processing
# of this file stops.
if (DEFINED ENV{TARGET_DEVICE})
set(TARGET_DEVICE $ENV{TARGET_DEVICE})
else()
message("ERROR: TARGET_DEVICE environment variable is not defined.")
message("Please define a valid clang target (e.g., gfx803, gfx900,...).")
return()
endif()
#
# Set Name for Samples Project.
# The name is "sample" followed by ONLY64STR, i.e. "sample64" for 64-bit
# builds and "sample" for 32-bit builds.
#
set(PROJECT_NAME "sample${ONLY64STR}")
project (${PROJECT_NAME})
#
# Print out the build configuration being used:
#
# Build Src directory
# Build Binary directory
# Build Type: Debug Vs Release, 32 Vs 64
# Compiler Version, etc
#
# These messages are informational only; nothing below depends on them.
message("")
message("Build Configuration:")
message("-------------IS64BIT: " ${IS64BIT})
message("-----------BuildType: " ${BUILD_TYPE})
message("------------Compiler: " ${CMAKE_CXX_COMPILER})
message("-------------Version: " ${CMAKE_CXX_COMPILER_VERSION})
message("--------Proj Src Dir: " ${PROJECT_SOURCE_DIR})
message("--------Proj Bld Dir: " ${PROJECT_BINARY_DIR})
message("--------Proj Lib Dir: " ${PROJECT_BINARY_DIR}/lib)
message("--------Proj Exe Dir: " ${PROJECT_BINARY_DIR}/bin)
message("-------Target Device: " ${TARGET_DEVICE})
message("----------Clang path: " ${CLANG})
message("-------OpenCL version " ${OPENCL_VER})
message("")
#
# Set the build type based on user input
# (BUILD_TYPE was derived from the ROCRTST_BLD_TYPE environment variable).
#
set(CMAKE_BUILD_TYPE ${BUILD_TYPE})
#
# Flag to enable / disable verbose output.
#
SET( CMAKE_VERBOSE_MAKEFILE on )
#
# Compiler pre-processor definitions.
#
# Define MACRO "DEBUG" if build type is "Debug"
if(${BUILD_TYPE} STREQUAL "Debug")
add_definitions(-DDEBUG)
endif()
add_definitions(-D__linux__)
add_definitions(-DLITTLEENDIAN_CPU=1)
#
# Linux Compiler options
#
# NOTE: the first set() below replaces CMAKE_CXX_FLAGS instead of appending
# to it, so any value it held before this point is discarded.
# -Werror together with -Wall turns every enabled warning into a build error.
set(CMAKE_CXX_FLAGS "-std=c++11 ")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-math-errno")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-threadsafe-statics")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmerge-all-constants")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fms-extensions")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
#
# Extend the compiler flags for 64-bit builds
# (32-bit builds get -m32 instead).
#
if (IS64BIT)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -msse -msse2")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")
endif()
#
# Add compiler flags to include symbol information for debug builds
#
if(ISDEBUG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb")
endif()
message("ISDEBUG STEP:Done")
#
# Linux Linker options
#
#set(CMAKE_EXE_LINKER_FLAGS "-Wl,-Bdynamic -Wl,-z,noexecstack -Wl ")
#set(CMAKE_EXE_LINKER_FLAGS "-Wl,-soname=$(CORE_RUNTIME_NAME).so.1 ")
#
# Specify the directory containing various libraries of ROCR
# to be linked against for building ROC Perf applications
#
link_directories(${ROCR_LIB_DIR})
#
# Extend the list of libraries to be used for linking ROC Perf Apps.
# ONLY64STR adds the "64" suffix to the library names for 64-bit builds.
#
set(ROCR_LIBS ${ROCR_LIBS} hsa-runtime${ONLY64STR})
set(ROCR_LIBS ${ROCR_LIBS} hsa-runtime-tools${ONLY64STR})
message(${ROCR_LIBS})
include_directories(${ROCR_INC_DIR})
# Use the OPENCL_DIR / OPENCL_VER CMake variables rather than reading the
# environment again: OPENCL_VER carries the "2.0" default that is applied
# when the environment variable is not set, so the include path stays
# consistent with the -cl-std value used to build the kernels.
include_directories(${OPENCL_DIR}/include/opencl${OPENCL_VER})
# process_sample(<sample name>)
#
# Declares the build targets for one sample:
#   - a custom target "sample_kernels" that runs clang to compile the sample's
#     OpenCL source into <sample name>_kernels.hsaco in the build directory;
#   - an executable "<sample name>_<project name>" built from every source
#     file found in the <sample name> sub-directory and linked against the
#     ROCR libraries.
#
# Besides S_NAME the function reads variables from the caller's scope:
# TARGET_DEVICE, OPENCL_DIR, OPENCL_VER, CLANG, ROCR_LIBS, and BITCODE_LIBS /
# CL_FILE_LIST, which the caller sets before invoking the function.
#
# The custom target name is fixed, so the function can be called only once
# per project; a second call would try to create "sample_kernels" again.
function(process_sample S_NAME)
  set(SNAME_EXE "${S_NAME}_${PROJECT_NAME}")
  set(SNAME_KERNEL "${S_NAME}_kernels.hsaco")

  # NOTE(review): the vendor field of the target triple reads "amdh"; this
  # may be a typo for "amd" (amdgcn-amd-amdhsa) - confirm against the clang
  # in use before changing it.
  separate_arguments(CLANG_ARG_LIST UNIX_COMMAND "-target amdgcn-amdh-amdhsa -mcpu=${TARGET_DEVICE} -include ${OPENCL_DIR}/include/opencl-c.h ${BITCODE_LIBS} -cl-std=CL${OPENCL_VER} ${CL_FILE_LIST} -o ${PROJECT_BINARY_DIR}/${SNAME_KERNEL}")

  # The kernel target is not a dependency of the executable; it has to be
  # built explicitly.
  add_custom_target(sample_kernels ${CLANG} ${CLANG_ARG_LIST}
                    COMMENT "BUILDING KERNEL..."
                    VERBATIM)

  aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/${S_NAME} S_NAME_SOURCES)
  add_executable(${SNAME_EXE} ${S_NAME_SOURCES})
  target_link_libraries(${SNAME_EXE} ${ROCR_LIBS} c stdc++ dl pthread rt)
endfunction(process_sample)
###########################
# SAMPLE SPECIFIC SECTION
###########################
set(KERN_SUFFIX "kernels.hsaco")
# BITCODE_PREF ends with the bitcode directory, so appending "/<file>.bc" to
# it yields one complete "-Xclang -mlink-bitcode-file -Xclang <path>" group.
set(BITCODE_PREF "-Xclang -mlink-bitcode-file -Xclang")
set(BITCODE_PREF "${BITCODE_PREF} ${OPENCL_DIR}/lib/x86_64/bitcode")
# Binary Search
set(BITCODE_LIBS "${BITCODE_PREF}/opencl.amdgcn.bc")
set(BITCODE_LIBS "${BITCODE_LIBS} ${BITCODE_PREF}/ockl.amdgcn.bc")
set(BITCODE_LIBS "${BITCODE_LIBS} ${BITCODE_PREF}/ocml.amdgcn.bc")
set(CL_FILE_LIST "${PROJECT_SOURCE_DIR}/binary_search/binary_search_kernels.cl")
process_sample("binary_search")
# Install the executable created by process_sample() above. The target is
# named explicitly as "<sample name>_<project name>" because the variable
# holding that name is local to the function and is not visible here.
install(TARGETS binary_search_${PROJECT_NAME}
        ARCHIVE DESTINATION ${PROJECT_BINARY_DIR}/lib
        LIBRARY DESTINATION ${PROJECT_BINARY_DIR}/lib
        RUNTIME DESTINATION ${PROJECT_BINARY_DIR}/bin)
@@ -0,0 +1,19 @@
To build the sample, first export the following environment variables:
export ROCR_DIR=<root of RocR install; for RocR includes and libraries>
export OPENCL_DIR=<root of OpenCL install; for required clang and bitcode libs>
export OPENCL_VER=<OpenCL version; e.g., "2.0">
export TARGET_DEVICE=<GPU type; e.g., "gfx803" or "gfx900">
Next, do the following:
mkdir build
cd build
cmake ..
Finally, do the following to build the application and respective kernels:
make
make sample_kernels
@@ -0,0 +1,881 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <string>
#include <iostream>
#include <climits>
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
// Checks the status of an HSA API call. If it is anything other than
// HSA_STATUS_SUCCESS, a diagnostic with the line and file of the macro
// invocation and the status value is written to std::cout, and the enclosing
// function returns that status. It may therefore only be used inside
// functions whose return type accepts the status value.
//
// The argument is evaluated exactly once and cached, so passing an expression
// with side effects (for example the API call itself) is safe.
#define RET_IF_HSA_ERR(err) { \
  const auto ret_if_hsa_err_status_ = (err); \
  if (ret_if_hsa_err_status_ != HSA_STATUS_SUCCESS) { \
    std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
              __FILE__ << ". Call returned " << ret_if_hsa_err_status_ << \
              std::endl; \
    return ret_if_hsa_err_status_; \
  } \
}
// Default problem parameters, applied by InitializeBinarySearch().
static const uint32_t kBinarySearchLength = 512;  // Elements in the input
static const uint32_t kBinarySearchFindMe = 108;  // Value searched for
static const uint32_t kWorkGroupSize = 256;       // Work-group size (x)

// Hold all the info specific to binary search
typedef struct BinarySearch {
  // Binary Search parameters
  uint32_t length;
  uint32_t work_group_size;
  uint32_t work_grid_size;
  uint32_t num_sub_divisions;
  uint32_t find_me;

  // Buffers needed for this application
  uint32_t* input;
  uint32_t* input_arr;
  uint32_t* input_arr_local;
  uint32_t* output;

  // Kernel argument buffers and addresses
  void* kern_arg_buffer;   // Begin of allocated memory
                           //  this pointer to be deallocated
  void* kern_arg_address;  // Properly aligned address to be used in aql
                           //  packet (don't use for deallocation)

  // Kernel code
  std::string kernel_file_name;
  std::string kernel_name;
  uint32_t kernarg_size;
  uint32_t kernarg_align;

  // HSA/RocR objects needed for this application
  hsa_agent_t gpu_dev;
  hsa_agent_t cpu_dev;
  hsa_signal_t signal;
  hsa_queue_t* queue;
  hsa_amd_memory_pool_t cpu_pool;
  hsa_amd_memory_pool_t gpu_pool;
  hsa_amd_memory_pool_t kern_arg_pool;

  // Other items we need to populate AQL packet
  uint64_t kernel_object;
  uint32_t group_segment_size;    ///< Kernel group seg size
  uint32_t private_segment_size;  ///< Kernel private seg size
} BinarySearch;

// Fills in the kernel file/kernel names and the problem parameters of "bs".
// The numeric parameters come from the file-level constants above so that
// each value is defined in exactly one place. num_sub_divisions is derived as
// length / work_group_size. All other fields of "bs" (including
// work_grid_size) are left untouched by this function.
void InitializeBinarySearch(BinarySearch* bs) {
  bs->kernel_file_name = "./binary_search_kernels.hsaco";
  bs->kernel_name = "binarySearch";
  bs->length = kBinarySearchLength;
  bs->find_me = kBinarySearchFindMe;
  bs->work_group_size = kWorkGroupSize;
  bs->num_sub_divisions = bs->length / bs->work_group_size;
}
// Shared implementation behind the agent-search call-backs. It reports
// whether "agent" is of the requested device type and, if so, stores the
// agent handle through "data" (which must point to an hsa_agent_t).
// It cannot be handed to hsa_iterate_agents() directly because of the extra
// dev_type parameter; wrap it in a function with the call-back prototype.
//
// Return values:
//  HSA_STATUS_INFO_BREAK -- "agent" is of the specified type (dev_type)
//  HSA_STATUS_SUCCESS -- "agent" is not of the specified type
//  Other -- Some error occurred
static hsa_status_t FindAgent(hsa_agent_t agent, void* data,
                              hsa_device_type_t dev_type) {
  assert(data != nullptr);

  // Still guard explicitly: the assert above compiles away under NDEBUG.
  if (data == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Ask the runtime what kind of device this agent is.
  hsa_device_type_t agent_type;
  const hsa_status_t query_status =
      hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &agent_type);
  RET_IF_HSA_ERR(query_status);

  if (agent_type != dev_type) {
    return HSA_STATUS_SUCCESS;
  }

  // Match: hand the agent back to the caller.
  *(reinterpret_cast<hsa_agent_t*>(data)) = agent;
  return HSA_STATUS_INFO_BREAK;
}
// This is the call-back function used to find a GPU type agent. Note that the
// prototype of this function is dictated by the HSA specification.
// It forwards to FindAgent() with HSA_DEVICE_TYPE_GPU, so "data" must point to
// the hsa_agent_t that receives the result, and the return values are those
// of FindAgent().
hsa_status_t FindGPUDevice(hsa_agent_t agent, void* data) {
return FindAgent(agent, data, HSA_DEVICE_TYPE_GPU);
}
// This is the call-back function used to find a CPU type agent. Note that the
// prototype of this function is dictated by the HSA specification.
// It forwards to FindAgent() with HSA_DEVICE_TYPE_CPU, so "data" must point to
// the hsa_agent_t that receives the result, and the return values are those
// of FindAgent().
hsa_status_t FindCPUDevice(hsa_agent_t agent, void* data) {
return FindAgent(agent, data, HSA_DEVICE_TYPE_CPU);
}
// Locates one GPU agent and one CPU agent and stores them in bs->gpu_dev and
// bs->cpu_dev for later use.
// Returns HSA_STATUS_SUCCESS when both were found and HSA_STATUS_ERROR
// otherwise.
hsa_status_t FindDevices(BinarySearch* bs) {
  // hsa_iterate_agents() keeps calling the call-back for every known agent
  // for as long as the call-back returns HSA_STATUS_SUCCESS. Our call-backs
  // return HSA_STATUS_INFO_BREAK once an agent of the wanted type has been
  // found, and hsa_iterate_agents() passes that value back to us. Any other
  // result therefore means "not found" or "error".
  bs->gpu_dev.handle = 0;
  hsa_status_t err = hsa_iterate_agents(FindGPUDevice, &bs->gpu_dev);
  if (err != HSA_STATUS_INFO_BREAK) {
    return HSA_STATUS_ERROR;
  }

  bs->cpu_dev.handle = 0;
  err = hsa_iterate_agents(FindCPUDevice, &bs->cpu_dev);
  if (err != HSA_STATUS_INFO_BREAK) {
    return HSA_STATUS_ERROR;
  }

  // A handle that is still zero was never filled in by the call-back.
  if (bs->gpu_dev.handle == 0) {
    std::cout << "GPU Device is not Created properly!" << std::endl;
    RET_IF_HSA_ERR(HSA_STATUS_ERROR);
  }

  if (bs->cpu_dev.handle == 0) {
    std::cout << "CPU Device is not Created properly!" << std::endl;
    RET_IF_HSA_ERR(HSA_STATUS_ERROR);
  }

  return HSA_STATUS_SUCCESS;
}
// Shared implementation behind the memory-pool search call-backs.
// A pool is accepted when it belongs to HSA_AMD_SEGMENT_GLOBAL and the
// presence of its HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT flag matches
// "kern_arg": with kern_arg == true the flag must be set, with
// kern_arg == false it must NOT be set.
//
// Return values:
//  HSA_STATUS_INFO_BREAK -- the pool qualifies; it has been stored through
//                           "data" (which must point to an
//                           hsa_amd_memory_pool_t)
//  HSA_STATUS_SUCCESS -- no error, but the pool does not qualify
//  Other -- the error that was encountered
//
// This function does not have the prototype required of a call-back for
// hsa_amd_agent_iterate_memory_pools(), so it must be wrapped by a function
// that does.
static hsa_status_t
FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) {
  if (data == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Only pools in the global segment are of interest.
  hsa_amd_segment_t pool_segment;
  hsa_status_t err = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &pool_segment);
  RET_IF_HSA_ERR(err);

  if (pool_segment != HSA_AMD_SEGMENT_GLOBAL) {
    return HSA_STATUS_SUCCESS;
  }

  uint32_t global_flags;
  err = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_flags);
  RET_IF_HSA_ERR(err);

  // Skip the pool when its kernarg property is not the one asked for.
  const bool is_kernarg_pool =
      (global_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT) != 0;
  if (is_kernarg_pool != kern_arg) {
    return HSA_STATUS_SUCCESS;
  }

  *(reinterpret_cast<hsa_amd_memory_pool_t*>(data)) = pool;
  return HSA_STATUS_INFO_BREAK;
}
// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that
// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that is NOT
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT.
// It forwards to FindGlobalPool() with kern_arg == false; "data" must point to
// the hsa_amd_memory_pool_t that receives the result.
hsa_status_t FindStandardPool(hsa_amd_memory_pool_t pool, void* data) {
return FindGlobalPool(pool, data, false);
}
// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that
// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that IS
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT.
// It forwards to FindGlobalPool() with kern_arg == true; "data" must point to
// the hsa_amd_memory_pool_t that receives the result.
hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) {
return FindGlobalPool(pool, data, true);
}
// Finds the three memory pools this sample allocates from and saves them in
// the BinarySearch structure:
//   cpu_pool      -- non-kernarg global pool of the CPU agent
//   gpu_pool      -- non-kernarg global pool of the GPU agent
//   kern_arg_pool -- kernarg global pool of the CPU agent
// bs->cpu_dev and bs->gpu_dev must already have been filled in.
// Returns HSA_STATUS_SUCCESS when all three were found and HSA_STATUS_ERROR
// as soon as one of the searches does not end with HSA_STATUS_INFO_BREAK.
hsa_status_t FindPools(BinarySearch* bs) {
  hsa_status_t search_result = hsa_amd_agent_iterate_memory_pools(
      bs->cpu_dev, FindStandardPool, &bs->cpu_pool);
  if (search_result != HSA_STATUS_INFO_BREAK) {
    return HSA_STATUS_ERROR;
  }

  search_result = hsa_amd_agent_iterate_memory_pools(
      bs->gpu_dev, FindStandardPool, &bs->gpu_pool);
  if (search_result != HSA_STATUS_INFO_BREAK) {
    return HSA_STATUS_ERROR;
  }

  search_result = hsa_amd_agent_iterate_memory_pools(
      bs->cpu_dev, FindKernArgPool, &bs->kern_arg_pool);
  if (search_result != HSA_STATUS_INFO_BREAK) {
    return HSA_STATUS_ERROR;
  }

  return HSA_STATUS_SUCCESS;
}
// Once the needed memory pools have been found and the BinarySearch structure
// has been updated with these handles, this function is then used to allocate
// memory from those pools.
// Devices with which a pool is associated already have access to the pool.
// However, other devices may also need to read or write to that memory. Below,
// we see how we can grant access to other devices to address this issue.
//
// Buffers allocated here (all from bs->cpu_pool, all made accessible to both
// the GPU and the CPU agent):
//   input           -- bs->length elements, filled with non-decreasing values
//   output          -- 4 elements, zeroed
//   input_arr       -- 2 * bs->num_sub_divisions elements, zeroed
//   input_arr_local -- 2 * bs->num_sub_divisions elements
//
// Returns HSA_STATUS_SUCCESS, or the status of the first HSA call that
// failed. Buffers allocated before a failure are not released here.
hsa_status_t AllocateAndInitBuffers(BinarySearch* bs) {
  hsa_status_t err;
  uint32_t out_length = 4 * sizeof(uint32_t);

  // Size used for input_arr and input_arr_local.
  uint32_t in_length = bs->num_sub_divisions * 2 * sizeof(uint32_t);

  // "input" holds the whole array to be searched, so it must have room for
  // bs->length elements: the initialization loop below writes that many.
  uint32_t input_length = bs->length * sizeof(uint32_t);

  // In all of these examples, we want both the cpu and gpu to have access to
  // the buffer in question. We use the array of agents below in the subsequent
  // calls to hsa_amd_agents_allow_access() for this purpose.
  hsa_agent_t ag_list[2] = {bs->gpu_dev, bs->cpu_dev};

  err = hsa_amd_memory_pool_allocate(bs->cpu_pool, input_length, 0,
                                     reinterpret_cast<void**>(&bs->input));
  RET_IF_HSA_ERR(err);
  err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->input);
  RET_IF_HSA_ERR(err);
  (void)memset(bs->input, 0, input_length);

  err = hsa_amd_memory_pool_allocate(bs->cpu_pool, out_length, 0,
                                     reinterpret_cast<void**>(&bs->output));
  RET_IF_HSA_ERR(err);
  err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->output);
  RET_IF_HSA_ERR(err);
  // Clear the buffer that was just allocated (not "input" again).
  (void)memset(bs->output, 0, out_length);

  err = hsa_amd_memory_pool_allocate(bs->cpu_pool, in_length, 0,
                                     reinterpret_cast<void**>(&bs->input_arr));
  RET_IF_HSA_ERR(err);
  err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->input_arr);
  RET_IF_HSA_ERR(err);
  // Clear the buffer that was just allocated (not "input" again).
  (void)memset(bs->input_arr, 0, in_length);

  err = hsa_amd_memory_pool_allocate(bs->cpu_pool, in_length, 0,
                               reinterpret_cast<void**>(&bs->input_arr_local));
  RET_IF_HSA_ERR(err);
  err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->input_arr_local);
  RET_IF_HSA_ERR(err);

  // Binary-search application specific code...
  // Initialize input buffer with random values in an increasing order
  uint32_t max = bs->length * 20;
  bs->input[0] = 0;

  uint32_t seed = (unsigned int)time(NULL);
  srand(seed);

  // NOTE(review): "max * rand_r(&seed)" is evaluated in 32-bit unsigned
  // arithmetic before the division, so it can wrap around. The expression is
  // deliberately left as it was, because the values it produces determine
  // whether find_me occurs in the array - confirm the intent before changing.
  for (uint32_t i = 1; i < bs->length; ++i) {
    bs->input[i] = bs->input[i - 1] +
     static_cast<uint32_t>(max * rand_r(&seed) / static_cast<float>(RAND_MAX));
  }

// #define VERBOSE 1
#ifdef VERBOSE
  std::cout << "Input array values:" << std::endl;

  for (uint32_t i = 0; i < bs->length; ++i) {
    std::cout << "input[" << i << "] = " << bs->input[i] << " ";

    if (i % 4 == 0) {
      std::cout << std::endl;
    }
  }

  std::cout << std::endl;
#endif
  return err;
}
// The code in this function illustrates how to load a kernel from
// pre-compiled code. The goal is to get a handle that can be later
// used in an AQL packet and also to extract information about kernel
// that we will need. All of the information and the kernel handle will
// be saved to the BinarySearch structure. It will be used when we
// populate the AQL packet.
//
// Reads:  bs->kernel_file_name, bs->kernel_name, bs->gpu_dev
// Writes: bs->kernel_object, bs->private_segment_size,
//         bs->group_segment_size, bs->kernarg_size, bs->kernarg_align
//
// Returns HSA_STATUS_SUCCESS, HSA_STATUS_ERROR if the file cannot be opened,
// or the status of the first HSA call that failed.
hsa_status_t LoadKernelFromObjFile(BinarySearch* bs) {
  hsa_status_t err;
  hsa_code_object_reader_t code_obj_rdr = {0};
  hsa_executable_t executable = {0};

  hsa_file_t file_handle = open(bs->kernel_file_name.c_str(), O_RDONLY);

  if (file_handle == -1) {
    std::cout << "failed to open " << bs->kernel_file_name.c_str() <<
              " at line " << __LINE__ << ", errno: " << errno << std::endl;
    return HSA_STATUS_ERROR;
  }

  err = hsa_code_object_reader_create_from_file(file_handle, &code_obj_rdr);
  // Close the descriptor before checking the status so that it is released on
  // the failure path as well; checking first would leak it on error.
  // NOTE(review): the descriptor is closed while the code object reader is
  // still in use below, as the code did before - confirm against the HSA
  // specification that the reader does not need the file to stay open.
  close(file_handle);
  RET_IF_HSA_ERR(err);

  err = hsa_executable_create_alt(HSA_PROFILE_FULL,
                   HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL, &executable);
  RET_IF_HSA_ERR(err);

  err = hsa_executable_load_agent_code_object(executable, bs->gpu_dev,
                                              code_obj_rdr, NULL, NULL);
  RET_IF_HSA_ERR(err);

  err = hsa_executable_freeze(executable, NULL);
  RET_IF_HSA_ERR(err);

  // Look up the kernel by name and query everything the AQL packet needs.
  hsa_executable_symbol_t kern_sym;
  err = hsa_executable_get_symbol(executable, NULL, bs->kernel_name.c_str(),
                                  bs->gpu_dev, 0, &kern_sym);
  RET_IF_HSA_ERR(err);

  err = hsa_executable_symbol_get_info(kern_sym,
                                  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
                                  &bs->kernel_object);
  RET_IF_HSA_ERR(err);

  err = hsa_executable_symbol_get_info(kern_sym,
                       HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
                       &bs->private_segment_size);
  RET_IF_HSA_ERR(err);

  err = hsa_executable_symbol_get_info(kern_sym,
                         HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
                         &bs->group_segment_size);
  RET_IF_HSA_ERR(err);

  err = hsa_executable_symbol_get_info(kern_sym,
     HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &bs->kernarg_size);
  RET_IF_HSA_ERR(err);

  err = hsa_executable_symbol_get_info(kern_sym,
                  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT,
                  &bs->kernarg_align);
  RET_IF_HSA_ERR(err);

  return err;
}
// This function shows how to do an asynchronous copy. We have to create a
// signal and use the signal to notify us when the copy has completed.
//
// Copies "size" bytes from "src" (owned by agent src_ag) to "dst" (owned by
// agent dst_ag) and blocks until the copy has finished.
//
// Returns HSA_STATUS_SUCCESS on success, the failing status of an HSA call,
// or HSA_STATUS_ERROR when the completion signal does not end at zero.
// The temporary signal is destroyed on every path once it has been created.
hsa_status_t AgentMemcpy(void* dst, const void* src,
                         size_t size, hsa_agent_t dst_ag, hsa_agent_t src_ag) {
  hsa_signal_t s;
  hsa_status_t err;

  // The signal starts at 1; the wait below is for it to drop below 1.
  err = hsa_signal_create(1, 0, NULL, &s);
  RET_IF_HSA_ERR(err);

  err = hsa_amd_memory_async_copy(dst, dst_ag, src, src_ag, size, 0, NULL, s);

  // Only wait when the copy was actually submitted.
  if (err == HSA_STATUS_SUCCESS &&
      hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1,
                                UINT64_MAX, HSA_WAIT_STATE_BLOCKED) != 0) {
    err = HSA_STATUS_ERROR;
    std::cout << "Async copy signal error" << std::endl;
  }

  // Release the signal before reporting any failure, so that an error in the
  // copy or in the wait does not leak it.
  const hsa_status_t destroy_err = hsa_signal_destroy(s);

  RET_IF_HSA_ERR(err);
  RET_IF_HSA_ERR(destroy_err);

  return HSA_STATUS_SUCCESS;
}
// AlignDown and AlignUp are two small helpers for finding an aligned boundary
// at or below (AlignDown) or at or above (AlignUp) a given value (address).
// Both clear the low bits with a mask built from `alignment - 1`, so
// `alignment` is assumed to be a power of two.
static intptr_t
AlignDown(intptr_t value, size_t alignment) {
  const size_t low_bits = alignment - 1;
  return static_cast<intptr_t>(static_cast<size_t>(value) & ~low_bits);
}
// Rounds the address `value` up to a multiple of `alignment`: the address is
// first advanced by `alignment - 1` and the sum is then aligned down with
// AlignDown.
static void*
AlignUp(void* value, size_t alignment) {
  const uintptr_t raised = reinterpret_cast<uintptr_t>(value) + alignment - 1;
  const intptr_t aligned = AlignDown(raised, alignment);
  return reinterpret_cast<void*>(aligned);
}
// Fills the caller's AQL packet `aql` from the values gathered so far in the
// BinarySearch structure `bs`. The dispatch is one-dimensional: the y and z
// work-group and grid sizes are fixed at 1.
void PopulateAQLPacket(BinarySearch const* bs,
                       hsa_kernel_dispatch_packet_t* aql) {
  hsa_kernel_dispatch_packet_t& pkt = *aql;

  // Placeholder only: the real header is written right before the doorbell
  // is rung.
  pkt.header = 0;
  // presumably the number of dispatch dimensions (1) — confirm against the
  // HSA packet definition
  pkt.setup = 1;

  // Work-group geometry
  pkt.workgroup_size_x = bs->work_group_size;
  pkt.workgroup_size_y = 1;
  pkt.workgroup_size_z = 1;

  // Grid geometry
  pkt.grid_size_x = bs->work_grid_size;
  pkt.grid_size_y = 1;
  pkt.grid_size_z = 1;

  // Values queried from the kernel symbol
  pkt.private_segment_size = bs->private_segment_size;
  pkt.group_segment_size = bs->group_segment_size;
  pkt.kernel_object = bs->kernel_object;

  // Kernel arguments and the signal used to detect completion
  pkt.kernarg_address = bs->kern_arg_address;
  pkt.completion_signal = bs->signal;
}
/*
 * Advances the write index of queue `q` by one and copies the AQL packet
 * `in_aql` into the slot selected by the value that call returned, leaving
 * out the first 32 bits (the header and setup fields). Those two fields must
 * be written last, after everything else is in place.
 */
void WriteAQLToQueue(hsa_kernel_dispatch_packet_t const* in_aql,
                     hsa_queue_t* q) {
  hsa_kernel_dispatch_packet_t* const slots =
      reinterpret_cast<hsa_kernel_dispatch_packet_t*>(q->base_address);

  // size - 1 is used as a wrap-around mask for the slot index
  const uint32_t queue_mask = q->size - 1;
  const uint64_t que_idx = hsa_queue_add_write_index_relaxed(q, 1);

  hsa_kernel_dispatch_packet_t& slot = slots[que_idx & queue_mask];

  slot.workgroup_size_x = in_aql->workgroup_size_x;
  slot.workgroup_size_y = in_aql->workgroup_size_y;
  slot.workgroup_size_z = in_aql->workgroup_size_z;

  slot.grid_size_x = in_aql->grid_size_x;
  slot.grid_size_y = in_aql->grid_size_y;
  slot.grid_size_z = in_aql->grid_size_z;

  slot.private_segment_size = in_aql->private_segment_size;
  slot.group_segment_size = in_aql->group_segment_size;

  slot.kernel_object = in_aql->kernel_object;
  slot.kernarg_address = in_aql->kernarg_address;
  slot.completion_signal = in_aql->completion_signal;
}
// This function allocates memory from the kern_arg pool we already found
// (bs->kern_arg_pool), copies the `arg_size` bytes at `args` into it at an
// address aligned to bs->kernarg_align, and then allows both the GPU and the
// CPU agent to access the buffer.
//
// On success:
//   bs->kern_arg_buffer   start of the allocation (the address CleanUp frees)
//   bs->kern_arg_address  aligned address the arguments were copied to
//   *aql_buf_ptr          same value as bs->kern_arg_address
// and HSA_STATUS_SUCCESS is returned. HSA call failures are reported through
// RET_IF_HSA_ERR.
hsa_status_t AllocAndSetKernArgs(BinarySearch* bs, void* args,
                                 size_t arg_size, void** aql_buf_ptr) {
  void* kern_arg_buf = nullptr;
  hsa_status_t err;

  // The kernel arguments must be written to memory at the correct alignment.
  // We already queried the executable for that alignment, which is stored in
  // bs->kernarg_align. In case the memory returned by
  // hsa_amd_memory_pool_allocate is not suitably aligned, we request a little
  // more than we need so that the start address can be adjusted.
  const size_t req_align = bs->kernarg_align;

  // Allocate enough extra space for alignment adjustments if necessary
  const size_t buf_size = arg_size + (req_align << 1);

  err = hsa_amd_memory_pool_allocate(bs->kern_arg_pool, buf_size, 0,
                                     &kern_arg_buf);
  RET_IF_HSA_ERR(err);

  // Address of the allocated buffer
  bs->kern_arg_buffer = kern_arg_buf;

  // Addr. of kern arg start.
  bs->kern_arg_address = AlignUp(kern_arg_buf, req_align);

  // The caller must supply at least as many bytes as the kernel expects, and
  // the aligned copy must stay inside the allocation.
  assert(arg_size >= bs->kernarg_size);
  assert(((uintptr_t)bs->kern_arg_address + arg_size) <
         ((uintptr_t)bs->kern_arg_buffer + buf_size));

  (void)memcpy(bs->kern_arg_address, args, arg_size);

  // Make sure both the CPU and GPU can access the kernel arguments
  hsa_agent_t ag_list[2] = {bs->gpu_dev, bs->cpu_dev};
  err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->kern_arg_buffer);
  RET_IF_HSA_ERR(err);

  // Hand the aligned argument address back to the caller.
  *aql_buf_ptr = bs->kern_arg_address;

  return HSA_STATUS_SUCCESS;
}
// This wrapper atomically writes the provided header and setup to the
// provided AQL packet. The provided AQL packet address should be in the
// queue memory space.
//
// `header` ends up in the low 16 bits and `setup` in the high 16 bits of the
// packet's first 32-bit word, which is stored with release ordering.
inline void AtomicSetPacketHeader(uint16_t header, uint16_t setup,
                                  hsa_kernel_dispatch_packet_t* queue_packet) {
  // Widen before shifting: a uint16_t operand is promoted to (signed) int, so
  // shifting a setup value that has bit 15 set left by 16 would overflow int.
  const uint32_t header_and_setup =
      static_cast<uint32_t>(header) | (static_cast<uint32_t>(setup) << 16);

  __atomic_store_n(reinterpret_cast<uint32_t*>(queue_packet),
                   header_and_setup, __ATOMIC_RELEASE);
}
// Once all the required data for kernel execution is collected (in this
// application it is stored in the BinarySearch structure) we can put it in
// an AQL packet and ring the queue door bell to tell the command processor to
// execute it.
//
// Outline:
//   1. Cap the work-group size and derive the number of sub-divisions.
//   2. Return early if find_me is outside [input[0], input[length - 1]].
//   3. Build the kernel argument block and a template AQL packet.
//   4. Repeatedly dispatch the kernel, narrowing the search bounds from the
//      kernel's output, while the sub-division size is > 1 and the kernel
//      reported a candidate segment.
//   5. Scan the final [lower, upper] range on the CPU and print the result.
//
// Returns HSA_STATUS_SUCCESS unless an HSA call fails or the wait on the
// completion signal returns a non-zero value.
hsa_status_t Run(BinarySearch* bs) {
hsa_status_t err;
std::cout << "Executing kernel " << bs->kernel_name << std::endl;
// Adjust the size of workgroup
// This is mostly application specific.
// Work-group sizes above 64 are capped at 64; the number of sub-divisions is
// then length / 64, but never less than the work-group size, and it is also
// used as the grid size.
if (bs->work_group_size > 64) {
bs->work_group_size = 64;
bs->num_sub_divisions = bs->length / bs->work_group_size;
if (bs->num_sub_divisions < bs->work_group_size) {
bs->num_sub_divisions = bs->work_group_size;
}
bs->work_grid_size = bs->num_sub_divisions;
}
// Explanation of BinarySearch algorithm.
/*
* Since a plain binary search on the GPU would not achieve much benefit
* over the CPU we are doing an N'ary search. We split the array into N
* segments every pass and therefore get log (base N) passes instead of log
* (base 2) passes.
*
* In every pass, only the thread that can potentially have the element we
* are looking for writes to the output array. For ex: if we are looking to
* find 4567 in the array and every thread is searching over a segment of
* 1000 values and the input array is 1, 2, 3, 4,... then the first thread
* is searching in 1 to 1000, the second one from 1001 to 2000, etc. The
* first one does not write to the output. The second one doesn't either.
* The fifth one however is from 4001 to 5000. So it can potentially have
* the element 4567 which lies between them.
*
* This particular thread writes to the output the lower bound, upper bound
* and whether the element equals the lower bound element. So, it would be
* 4001, 5000, 0
*
* The next pass would subdivide 4001 to 5000 into smaller segments and
* continue the same process from there.
*
* When a pass returns 1 in the third element, it means the element has been
* found and we can stop executing the kernel. If the element is not found,
* then the execution stops after looking at segment of size 1.
*/
// NOTE(review): the code below reads output[0] as a segment index and
// output[3] as the "continue" flag, which differs from the lower/upper/found
// triple described above — confirm which description is current.
uint32_t global_lower_bound = 0;
uint32_t global_upper_bound = bs->length - 1;
uint32_t sub_div_size = (global_upper_bound - global_lower_bound + 1) /
bs->num_sub_divisions;
// Nothing to search if find_me lies outside the range spanned by the first
// and last input elements.
if ((bs->input[0] > bs->find_me) ||
(bs->input[bs->length - 1] < bs->find_me)) {
bs->output[0] = 0;
bs->output[1] = bs->length - 1;
bs->output[2] = 0;
std::cout << "Returning too early" << std::endl;
return HSA_STATUS_SUCCESS;
}
// output[3] is the flag tested by the while loop below; start it at 1 so the
// first pass runs.
bs->output[3] = 1;
// Setup the kernel args
// See the meta-data for the compiled OpenCL kernel code to ascertain
// the sizes, padding and alignment required for kernel arguments.
// This can be seen by executing
// $ amdgcn-amd-amdhsa-readelf -aw ./binary_search_kernels.hsaco
// The kernel code will expect the following arguments aligned as shown.
typedef uint32_t uint2[2];
typedef uint32_t uint4[4];
struct __attribute__((aligned(16))) local_args_t {
uint4* outputArray;
uint2* sortedArray;
uint32_t findMe;
uint32_t pad;
uint64_t global_offset_x;
uint64_t global_offset_y;
uint64_t global_offset_z;
} local_args;
// Note: local_args.pad is never assigned and is copied as-is.
local_args.outputArray = reinterpret_cast<uint4*>(bs->output);
local_args.sortedArray = reinterpret_cast<uint2*>(bs->input_arr_local);
local_args.findMe = bs->find_me;
local_args.global_offset_x = 0;
local_args.global_offset_y = 0;
local_args.global_offset_z = 0;
// Copy the kernel args structure into kernel arg memory
err = AllocAndSetKernArgs(bs, &local_args, sizeof(local_args),
&bs->kern_arg_address);
RET_IF_HSA_ERR(err);
// Populate an AQL packet with the info we've gathered
hsa_kernel_dispatch_packet_t aql;
PopulateAQLPacket(bs, &aql);
// Bytes copied to the device per pass: one (first, last) pair of uint32_t
// values per sub-division.
uint32_t in_length = bs->num_sub_divisions * 2 * sizeof(uint32_t);
while ((sub_div_size > 1) && (bs->output[3] != 0)) {
// Collect the first and last element of every segment for this pass.
// NOTE(review): idx1/idx2 are computed from index 0, not from
// global_lower_bound, so later passes appear to sample the start of the
// array rather than the narrowed range — confirm this is intended.
for (uint32_t i = 0 ; i < bs->num_sub_divisions; i++) {
int idx1 = i * sub_div_size;
int idx2 = ((i + 1) * sub_div_size) - 1;
bs->input_arr[2 * i] = bs->input[idx1];
bs->input_arr[2 * i + 1] = bs->input[idx2];
}
// Copy kernel parameter from system memory to local memory
err = AgentMemcpy(reinterpret_cast<uint8_t*>(bs->input_arr_local),
reinterpret_cast<uint8_t*>(bs->input_arr), in_length, bs->gpu_dev,
bs->cpu_dev);
RET_IF_HSA_ERR(err);
// Reset output buffer to zero
bs->output[3] = 0;
// Dispatch kernel with global work size, work group size with ONE dimension
// and wait for kernel to complete
// Compute the write index of queue and copy Aql packet into it
// The index is read here, before WriteAQLToQueue advances the queue's write
// index by one; que_idx is later used to address the packet slot
// (presumably valid only while this is the sole producer on the queue).
uint64_t que_idx = hsa_queue_load_write_index_relaxed(bs->queue);
const uint32_t mask = bs->queue->size - 1;
// This function simply copies the data we've collected so far into our
// local AQL packet, except the setup and header fields.
WriteAQLToQueue(&aql, bs->queue);
uint32_t aql_header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
aql_header |= HSA_FENCE_SCOPE_SYSTEM <<
HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
aql_header |= HSA_FENCE_SCOPE_SYSTEM <<
HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
// Set the packet's type, acquire and release fences. This should be done
// atomically after all the other fields have been set, using release
// memory ordering to ensure all the fields are set when the door bell
// signal is activated.
void* q_base = bs->queue->base_address;
AtomicSetPacketHeader(aql_header, aql.setup,
&(reinterpret_cast<hsa_kernel_dispatch_packet_t*>(q_base))[que_idx & mask]);
// Increment the write index and ring the doorbell to dispatch kernel.
hsa_queue_store_write_index_relaxed(bs->queue, (que_idx + 1));
hsa_signal_store_relaxed(bs->queue->doorbell_signal, que_idx);
// Wait on the dispatch signal until the kernel is finished.
// Modify the wait condition to HSA_WAIT_STATE_ACTIVE (instead of
// HSA_WAIT_STATE_BLOCKED) if polling is needed instead of blocking, as we
// have below.
// The call below will block until the condition is met. Below we have said
// the condition is that the signal value (initialized to 1) associated with
// the queue is less than 1. When the kernel associated with the queued AQL
// packet has completed execution, the signal value is automatically
// decremented by the packet processor.
hsa_signal_value_t value = hsa_signal_wait_scacquire(bs->signal,
HSA_SIGNAL_CONDITION_LT, 1,
UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
// value should be 0, or we timed-out
if (value) {
std::cout << "Timed out waiting for kernel to complete?" << std::endl;
RET_IF_HSA_ERR(HSA_STATUS_ERROR);
}
// Reset the signal to its initial value for the next iteration
hsa_signal_store_screlease(bs->signal, 1);
// Binary search algorithm stuff...
// output[0] is treated as the index of the segment that may contain
// find_me; the bounds are narrowed to that segment and the segment is split
// again for the next pass.
global_lower_bound = bs->output[0] * sub_div_size;
global_upper_bound = global_lower_bound + sub_div_size - 1;
sub_div_size = (global_upper_bound - global_lower_bound + 1) /
bs->num_sub_divisions;
}
// Finish on the CPU: linearly scan the remaining [lower, upper] range.
uint32_t element_index = UINT_MAX;
for (uint32_t i = global_lower_bound; i <= global_upper_bound; i++) {
if (bs->input[i] == bs->find_me) {
element_index = i;
bs->output[0] = i;
bs->output[1] = i + 1;
bs->output[2] = 1;
break;
}
// Element is not found in region specified
// by global lower bound to global upper bound
bs->output[2] = 0;
}
// output[2] == 1 means the scan above found the element.
uint32_t is_elem_found = bs->output[2];
std::cout << "Lower bound = " << global_lower_bound << std::endl;
std::cout << "Upper bound = " << global_upper_bound << std::endl;
std::cout << "Element search for = " << bs->find_me << std::endl;
if (is_elem_found == 1) {
std::cout << "Element found at index " << element_index << std::endl;
}
else {
std::cout << "Element value " << bs->find_me << " not found" << std::endl;
}
return HSA_STATUS_SUCCESS;
}
// Releases the RocR resources this sample acquired: the pool allocations
// (input, output, input_arr and the kernel argument buffer), then the queue
// and the completion signal, and finally shuts the runtime down. Every call
// is checked with RET_IF_HSA_ERR; HSA_STATUS_SUCCESS is returned when all of
// them succeed.
// NOTE(review): bs->input_arr_local (used by Run) is not released here —
// confirm whether it is owned or freed elsewhere.
hsa_status_t CleanUp(BinarySearch* bs) {
  hsa_status_t err;

  // Pool allocations, released in this order.
  void* const pool_buffers[] = {bs->input, bs->output, bs->input_arr,
                                bs->kern_arg_buffer};
  for (void* buffer : pool_buffers) {
    err = hsa_amd_memory_pool_free(buffer);
    RET_IF_HSA_ERR(err);
  }

  err = hsa_queue_destroy(bs->queue);
  RET_IF_HSA_ERR(err);

  err = hsa_signal_destroy(bs->signal);
  RET_IF_HSA_ERR(err);

  err = hsa_shut_down();
  RET_IF_HSA_ERR(err);

  return HSA_STATUS_SUCCESS;
}
// Entry point of the sample. Initializes the runtime, gathers the handles
// the sample needs, runs the search and releases everything again. Any
// failing step is reported through RET_IF_HSA_ERR; 0 is returned when every
// step, including the final clean-up, succeeds.
int main(int argc, char* argv[]) {
  // This BinarySearch structure (bs) below holds all of the appl. specific
  // info we need to run the sample. This includes algorithm specific
  // information as well as handles to RocR/HSA objects.
  // The basic structure of this sample is to fill in this structure with the
  // required RocR/HSA handles to RocR resources (e.g., agents, memory pools,
  // queues, etc.) and then dispatch the packets to the queue, and examine the
  // output.
  BinarySearch bs;
  hsa_status_t err;

  // Set some working values specific to this application
  InitializeBinarySearch(&bs);

  // hsa_init() initializes internal data structures and causes devices
  // (agents), memory pools and other resources to be discovered.
  err = hsa_init();
  RET_IF_HSA_ERR(err);

  // Find the agents needed for the sample
  err = FindDevices(&bs);
  RET_IF_HSA_ERR(err);

  // Create the completion signal used when dispatching a packet
  err = hsa_signal_create(1, 0, NULL, &bs.signal);
  RET_IF_HSA_ERR(err);

  // Create a queue to submit our binary search AQL packets
  err = hsa_queue_create(bs.gpu_dev, 128, HSA_QUEUE_TYPE_MULTI, NULL, NULL,
                         UINT32_MAX, UINT32_MAX, &bs.queue);
  RET_IF_HSA_ERR(err);

  // Find the HSA memory pools we need to run this sample
  err = FindPools(&bs);
  RET_IF_HSA_ERR(err);

  // Allocate memory from the correct memory pool, and initialize them as
  // needed for the algorithm.
  err = AllocateAndInitBuffers(&bs);
  RET_IF_HSA_ERR(err);

  // Create a kernel object from the pre-compiled kernel, and read some
  // attributes associated with the kernel that we will need.
  err = LoadKernelFromObjFile(&bs);
  RET_IF_HSA_ERR(err);

  // Fill in the AQL packet, assign the kernel arguments, enqueue the packet,
  // "ring" the doorbell, and wait for completion.
  err = Run(&bs);
  RET_IF_HSA_ERR(err);

  // Release all the RocR resources we've acquired and shutdown HSA. The
  // status is checked like every other step so that a failed clean-up is
  // reported instead of being silently dropped.
  err = CleanUp(&bs);
  RET_IF_HSA_ERR(err);

  return 0;
}
#undef RET_IF_HSA_ERR
@@ -0,0 +1,127 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
/**
 * One instance of this kernel call is a thread.
 * Each thread reads the pair stored at its global id in sortedArray: the
 * lower bound (.x) and upper bound (.y) of the segment assigned to it.
 *
 * If findMe lies within those bounds (inclusive), the thread records its own
 * id in outputArray[0].x and sets outputArray[0].w to 1, marking its segment
 * as the search space for the next pass. Threads whose segment cannot
 * contain findMe write nothing.
 */
__kernel void
binarySearch(__global uint4 * outputArray,
             __const __global uint2 * sortedArray,
             const unsigned int findMe) {
  const unsigned int tid = get_global_id(0);

  // Lower (.x) and upper (.y) bound of this thread's segment
  const uint2 bounds = sortedArray[tid];

  // Only the thread whose segment brackets findMe reports back
  if ((bounds.x <= findMe) && (findMe <= bounds.y)) {
    outputArray[0].x = tid;
    outputArray[0].w = 1;
  }
}
/**
 * Multi-key variant: every thread owns a fixed segment of 256 consecutive
 * elements of input, starting at global id * 256. For each of the numKeys
 * keys, a thread whose segment's first and last elements bracket the key
 * (inclusive) stores the segment's start index in output at the key's
 * position.
 */
__kernel void
binarySearch_mulkeys(__global int *keys,
                     __global uint *input,
                     const unsigned int numKeys,
                     __global int *output) {
  const int gid = get_global_id(0);
  const int segStart = gid * 256;
  const int segEnd = segStart + 255;

  for (int k = 0; k < numKeys; k++) {
    // Skip keys that fall outside this thread's segment
    if (keys[k] < input[segStart] || keys[k] > input[segEnd])
      continue;

    output[k] = segStart;
  }
}
/**
 * Concurrent multi-key search. The input is split into numSubdivisions
 * segments of inputSize / numSubdivisions elements each. Work-item g searches
 * segment (g % numSubdivisions) for key number (g / numSubdivisions) with a
 * classic binary search, and writes the index of a match to output at the
 * key's position. Nothing is written when the key is not in the segment.
 *
 * Note: when inputSize is not a multiple of numSubdivisions, the trailing
 * inputSize % numSubdivisions elements are not covered by any segment.
 */
__kernel void
binarySearch_mulkeysConcurrent(__global uint *keys,
                               __global uint *input,
                               const unsigned int inputSize, // num. of inputs
                               const unsigned int numSubdivisions,
                               __global int *output) {
  const int segSize = inputSize / numSubdivisions;
  const int keyIdx = get_global_id(0) / numSubdivisions;

  int lBound = (int)(get_global_id(0) % numSubdivisions) * segSize;
  // Inclusive upper bound: the last element that belongs to this segment.
  // Using lBound + segSize here would let the last segment read
  // input[inputSize], one element past the end of the array.
  int uBound = lBound + segSize - 1;

  const uint myKey = keys[keyIdx];

  while (uBound >= lBound) {
    // Midpoint computed without overflowing lBound + uBound
    const int mid = lBound + (uBound - lBound) / 2;

    if (input[mid] == myKey) {
      output[keyIdx] = mid;
      return;
    } else if (input[mid] > myKey) {
      uBound = mid - 1;
    } else {
      lBound = mid + 1;
    }
  }
}
@@ -0,0 +1,224 @@
#
# Minimum version of cmake required
#
cmake_minimum_required(VERSION 2.8.0)
#
# GCC 4.8 or higher compiler required.
#
# Setup build environment
#
# 1) Set env. variable specifying the location of ROCR header files
#
# export ROCR_DIR="Root for RocR install"
#
# 2) Set env. variable ROCRTST_BLD_TYPE to either "Debug" or "Release".
# If not set (or set to any other value), "Release" is used.
#
# export ROCRTST_BLD_TYPE=Debug or ROCRTST_BLD_TYPE=Release
#
# 3) Set env. variable ROCRTST_BLD_BITS to either "32" or "64"
# If not set, the default value of "64" is bound.
#
# export ROCRTST_BLD_BITS=32 or ROCRTST_BLD_BITS=64
#
# 4) Set env. variable TARGET_DEVICE to indicate gpu type (e.g., gfx803,
# gfx900, ...)
#
# 5) Set env. variables AMDHSAFIN_DIR and AMDHSAFIN_TARGET to the
# directory containing the amd finalizer executable and version
# (e.g, 8:0:3) respectively.
#
# Building rocrtst Suite
#
# 1) Create build folder e.g. "rocrtst/build" - any name will do
# 2) Cd into build folder
# 3) Run "cmake .."
# 4) Run "make"
#
#
# Currently support for Windows platform is not present
#
# Stop processing this file on Windows; only a message is printed.
if(WIN32)
MESSAGE("rocrtst Suite is not supported on Windows platform")
RETURN()
endif()
#
# Process environment variables relating to Build type, size and RT version
#
# ROCRTST_BLD_TYPE is compared case-insensitively: "debug" selects a Debug
# build, anything else (including unset) selects Release.
string(TOLOWER "$ENV{ROCRTST_BLD_TYPE}" tmp)
if("${tmp}" STREQUAL debug)
set(BUILD_TYPE "Debug")
set(ISDEBUG 1)
else()
set(BUILD_TYPE "Release")
set(ISDEBUG 0)
endif()
# ROCRTST_BLD_BITS=32 selects a 32-bit build; anything else (including unset)
# selects 64-bit. ONLY64STR is the suffix appended to project, target and
# library names ("" for 32-bit, "64" for 64-bit).
if("$ENV{ROCRTST_BLD_BITS}" STREQUAL 32)
set (ONLY64STR "")
set (IS64BIT 0)
else()
set (ONLY64STR "64")
set (IS64BIT 1)
endif()
# Locations of the ROCR headers and libraries, derived from the ROCR_DIR
# environment variable.
set(ROCR_INC_DIR $ENV{ROCR_DIR}/hsa/include)
set(ROCR_LIB_DIR $ENV{ROCR_DIR}/lib)
#
# Determine ROCR Header files are present
#
if(NOT EXISTS ${ROCR_INC_DIR}/hsa/hsa.h)
  MESSAGE("ERROR: ${ROCR_INC_DIR}/hsa/hsa.h does not exist. Check ROCR_DIR env. variable.")
  RETURN()
endif()

#
# Determine ROCR Library files are present. ROCR_LIB_DIR is derived from the
# ROCR_DIR environment variable, so that is the variable the user has to fix
# when the runtime library cannot be found.
#
if (${IS64BIT} EQUAL 0)
  if(NOT EXISTS ${ROCR_LIB_DIR}/libhsa-runtime.so)
    MESSAGE("ERROR: ${ROCR_LIB_DIR}/libhsa-runtime.so does not exist. Check ROCR_DIR env. variable.")
    RETURN()
  endif()
else()
  if(NOT EXISTS ${ROCR_LIB_DIR}/libhsa-runtime64.so)
    MESSAGE("ERROR: ${ROCR_LIB_DIR}/libhsa-runtime64.so does not exist. Check ROCR_DIR env. variable.")
    RETURN()
  endif()
endif()
#
# Set Name for rocrtst Suite Project
#
# The name is "rocrtst" followed by the bitness suffix ("" or "64").
set(ROCRTST_SUITE_NAME "rocrtst${ONLY64STR}")
project (${ROCRTST_SUITE_NAME})
#
# Print out the build configuration being used:
#
# Build Src directory
# Build Binary directory
# Build Type: Debug Vs Release, 32 Vs 64
# Compiler Version, etc
#
# These messages are informational only; nothing below depends on them.
MESSAGE("")
MESSAGE("-------------IS64BIT: " ${IS64BIT})
MESSAGE("-----------BuildType: " ${BUILD_TYPE})
MESSAGE("------------Compiler: " ${CMAKE_CXX_COMPILER})
MESSAGE("-------------Version: " ${CMAKE_CXX_COMPILER_VERSION})
MESSAGE("--------Proj Src Dir: " ${PROJECT_SOURCE_DIR})
MESSAGE("--------Proj Bld Dir: " ${PROJECT_BINARY_DIR})
MESSAGE("--------Proj Lib Dir: " ${PROJECT_BINARY_DIR}/lib)
MESSAGE("--------Proj Exe Dir: " ${PROJECT_BINARY_DIR}/bin)
MESSAGE("")
#
# Set the build type based on user input
#
# BUILD_TYPE was derived from the ROCRTST_BLD_TYPE environment variable above.
set(CMAKE_BUILD_TYPE ${BUILD_TYPE})
#
# Flag to enable / disable verbose output.
#
SET( CMAKE_VERBOSE_MAKEFILE on )
#
# Compiler pre-processor definitions.
#
# Define MACRO "DEBUG" if build type is "Debug"
if(${BUILD_TYPE} STREQUAL "Debug")
add_definitions(-DDEBUG)
endif()
add_definitions(-D__linux__)
add_definitions(-DLITTLEENDIAN_CPU=1)
#
# Linux Compiler options
#
# Note: the first set() below overwrites CMAKE_CXX_FLAGS instead of appending
# to it, so any flags supplied before this point are discarded. The remaining
# lines append one flag each. Warnings are treated as errors (-Werror).
set(CMAKE_CXX_FLAGS "-std=c++11 ")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-math-errno")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-threadsafe-statics")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmerge-all-constants")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fms-extensions")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic")
#
# Extend the compiler flags for 64-bit builds
#
# 32-bit builds get -m32 instead.
if (IS64BIT)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -msse -msse2")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")
endif()
#
# Add compiler flags to include symbol information for debug builds
#
if(ISDEBUG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb")
endif()
MESSAGE("ISDEBUG STEP:Done")
# Root of the rocrtst tree, two directories above this CMakeLists.txt.
set(ROCRTST_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
# Set Name for Google Test Framework and build it as a
# static library to be linked with user test programs
#
# The gtest sources live under ${ROCRTST_ROOT}/gtest and are built into the
# "gtest" sub-directory of this project's binary directory.
set(GOOGLE_TEST_FRWK_NAME "google-test-frwk${ONLY64STR}")
add_subdirectory(${ROCRTST_ROOT}/gtest "${PROJECT_BINARY_DIR}/gtest")
set (ROCRTST_LIBS ${ROCRTST_LIBS} ${GOOGLE_TEST_FRWK_NAME}
hsa-runtime-tools${ONLY64STR})
MESSAGE("ROCRTST_LIBS SET STEP:Done")
#
#
# Other source directories
# All sources found in ${ROCRTST_ROOT}/common are collected into common_srcs.
aux_source_directory(${ROCRTST_ROOT}/common common_srcs)
#
# Specify the directory containing various libraries of ROCR
# to be linked against for building ROC Perf applications
#
LINK_DIRECTORIES(${ROCR_LIB_DIR})
#
# Extend the list of libraries to be used for linking ROC Perf Apps
#
set(ROCRTST_LIBS ${ROCRTST_LIBS} hsa-runtime${ONLY64STR})
# Set Name for rocrtst
MESSAGE(${ROCRTST_LIBS})
set(ROCRTST "rocrtst${ONLY64STR}")
#
# Source files for building rocrtst
#
# Every source file in this directory becomes part of the executable.
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} performanceSources)
# Header file include path
include_directories(${ROCR_INC_DIR})
include_directories(${ROCRTST_ROOT})
include_directories(${ROCRTST_ROOT}/gtest/include)
# Build rules
add_executable(${ROCRTST} ${performanceSources} ${common_srcs})
target_link_libraries(${ROCRTST} ${ROCRTST_LIBS} c stdc++ dl pthread rt)
# Install destinations are inside the build tree (lib/ and bin/ under
# PROJECT_BINARY_DIR).
INSTALL(TARGETS ${ROCRTST}
ARCHIVE DESTINATION ${PROJECT_BINARY_DIR}/lib
LIBRARY DESTINATION ${PROJECT_BINARY_DIR}/lib
RUNTIME DESTINATION ${PROJECT_BINARY_DIR}/bin)
@@ -0,0 +1,258 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include "cp_process_time.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "common/os.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#include "hsa/hsa_ext_finalize.h"
#include <algorithm>
// Iteration count handed to the kernel; Run() expects the kernel to write
// this value back as its result.
static constexpr uint64_t kKernelIterations = 10000;
// Sentinel stored in the result buffer before the first dispatch.
static constexpr uint64_t kTestBadValue = 1234567891234567891ULL;

// Expected bounds, in microseconds, for the measured dispatch overhead.
// For gfx803, overhead time had a max of 18.208uS and a min of 7.82uS.
static constexpr double kGfx803MinOverhead = 7.78;
static constexpr double kGfx803MaxOverhead = 21.064;
// Fraction by which the bounds above are widened before they are checked.
static constexpr double kOverheadToleranceFactor = 0.25;
// Constructs the test with no measurement recorded yet: mean_ starts at 0.0,
// which DisplayResults() treats as "nothing to report".
CpProcessTime::CpProcessTime() :
BaseRocR() {
// kernel_name_ = "&__simple_kernel";
mean_ = 0.0;
}
// Nothing to release here; clean-up is done in Close().
CpProcessTime::~CpProcessTime() {
}
// Prepares everything Run() needs: selects the kernel file and kernel name,
// initializes HSA, creates the main queue with profiling enabled, loads the
// kernel and initializes the AQL packet for a dispatch with a work-group and
// grid size of 1. Returns silently if HSA initialization fails.
void CpProcessTime::SetUp() {
  set_kernel_file_name("simple_kernel.o");
  set_kernel_name("&__simple_kernel");

  if (rocrtst::InitAndSetupHSA(this) != HSA_STATUS_SUCCESS) {
    return;
  }

  // Create a queue on the first GPU device and make it the main queue
  hsa_agent_t* gpu_dev = gpu_device1();
  hsa_queue_t* q = nullptr;
  rocrtst::CreateQueue(*gpu_dev, &q);
  ASSERT_NE(q, nullptr);
  set_main_queue(q);

  // Enable profiling on the queue; Run() reads dispatch times from it
  hsa_status_t err = hsa_amd_profiling_set_profiler_enabled(q, 1);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Load and finalize the kernel
  err = rocrtst::LoadKernelFromObjFile(this);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Fill in the AQL packet, then override the sizes for a minimal dispatch
  auto& dispatch = aql();
  rocrtst::InitializeAQLPacket(this, &dispatch);
  dispatch.workgroup_size_x = 1;
  dispatch.grid_size_x = 1;
}
// Number of dispatches Run() actually performs: 20% more than the requested
// num_iteration(), plus one. Run() later discards the extra samples.
size_t CpProcessTime::RealIterationNum() {
  const double padded = num_iteration() * 1.2 + 1;
  return static_cast<size_t>(padded);
}
void CpProcessTime::Run() {
hsa_status_t err;
std::vector<double> timer;
if (!rocrtst::CheckProfile(this)) {
return;
}
hsa_agent_t* gpu_dev = gpu_device1();
hsa_agent_t* cpu_dev = cpu_device();
ASSERT_NE(gpu_dev, nullptr);
ASSERT_NE(cpu_dev, nullptr);
uint32_t it = RealIterationNum();
typedef struct args_t {
uint64_t* iteration;
uint64_t* result;
} args;
err = rocrtst::SetPoolsTypical(this);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
uint64_t* iter = NULL;
uint64_t* result = NULL;
err = rocrtst::AllocAndAllowAccess(this, sizeof(uint64_t), cpu_pool(),
(void**)&iter);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
err = rocrtst::AllocAndAllowAccess(this, sizeof(uint64_t), cpu_pool(),
(void**)&result);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
*iter = kKernelIterations;
*result = kTestBadValue;
args k_args;
k_args.iteration = (uint64_t*)iter;
k_args.result = (uint64_t*)result;
err = rocrtst::AllocAndSetKernArgs(this, &k_args, sizeof(args));
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
rocrtst::WriteAQLToQueue(this);
void * q_base_addr = main_queue()->base_address;
const uint32_t queue_mask = main_queue()->size - 1;
uint32_t aql_header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
// aql_header |= HSA_FENCE_SCOPE_SYSTEM <<
// HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
// aql_header |= HSA_FENCE_SCOPE_SYSTEM <<
// HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
for (uint32_t i = 0; i < it; i++) {
// uint64_t que_idx = hsa_queue_load_write_index_relaxed(main_queue());
uint64_t que_idx = hsa_queue_add_write_index_relaxed(main_queue(), 1);
//Get timing stamp an ring the doorbell to dispatch the kernel.
rocrtst::PerfTimer p_timer;
int id = p_timer.CreateTimer();
p_timer.StartTimer(id);
rocrtst::AtomicSetPacketHeader(aql_header, aql().setup,
&((hsa_kernel_dispatch_packet_t*)(q_base_addr))[que_idx & queue_mask]);
hsa_queue_store_write_index_relaxed(main_queue(), (que_idx + 1));
hsa_signal_store_relaxed(main_queue()->doorbell_signal, que_idx);
while (hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_LT, 1,
(uint64_t) - 1, HSA_WAIT_STATE_ACTIVE))
;
// hsa_signal_value_t value = hsa_signal_wait_scacquire(signal(),
// HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
// value should be 0, or we timed-out
//ASSERT_EQ(value, 0);
p_timer.StopTimer(id);
hsa_amd_profiling_dispatch_time_t dispatch_time;
err = hsa_amd_profiling_get_dispatch_time(*gpu_dev, signal(),
&dispatch_time);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
uint64_t ticks = dispatch_time.end - dispatch_time.start;
uint64_t freq;
err = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &freq);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
hsa_signal_store_screlease(signal(), 1);
double execution_time = (double) ticks / freq * 1e6; //convert to us
double temp = p_timer.ReadTimer(id) * 1e6;
double cp_time = temp - execution_time;
#ifdef DEBUG
std::cout << "Total:" << temp << "uS ";
std::cout << "Execution:" << execution_time << "uS ";
std::cout << "Overhead:" << cp_time << "uS ";
std::cout << "Overhead %:" << cp_time / execution_time * 100 << std::endl;
#endif
EXPECT_EQ(kKernelIterations, *result);
timer.push_back(cp_time);
//Assume overhead will not deviate too much from previously recorded
// values. If this does happen and there is not a performance bug,
// modify these constants
//This may need to be made specific to the gpu being used
EXPECT_GT(cp_time, kGfx803MinOverhead * (1 - kOverheadToleranceFactor));
EXPECT_LT(cp_time, kGfx803MaxOverhead * (1 + kOverheadToleranceFactor));
*result = 0;
}
//Abandon the first result and after sort, delete the last 2% value
timer.erase(timer.begin());
std::sort(timer.begin(), timer.end());
timer.erase(timer.begin() + num_iteration(), timer.end());
mean_ = rocrtst::CalcMean(timer);
return;
}
// Prints the mean Command Processor processing time held in mean_.
// Nothing is printed when rocrtst::CheckProfile() rejects this test or when
// mean_ is still 0.0.
void CpProcessTime::DisplayResults() const {
  if (!rocrtst::CheckProfile(this) || mean_ == 0.0) {
    return;
  }

  const char* const rule =
      "====================================================";

  std::cout << rule << std::endl;
  std::cout << "The average Command Processor processing time is: " << mean_
            << "us" << std::endl;
  std::cout << rule << std::endl;
}
void CpProcessTime::Close() {
hsa_status_t err;
err = rocrtst::CommonCleanUp(this);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}
@@ -0,0 +1,91 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef __ROCRTST_SRC_CP_PROCESS_TIME_H__
#define __ROCRTST_SRC_CP_PROCESS_TIME_H__
#include "perf_common/perf_base.h"
#include "common/base_rocr.h"
#include "common/common.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#include <vector>
//@Brief: Performance test that reports the mean Command Processor (CP)
//processing time of a kernel dispatch. Run() stores the mean in mean_ and
//DisplayResults() prints it in microseconds.
class CpProcessTime: public rocrtst::BaseRocR, public PerfBase {
public:
//@Brief: Constructor
CpProcessTime();
//@Brief: Destructor
virtual ~CpProcessTime();
//@Brief: Set up the environment for the test
virtual void SetUp();
//@Brief: Run the test case and record the mean CP processing time
virtual void Run();
//@Brief: Print the recorded mean; prints nothing when the mean is 0.0
virtual void DisplayResults() const;
//@Brief: Clean up via rocrtst::CommonCleanUp() and assert it succeeded
virtual void Close();
private:
//@Brief: Get actual iteration number
// NOTE(review): presumably larger than num_iteration() so that samples can
// be discarded before averaging - confirm against the implementation.
virtual size_t RealIterationNum();
//@Brief: Store the size of queue
uint32_t queue_size_;
//@Brief: The mean time of CP Processing (0.0 until Run() records a value)
double mean_;
};
#endif
@@ -0,0 +1,220 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include "cu_masking.h"
#include "common/base_rocr_utils.h"
#include "gtest/gtest.h"
// Constructor: zeroes the AQL dispatch packet and puts every data member
// into a known state so that no member is ever read uninitialised.
CuMasking::CuMasking() :
    BaseRocR(),
    queue_size_(0),
    mean_(0.0),
    cu_(nullptr),
    manual_input(0),
    group_input(0) {
  memset(&aql(), 0, sizeof(hsa_kernel_dispatch_packet_t));
  group_region_.handle = 0;
}
// Destructor: nothing to release here.
CuMasking::~CuMasking() = default;
void CuMasking::SetUp() {
hsa_status_t err;
hsa_agent_t* gpu_dev = gpu_device1();
hsa_agent_t* cpu_dev = cpu_device();
set_kernel_file_name("cu_masking.o");
set_kernel_name("&main");
if (HSA_STATUS_SUCCESS != rocrtst::InitAndSetupHSA(this)) {
return;
}
// Create a queue
hsa_queue_t* q = nullptr;
rocrtst::CreateQueue(*gpu_dev, &q);
set_main_queue(q);
rocrtst::LoadKernelFromObjFile(this);
// Fill up the kernel packet except header
// aql().completion_signal=signal();
// TODO: Will delete manual_input later
uint32_t cu_count = 0;
err = hsa_agent_get_info(*gpu_dev,
(hsa_agent_info_t) HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &cu_count);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
std::cout << "CU# is: " << cu_count << std::endl;
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
rocrtst::InitializeAQLPacket(this, &aql());
aql().workgroup_size_x = 1024;
//manual_input * group_input; // workgroup_max_size;
aql().grid_size_x = (long long) 1024 * 640 * 640;
// TODO:Manully set the max cu number to 8, the api return 10
std::cout << "Grid size is: " << aql().grid_size_x << std::endl;
err = hsa_amd_agent_iterate_memory_pools(*cpu_dev,
rocrtst::FindGlobalPool, &cpu_pool());
ASSERT_EQ(err, HSA_STATUS_INFO_BREAK);
}
// Returns num_iteration() scaled by 1.2 plus one, truncated to size_t.
size_t CuMasking::RealIterationNum() {
  const double padded_count = num_iteration() * 1.2 + 1;
  return static_cast<size_t>(padded_count);
}
void CuMasking::Run() {
hsa_status_t err;
if (!rocrtst::CheckProfile(this)) {
return;
}
std::vector<double> timer;
typedef struct args_t {
uint32_t* iteration;
uint32_t* result;
} local_args;
uint32_t* iter = NULL;
uint32_t* result = NULL;
err = hsa_amd_memory_pool_allocate(cpu_pool(), sizeof(uint32_t), 0,
(void**) &iter);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
err = hsa_amd_memory_pool_allocate(cpu_pool(), sizeof(uint32_t), 0,
(void**) &result);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
*iter = 0xff;
*result = 0;
err = hsa_amd_agents_allow_access(1, gpu_device1(), NULL, iter);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
err = hsa_amd_agents_allow_access(1, gpu_device1(), NULL, result);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
local_args* kernarg = NULL;
err = hsa_amd_memory_pool_allocate(cpu_pool(), kernarg_size(), 0,
(void**) &kernarg);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
err = hsa_amd_agents_allow_access(1, gpu_device1(), NULL, kernarg);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
kernarg->iteration = iter;
kernarg->result = result;
aql().kernarg_address = kernarg;
// Obtain the current queue write inex.
uint64_t index = hsa_queue_add_write_index_relaxed(main_queue(), 1);
// Write the aql packet at the calculate queue index address.
const uint32_t queue_mask = main_queue()->size - 1;
// Set CU mask
uint32_t cu_mask = 0;
#if 0
std::cout << "Enter cu mask value:" << std::endl;
ASSERT_NE(scanf("%d", &cu_mask), EOF);
#else
cu_mask = 0xAAAAAAAA;
#endif
std::cout << "Value of bit array is: 0x" << std::hex << cu_mask << std::endl;
err = hsa_amd_queue_cu_set_mask(main_queue(), 32, &cu_mask);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
void *q_base_addr = main_queue()->base_address;
// Write the aql packet at the calculate queue index address.
aql().completion_signal = signal();
((hsa_kernel_dispatch_packet_t*)(q_base_addr))[index & queue_mask] = aql();
// Get timing stamp an ring the doorbell to dispatch the kernel.
rocrtst::PerfTimer p_timer;
int id = p_timer.CreateTimer();
p_timer.StartTimer(id);
((hsa_kernel_dispatch_packet_t*)q_base_addr)[index & queue_mask].header |=
HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
hsa_signal_store_screlease(main_queue()->doorbell_signal, index);
// Wait on the dispatch signal until the kernel is finished.
while (hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_LT, 1,
(uint64_t) - 1, HSA_WAIT_STATE_ACTIVE))
;
p_timer.StopTimer(id);
hsa_signal_store_screlease(signal(), 1);
double t1 = p_timer.ReadTimer(id) * 1e6;
std::cout << "Execution time after setting cu masking: " << t1 << std::endl;
return;
}
// Prints two separator lines; this test reports its timing from Run().
// Nothing is printed when rocrtst::CheckProfile() rejects this test.
void CuMasking::DisplayResults() const {
  if (rocrtst::CheckProfile(this)) {
    std::cout << "===================================================="
              << std::endl;
    std::cout << "====================================================="
              << std::endl;
  }
}
void CuMasking::Close() {
hsa_status_t err;
err = rocrtst::CommonCleanUp(this);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}
@@ -0,0 +1,103 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef __ROCRTST_SRC_CU_MASKING_TIME_H__
#define __ROCRTST_SRC_CU_MASKING_TIME_H__
#include "perf_common/perf_base.h"
#include "common/base_rocr.h"
#include "common/common.h"
#include "common/hsatimer.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#include "hsa/hsa_ext_finalize.h"
#include <algorithm>
#include <vector>
//@Brief: Test that applies a compute-unit (CU) mask to the dispatch queue
//and times a single kernel dispatch with the mask in place
class CuMasking: public rocrtst::BaseRocR, public PerfBase {
public:
//@Brief: Constructor
CuMasking();
//@Brief: Destructor
virtual ~CuMasking();
//@Brief: Set up the environment for the test
virtual void SetUp();
//@Brief: Run the test case: set the CU mask, dispatch once, print the time
virtual void Run();
//@Brief: Print the result separators
virtual void DisplayResults() const;
//@Brief: Clean up via rocrtst::CommonCleanUp() and assert it succeeded
virtual void Close();
private:
//@Brief: Get actual iteration number (num_iteration() * 1.2 + 1)
virtual size_t RealIterationNum();
//@Brief: Store the size of queue
uint32_t queue_size_;
//@Brief: The mean time of CP Processing
double mean_;
//@Brief: The group memory region
hsa_region_t group_region_;
//@Brief: Pointer to cu_id array
uint32_t* cu_;
// NOTE(review): manual_input and group_input only appear in commented-out
// code in the implementation (marked "TODO: Will delete manual_input
// later") - confirm whether they can be removed.
uint32_t manual_input;
uint32_t group_input;
};
#endif
@@ -0,0 +1,293 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include "device_load_bandwidth.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "common/os.h"
#include "gtest/gtest.h"
#include <algorithm>
// TODO: The validation code below is disabled because it still has problems; debug it before re-enabling.
#if 0
// Fills the first num_ops * num_thrds entries of in_data with the pattern
// (thread id << 2), repeated once per operation.
//
// in_data   - destination buffer; must hold at least num_ops * num_thrds
//             uint32_t values
// num_thrds - number of threads (length of one pattern repetition)
// num_ops   - number of repetitions of the pattern
// num_loops - kernel loop count; the buffer contents do not depend on it,
//             but nothing is written when it is 0
//
// Every loop iteration used to rewrite exactly the same entries with the
// same values, so the buffer is now populated once instead of num_loops
// times.
static void initGlobalReadBuffer(uint32_t* in_data, uint32_t num_thrds,
                                 uint32_t num_ops, uint32_t num_loops) {
  if (num_loops == 0) {
    return;
  }

  uint32_t val_idx = 0;
  for (uint32_t op = 0; op < num_ops; op++) {
    // Write the value to be read by each thread
    for (uint32_t thrd = 0; thrd < num_thrds; thrd++) {
      in_data[val_idx++] = thrd << 2;
    }
  }
}
// Validates the kernel output buffer: entry i must equal (i << 2) * scale.
//
// data        - output buffer with at least num_thrds entries
// num_thrds   - number of entries to check
// scale       - factor applied to the per-thread pattern
// kernel_name - label used in the diagnostic output
//
// Returns true when every entry matches. On the first mismatch the expected
// and actual values are printed and false is returned.
static bool verifyGlobalLoadKernel(uint32_t* data, uint32_t num_thrds,
                                   uint32_t scale, const char* kernel_name) {
  // Advance to the first entry that does not hold the expected value.
  uint32_t pos = 0;
  while (pos < num_thrds && data[pos] == (pos << 2) * scale) {
    ++pos;
  }

  if (pos < num_thrds) {
    const uint32_t expected = (pos << 2) * scale;
    std::cout << "Value expected = " << expected << std::endl;
    std::cout << "Value of data = " << data[pos] << std::endl;
    std::cout << kernel_name << ": VALIDATION FAILED ! Bad index: " << pos
              << std::endl;
    std::cout << kernel_name << ": VALUE @ Bad index: " << data[pos]
              << std::endl;
    std::cout << std::endl;
    return false;
  }

#ifdef DEBUG
  std::cout << kernel_name << ": Passed validation" << std::endl;
  std::cout << std::endl;
#endif
  return true;
}
#endif
// Constructor: puts every data member into a known state. The buffer
// pointers start out null because Close() hands them to
// hsa_amd_memory_pool_free() even when Run() never allocated them.
DeviceLoadBandwidth::DeviceLoadBandwidth() :
    BaseRocR(),
    num_group_(0),
    num_cus_(0),
    kernel_loop_count_(0),
    mean_(0.0),
    data_size_(0),
    in_data_(nullptr),
    out_data_(nullptr) {
  set_group_size(0);
  set_enable_interrupt(false);
  set_requires_profile(HSA_PROFILE_BASE);
}
// Destructor: nothing to release here; buffers are freed in Close().
DeviceLoadBandwidth::~DeviceLoadBandwidth() = default;
// Set up the test environment
void DeviceLoadBandwidth::SetUp() {
SetWorkItemNum();
set_kernel_file_name("sysMemRead.o");
set_kernel_name("&__SysMemLoad");
if (HSA_STATUS_SUCCESS != rocrtst::InitAndSetupHSA(this)) {
return;
}
hsa_agent_t* gpu_dev = gpu_device1();
//Create a queue with max number size
hsa_queue_t* q = nullptr;
rocrtst::CreateQueue(*gpu_dev, &q);
ASSERT_NE(q, nullptr);
set_main_queue(q);
rocrtst::LoadKernelFromObjFile(this);
uint32_t total_work_items = num_cus_ * num_group_ * group_size();
//Fill up part of aql
rocrtst::InitializeAQLPacket(this, &aql());
aql().workgroup_size_x = group_size();
aql().grid_size_x = total_work_items;
return;
}
// Run the test
void DeviceLoadBandwidth::Run() {
hsa_status_t err;
if (!rocrtst::CheckProfile(this)) {
return;
}
uint32_t total_workitems = num_cus_ * num_group_ * group_size();
uint32_t ops_thrd = 32;
uint64_t addr_step = (uint64_t) total_workitems * sizeof(uint64_t);
uint64_t total_ops = (uint64_t) total_workitems * ops_thrd;
uint64_t in_data_size = (uint64_t) total_ops * sizeof(uint64_t);
data_size_ = in_data_size;
err = rocrtst::SetPoolsTypical(this);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
err = rocrtst::AllocAndAllowAccess(this, in_data_size, device_pool(),
(void**)&in_data_);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
//uint32_t out_data_size = total_workitems * sizeof(uint64_t);
uint32_t out_data_size = in_data_size;
err = rocrtst::AllocAndAllowAccess(this, out_data_size, device_pool(),
(void**)&out_data_);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
#if 0
initGlobalReadBuffer(in_data_, total_workitems, ops_thrd, kernel_loop_count_);
#endif
struct local_args_t {
void* arg0;
void* arg1;
uint64_t arg2;
void* arg3;
} local_args;
local_args.arg0 = in_data_;
local_args.arg1 = in_data_ + total_ops;
local_args.arg2 = addr_step;
local_args.arg3 = out_data_;
// Copy the kernel args structure into a registered memory block
err = rocrtst::AllocAndSetKernArgs(this, &local_args, sizeof(local_args));
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
std::vector<double> time;
rocrtst::WriteAQLToQueue(this);
// Write the aql packet at the calculated queue index address.
const uint32_t queue_mask = main_queue()->size - 1;
void * q_base = main_queue()->base_address;
for (uint32_t i = 0; i < num_iteration(); i++) {
uint64_t que_idx = hsa_queue_load_write_index_relaxed(main_queue());
rocrtst::PerfTimer p_timer;
int id = p_timer.CreateTimer();
p_timer.StartTimer(id);
uint32_t aql_header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
rocrtst::AtomicSetPacketHeader(aql_header, aql().setup,
&((hsa_kernel_dispatch_packet_t*)(q_base))[que_idx & queue_mask]);
hsa_signal_store_screlease(main_queue()->doorbell_signal, que_idx);
// Wait on the dispatch signal until the kernel is finished.
while (hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_LT, 1,
(uint64_t) - 1, HSA_WAIT_STATE_ACTIVE))
;
p_timer.StopTimer(id);
#ifdef DEBUG
std::cout << "." << std::flush;
#endif
#if 0
// Verify the results
uint32_t scale = kernel_loop_count_ * ops_thrd;
verifyGlobalLoadKernel(out_data_, total_workitems, scale,
kernel_name().c_str());
#endif
time.push_back(p_timer.ReadTimer(id));
hsa_signal_store_screlease(signal(), 1);
}
#ifdef DEBUG
std::cout << std::endl;
#endif
time.erase(time.begin());
std::sort(time.begin(), time.end());
time.erase(time.begin() + num_iteration(), time.end());
mean_ = rocrtst::CalcMean(time);
return;
}
// Releases the input and output buffers and then runs the common teardown.
// Each pointer is reset to nullptr after it is freed, so a repeated Close()
// does not free the same block twice; null pointers are skipped.
void DeviceLoadBandwidth::Close() {
  hsa_status_t err;

  if (in_data_ != nullptr) {
    err = hsa_amd_memory_pool_free(in_data_);
    EXPECT_EQ(err, HSA_STATUS_SUCCESS);
    in_data_ = nullptr;
  }

  if (out_data_ != nullptr) {
    err = hsa_amd_memory_pool_free(out_data_);
    EXPECT_EQ(err, HSA_STATUS_SUCCESS);
    out_data_ = nullptr;
  }

  err = rocrtst::CommonCleanUp(this);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}
// Prints the measured load bandwidth: data_size_ bytes divided by the mean
// dispatch time, scaled by 1024^3.
// Nothing is printed when rocrtst::CheckProfile() rejects this test, or when
// no mean was recorded (mean_ == 0.0), since dividing by it would report a
// meaningless bandwidth.
void DeviceLoadBandwidth::DisplayResults() const {
  if (!rocrtst::CheckProfile(this)) {
    return;
  }

  if (mean_ == 0.0) {
    return;
  }

  std::cout << "=======================================" << std::endl;
  std::cout << "Device Load Bandwidth: ";
  std::cout << data_size_ / mean_ / 1024 / 1024 / 1024 << "(GB/S)" << std::endl;
  std::cout << "=======================================" << std::endl;
}
@@ -0,0 +1,119 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef __ROCRTST_SRC_INC_DEVICE_LOAD_BANDWIDTH_H__
#define __ROCRTST_SRC_INC_DEVICE_LOAD_BANDWIDTH_H__
#include "perf_common/perf_base.h"
#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include <stdio.h>
//@Brief: Performance test that times repeated dispatches of a load kernel
//and reports the resulting bandwidth
class DeviceLoadBandwidth: public rocrtst::BaseRocR, public PerfBase {
 public:
  //@Brief: Constructor
  DeviceLoadBandwidth();

  //@Brief: Destructor
  ~DeviceLoadBandwidth();

  //@Brief: Set up the testing environment
  virtual void SetUp();

  //@Brief: Run the test case
  virtual void Run();

  //@Brief: Close and clean up the test environment
  virtual void Close();

  //@Brief: Display load bandwidth
  virtual void DisplayResults() const;

  //@Brief: Set work-item configuration
  // With INTERACTIVE defined the values are read from stdin; a value that
  // cannot be parsed falls back to the built-in default. Otherwise the
  // built-in defaults are used directly.
  void SetWorkItemNum() {
    const uint32_t kDefaultCus = 16;
    const uint32_t kDefaultGroups = 128;
    const uint32_t kDefaultGroupSize = 64;
    const uint32_t kDefaultKernelLoops = 16;
#ifdef INTERACTIVE
    num_cus_ = PromptForCount(
        "Please input the number of CUs you want to try:", kDefaultCus);
    num_group_ = PromptForCount(
        "Please input the number of groups you want to try:", kDefaultGroups);
    set_group_size(PromptForCount(
        "Please input the size of each group:", kDefaultGroupSize));
    kernel_loop_count_ = PromptForCount(
        "Please input the number of kernel loop you want to try:",
        kDefaultKernelLoops);
#else
    num_cus_ = kDefaultCus;
    num_group_ = kDefaultGroups;
    set_group_size(kDefaultGroupSize);
    kernel_loop_count_ = kDefaultKernelLoops;
#endif
  }

 private:
#ifdef INTERACTIVE
  //@Brief: Print prompt, read one unsigned value from stdin and return it;
  // returns fallback when the input cannot be parsed. Reads into an
  // unsigned int with "%u" so the conversion matches the argument type.
  static uint32_t PromptForCount(const char* prompt, uint32_t fallback) {
    unsigned int value = 0;
    printf("%s\n", prompt);
    if (scanf("%u", &value) != 1) {
      return fallback;
    }
    return static_cast<uint32_t>(value);
  }
#endif

  //@Brief: number of group
  uint32_t num_group_;

  //@Brief: number of CUs
  uint32_t num_cus_;

  //@Brief: number of kernel loop
  uint32_t kernel_loop_count_;

  //@Brief: Mean execution time
  double mean_;

  //@Brief: data size for test
  uint64_t data_size_;

  //@Brief: Input and output buffers allocated in Run() and released in
  // Close(); null until allocated
  uint32_t* in_data_ = nullptr;
  uint32_t* out_data_ = nullptr;
};
#endif
@@ -0,0 +1,219 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include "device_store_bandwidth.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
// Constructor: zero-initialises every data member and requests the base
// HSA profile.
DeviceStoreBandwidth::DeviceStoreBandwidth() :
    BaseRocR(),
    num_group_(0),
    num_cus_(0),
    kernel_loop_count_(0),
    mean_(0.0),
    data_size_(0),
    in_data_(nullptr),
    out_data_(nullptr) {
  set_group_size(0);
  set_requires_profile(HSA_PROFILE_BASE);
}
// Destructor: nothing to release here; buffers are freed in Close().
DeviceStoreBandwidth::~DeviceStoreBandwidth() = default;
// Set up the test environment
void DeviceStoreBandwidth::SetUp() {
SetWorkItemNum();
set_kernel_file_name("sysMemWrite.o");
set_kernel_name("&__SysMemStore");
if (HSA_STATUS_SUCCESS != rocrtst::InitAndSetupHSA(this)) {
return;
}
hsa_agent_t* gpu_dev = gpu_device1();
//Create a queue with max number size
hsa_queue_t* q = nullptr;
rocrtst::CreateQueue(*gpu_dev, &q);
ASSERT_NE(q, nullptr);
set_main_queue(q);
rocrtst::LoadKernelFromObjFile(this);
uint32_t total_work_items = num_cus_ * num_group_ * group_size();
//Fill up part of aql
rocrtst::InitializeAQLPacket(this, &aql());
aql().workgroup_size_x = group_size();
aql().grid_size_x = total_work_items;
return;
}
// Run the test
void DeviceStoreBandwidth::Run() {
hsa_status_t err;
if (!rocrtst::CheckProfile(this)) {
return;
}
uint32_t total_workitems = num_cus_ * num_group_ * group_size();
uint32_t ops_thrd = 16;
uint64_t addr_step = (uint64_t) total_workitems * sizeof(uint32_t);
uint64_t total_ops = (uint64_t) total_workitems * kernel_loop_count_
* ops_thrd;
uint64_t in_data_size = (uint64_t) total_ops * sizeof(uint32_t);
data_size_ = in_data_size;
err = rocrtst::SetPoolsTypical(this);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
err = rocrtst::AllocAndAllowAccess(this, in_data_size, device_pool(),
(void**)&in_data_);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
uint32_t out_data_size = total_workitems * sizeof(uint32_t);
err = rocrtst::AllocAndAllowAccess(this, out_data_size, device_pool(),
(void**)&out_data_);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
struct local_args_t {
void* arg0;
void* arg1;
uint64_t arg2;
void* arg3;
} local_args;
local_args.arg0 = in_data_;
local_args.arg1 = in_data_ + total_ops;
local_args.arg2 = addr_step;
local_args.arg3 = out_data_;
// Copy the kernel args structure into a registered memory block
err = rocrtst::AllocAndSetKernArgs(this, &local_args, sizeof(local_args));
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
std::vector<double> time;
rocrtst::WriteAQLToQueue(this);
for (uint32_t i = 0; i < num_iteration(); i++) {
uint64_t que_idx = hsa_queue_load_write_index_relaxed(main_queue());
// Write the aql packet at the calculated queue index address.
const uint32_t queue_mask = main_queue()->size - 1;
rocrtst::PerfTimer p_timer;
int id = p_timer.CreateTimer();
p_timer.StartTimer(id);
void * q_base = main_queue()->base_address;
uint32_t aql_header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
rocrtst::AtomicSetPacketHeader(aql_header, aql().setup,
&((hsa_kernel_dispatch_packet_t*)(q_base))[que_idx & queue_mask]);
hsa_signal_store_screlease(main_queue()->doorbell_signal, que_idx);
// Wait on the dispatch signal until the kernel is finished.
while (hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_LT, 1,
(uint64_t) - 1, HSA_WAIT_STATE_ACTIVE))
;
p_timer.StopTimer(id);
#ifdef DEBUG
std::cout << "." << std::flush;
#endif
time.push_back(p_timer.ReadTimer(id));
hsa_signal_store_screlease(signal(), 1);
}
#ifdef DEBUG
std::cout << std::endl;
#endif
time.erase(time.begin());
mean_ = rocrtst::CalcMean(time);
return;
}
// Releases the input and output buffers and then runs the common teardown.
// The buffers are null until Run() allocates them, so null pointers are
// skipped; each pointer is reset to nullptr after it is freed so a repeated
// Close() does not free the same block twice.
void DeviceStoreBandwidth::Close() {
  hsa_status_t err;

  if (in_data_ != nullptr) {
    err = hsa_amd_memory_pool_free(in_data_);
    EXPECT_EQ(err, HSA_STATUS_SUCCESS);
    in_data_ = nullptr;
  }

  if (out_data_ != nullptr) {
    err = hsa_amd_memory_pool_free(out_data_);
    EXPECT_EQ(err, HSA_STATUS_SUCCESS);
    out_data_ = nullptr;
  }

  err = rocrtst::CommonCleanUp(this);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}
// Prints the measured store bandwidth: data_size_ bytes divided by the mean
// dispatch time, scaled by 1024^3.
// Nothing is printed when rocrtst::CheckProfile() rejects this test, or when
// no mean was recorded (mean_ == 0.0), since dividing by it would report a
// meaningless bandwidth.
void DeviceStoreBandwidth::DisplayResults() const {
  if (!rocrtst::CheckProfile(this)) {
    return;
  }

  if (mean_ == 0.0) {
    return;
  }

  std::cout << "=======================================" << std::endl;
  std::cout << "Device Store Bandwidth: ";
  std::cout << data_size_ / mean_ / 1024 / 1024 / 1024 << "(GB/S)" << std::endl;
  std::cout << "=======================================" << std::endl;
}
@@ -0,0 +1,119 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef __ROCRTST_SRC_INC_DEVICE_STORE_BANDWIDTH_H__
#define __ROCRTST_SRC_INC_DEVICE_STORE_BANDWIDTH_H__
#include "perf_common/perf_base.h"
#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include <stdio.h>
// Performance test built on rocrtst::BaseRocR and PerfBase. Only the
// work-item configuration is implemented inline; every other member function
// is defined outside this class declaration.
class DeviceStoreBandwidth: public rocrtst::BaseRocR, public PerfBase {
 public:
  //@Brief: Constructor
  DeviceStoreBandwidth();

  //@Brief: Destructor
  ~DeviceStoreBandwidth();

  //@Brief: Set up the testing environment
  virtual void SetUp();

  //@Brief: Run the test case
  virtual void Run();

  //@Brief: Close and clean up the test environment
  virtual void Close();

  //@Brief: Display the results of the test
  virtual void DisplayResults() const;

  //@Brief: Set work-item configuration
  // When INTERACTIVE is defined the four settings are read from stdin; a
  // value that cannot be parsed leaves the corresponding setting untouched.
  // Otherwise a fixed configuration is applied.
  void SetWorkItemNum() {
#ifdef INTERACTIVE
    ReadUint32("Please input the number of CUs you want to try:\n",
               &num_cus_);
    ReadUint32("Please input the number of groups you want to try:\n",
               &num_group_);

    // The group size lives in a base class, so it goes through its setter
    // and is only forwarded when a value was actually read.
    uint32_t requested_group_size = 0;
    if (ReadUint32("Please input the size of each group:\n",
                   &requested_group_size)) {
      set_group_size(requested_group_size);
    }

    ReadUint32("Please input the number of kernel loop you want to try:\n",
               &kernel_loop_count_);
#else
    num_cus_ = 32;
    num_group_ = 128;
    set_group_size(64);
    kernel_loop_count_ = 16;
#endif
    return;
  }

 private:
#ifdef INTERACTIVE
  //@Brief: Print a prompt and read one unsigned decimal value from stdin
  // Returns true and stores the value in *value when parsing succeeds;
  // returns false and leaves *value unchanged otherwise. The value is read
  // with "%u" into an unsigned int so the conversion specifier matches the
  // argument type.
  static bool ReadUint32(const char* prompt, uint32_t* value) {
    printf("%s", prompt);
    unsigned int parsed = 0;
    if (scanf("%u", &parsed) != 1) {
      return false;
    }
    *value = parsed;
    return true;
  }
#endif

  //@Brief: number of group
  uint32_t num_group_;

  //@Brief: number of CUs
  uint32_t num_cus_;

  //@Brief: number of kernel loop
  uint32_t kernel_loop_count_;

  //@Brief: Mean execution time
  double mean_;

  //@Brief: data size for test
  uint64_t data_size_;

  // Data buffer pointers; how they are allocated and used is not visible in
  // this declaration.
  uint32_t* in_data_;
  uint32_t* out_data_;
};
#endif
@@ -0,0 +1,331 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include "dispatch_time.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/os.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_finalize.h"
#include <algorithm>
// Constructor: interrupt signals and multi-packet launches are the default
// configuration, a batch is 100000 packets until SetUp() clamps it, and all
// measured means start at zero.
DispatchTime::DispatchTime()
    : BaseRocR(),
      use_default_(false),
      launch_single_(false),
      queue_size_(0),
      num_batch_(100000),
      single_default_mean_(0.0),
      single_interrupt_mean_(0.0),
      multi_default_mean_(0.0),
      multi_interrupt_mean_(0.0),
      // Previously left indeterminate; give the pointer a defined value.
      orig_iterrupt_env_(nullptr) {
  // Start from an all-zero AQL packet; SetUp() fills in the real fields.
  memset(&aql(), 0, sizeof(hsa_kernel_dispatch_packet_t));
}
// Destructor: nothing is released here; cleanup is performed by Close().
DispatchTime::~DispatchTime() {}
// Prepares the test: selects the signal type, names the kernel, initializes
// HSA, creates the main queue, clamps the batch size and pre-fills the AQL
// packet for a single work-item dispatch.
void DispatchTime::SetUp() {
  // Interrupt signals are enabled exactly when the default signal was not
  // requested.
  set_enable_interrupt(!use_default_);

  set_kernel_file_name("empty_kernel.o");
  set_kernel_name("&__Empty_kernel");

  if (rocrtst::InitAndSetupHSA(this) != HSA_STATUS_SUCCESS) {
    return;
  }

  hsa_agent_t* gpu_dev = gpu_device1();

  // Create the queue all dispatches of this test go through.
  hsa_queue_t* q = nullptr;
  rocrtst::CreateQueue(*gpu_dev, &q);
  ASSERT_NE(q, nullptr);
  set_main_queue(q);

  // A batch may not hold more packets than the agent's maximum queue size.
  if (!launch_single_) {
    uint32_t size = 0;
    hsa_status_t err =
        hsa_agent_get_info(*gpu_dev, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &size);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    num_batch_ = std::min(num_batch_, size);
  }

  rocrtst::LoadKernelFromObjFile(this);

  // Everything except the packet header is filled in here; the header is
  // written at dispatch time.
  rocrtst::InitializeAQLPacket(this, &aql());
  aql().grid_size_x = 1;
  aql().workgroup_size_x = 1;
}
// Runs the measurement selected by launch_single_, provided the profile
// check passes.
void DispatchTime::Run() {
  if (!rocrtst::CheckProfile(this)) {
    return;
  }

  if (!launch_single_) {
    RunMulti();
    return;
  }

  RunSingle();
}
// Number of dispatches actually performed: 1.2 times the requested iteration
// count plus one, truncated to an integer. The surplus samples are discarded
// by RunSingle()/RunMulti().
size_t DispatchTime::RealIterationNum() {
  const double padded_count = num_iteration() * 1.2 + 1;
  return padded_count;
}
void DispatchTime::RunSingle() {
std::vector<double> timer;
int it = RealIterationNum();
const uint32_t queue_mask = main_queue()->size - 1;
//queue should be empty
ASSERT_EQ(hsa_queue_load_read_index_scacquire(main_queue()),
hsa_queue_load_write_index_scacquire(main_queue()));
void *q_base_addr = main_queue()->base_address;
for (int i = 0; i < it; i++) {
//Obtain the current queue write index.
uint64_t index = hsa_queue_add_write_index_relaxed(main_queue(), 1);
ASSERT_LT(index, main_queue()->size + index);
//Write the aql packet at the calculated queue index address.
((hsa_kernel_dispatch_packet_t*)q_base_addr)[index & queue_mask] = aql();
//Get timing stamp and ring the doorbell to dispatch the kernel.
rocrtst::PerfTimer p_timer;
int id = p_timer.CreateTimer();
p_timer.StartTimer(id);
((hsa_kernel_dispatch_packet_t*)q_base_addr)[index & queue_mask].header |=
HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
hsa_signal_store_screlease(main_queue()->doorbell_signal, index);
//Wait on the dispatch signal until the kernel is finished.
while (hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_LT, 1,
(uint64_t) - 1, HSA_WAIT_STATE_ACTIVE))
;
p_timer.StopTimer(id);
timer.push_back(p_timer.ReadTimer(id));
hsa_signal_store_screlease(signal(), 1);
#ifdef DEBUG
std::cout << ".";
fflush(stdout);
#endif
}
std::cout << std::endl;
//Abandon the first result and after sort, delete the last 2% value
timer.erase(timer.begin());
std::sort(timer.begin(), timer.end());
timer.erase(timer.begin() + num_iteration(), timer.end());
if (use_default_) {
single_default_mean_ = rocrtst::CalcMean(timer);
}
else {
single_interrupt_mean_ = rocrtst::CalcMean(timer);
}
return;
}
void DispatchTime::RunMulti() {
std::vector<double> timer;
int it = RealIterationNum();
const uint32_t queue_mask = main_queue()->size - 1;
//queue should be empty
ASSERT_EQ(hsa_queue_load_read_index_scacquire(main_queue()),
hsa_queue_load_write_index_scacquire(main_queue()));
for (int i = 0; i < it; i++) {
uint64_t* index = (uint64_t*) malloc(sizeof(uint64_t) * num_batch_);
hsa_signal_store_screlease(signal(), num_batch_);
for (uint32_t j = 0; j < num_batch_; j++) {
//index[j] = hsa_queue_add_write_index_scacq_screl(main_queue(), 1);
index[j] = hsa_queue_add_write_index_relaxed(main_queue(), 1);
//Write the aql packet at the calculated queue index address.
((hsa_kernel_dispatch_packet_t*) (main_queue()->base_address))[index[j]
& queue_mask] = aql();
if (j == num_batch_ - 1) {
((hsa_kernel_dispatch_packet_t*) (main_queue()->base_address))[index[j]
& queue_mask].header |= 1 << HSA_PACKET_HEADER_BARRIER;
//TODO: verify if the below is needed. I don't think it is. It should
// already be initialized to signal().
((hsa_kernel_dispatch_packet_t*) (main_queue()->base_address))[index[j]
& queue_mask].completion_signal = signal();
}
}
// Set packet header reversly; set all headers except the very first
// one, for now.
for (uint32_t j = num_batch_ - 1; j > 0; j--) {
((hsa_kernel_dispatch_packet_t*) (main_queue()->base_address))[index[j]
& queue_mask].header |= HSA_PACKET_TYPE_KERNEL_DISPATCH
<< HSA_PACKET_HEADER_TYPE;
}
//Get timing stamp and ring the doorbell to dispatch the kernel.
rocrtst::PerfTimer p_timer;
int id = p_timer.CreateTimer();
p_timer.StartTimer(id);
//Set the very first header...
((hsa_kernel_dispatch_packet_t*) (main_queue()->base_address))[index[0]
& queue_mask].header |= HSA_PACKET_TYPE_KERNEL_DISPATCH
<< HSA_PACKET_HEADER_TYPE;
for (uint32_t j = 0; j < num_batch_; j++) {
hsa_signal_store_screlease(main_queue()->doorbell_signal, index[j]);
}
//Wait on the dispatch signal until the kernel is finished.
while (hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_EQ, 0,
UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0)
;
p_timer.StopTimer(id);
timer.push_back(p_timer.ReadTimer(id));
hsa_signal_store_screlease(signal(), 1);
free(index);
#ifdef DEBUG
std::cout << ".";
fflush(stdout);
#endif
}
std::cout << std::endl;
// Abandon the first result and after sort, delete the last 2% value
timer.erase(timer.begin());
std::sort(timer.begin(), timer.end());
timer.erase(timer.begin() + num_iteration(), timer.end());
if (use_default_) {
multi_default_mean_ = rocrtst::CalcMean(timer);
}
else {
multi_interrupt_mean_ = rocrtst::CalcMean(timer);
}
return;
}
// Prints the mean that matches the current configuration, scaled by 1e6.
// For multi-packet runs the value is additionally divided by the batch size.
void DispatchTime::DisplayResults() const {
  if (!rocrtst::CheckProfile(this)) {
    return;
  }

  // Pick the label and the stored mean for this signal/launch combination.
  const char* label;
  double mean;

  if (launch_single_) {
    label = use_default_ ? "Single_Default:     " : "Single_Interrupt:   ";
    mean = use_default_ ? single_default_mean_ : single_interrupt_mean_;
  }
  else {
    label = use_default_ ? "Multi_Default:      " : "Multi_Interrupt:    ";
    mean = use_default_ ? multi_default_mean_ : multi_interrupt_mean_;
  }

  double scaled = mean * 1e6;

  if (!launch_single_) {
    scaled = scaled / num_batch_;
  }

  std::cout << "===================================================="
            << std::endl;
  std::cout << label << scaled << std::endl;
  std::cout << "====================================================="
            << std::endl;
}
void DispatchTime::Close() {
hsa_status_t err;
err = rocrtst::CommonCleanUp(this);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
return;
}
@@ -0,0 +1,125 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef __ROCRTST_SRC_DISPATCH_TIME_H__
#define __ROCRTST_SRC_DISPATCH_TIME_H__
#include "perf_common/perf_base.h"
#include "common/base_rocr.h"
#include "common/common.h"
#include "hsa/hsa.h"
#include <vector>
// Measures the mean latency of launching an empty kernel, either one packet
// at a time or in batches, using either default or interrupt signals.
class DispatchTime: public rocrtst::BaseRocR, public PerfBase {
 public:
  // Constructor.
  DispatchTime();

  // Destructor.
  virtual ~DispatchTime();

  // Prepares the environment for the test.
  virtual void SetUp();

  // Executes the test case.
  virtual void Run();

  // Prints the measured result.
  virtual void DisplayResults() const;

  // Cleans up and closes the runtime.
  virtual void Close();

  // Selects whether the default signal is used (true when called without an
  // argument).
  void UseDefaultSignal(bool use_default = true) {
    use_default_ = use_default;
  }

  // Selects whether kernels are launched one at a time (true when called
  // without an argument).
  void LaunchSingleKernel(bool launch_single = true) {
    launch_single_ = launch_single;
  }

 private:
  // Returns the number of iterations that are actually executed.
  virtual size_t RealIterationNum();

  // Launches a single packet per measurement.
  virtual void RunSingle();

  // Launches multiple packets per measurement.
  virtual void RunMulti();

  // True when the default signal is used.
  bool use_default_;

  // True when a single kernel is launched per measurement.
  bool launch_single_;

  // Size of the queue.
  uint32_t queue_size_;

  // Number of packets in one batch.
  uint32_t num_batch_;

  // Mean dispatch time: single packet, default signal.
  double single_default_mean_;

  // Mean dispatch time: single packet, interrupt signal.
  double single_interrupt_mean_;

  // Mean dispatch time: batch of packets, default signal.
  double multi_default_mean_;

  // Mean dispatch time: batch of packets, interrupt signal.
  double multi_interrupt_mean_;

  // NOTE(review): presumably holds the original interrupt environment
  // setting; no use of this member is visible here - confirm.
  char* orig_iterrupt_env_;
};
#endif
@@ -0,0 +1,351 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include "flush_latency.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "common/os.h"
#include "gtest/gtest.h"
#include <algorithm>
// Base work-item count; FlushLatency::SetUp() scales it to size the grid.
// NOTE(review): 1204 may be a transposition of 1024 - confirm before changing.
static constexpr int kWorkItem = 1204 * 1024;
// Constructor: zeroes every tunable and result, and requires the HSA base
// profile.
FlushLatency::FlushLatency()
    : BaseRocR(),
      num_group_(0),
      num_cus_(0),
      kernel_loop_count_(0),
      mean_(0.0),
      data_size_(0) {
  set_group_size(0);
  set_requires_profile(HSA_PROFILE_BASE);
}
// Destructor: nothing is released here; cleanup is performed by Close().
FlushLatency::~FlushLatency() {}
// Set up the test environment
void FlushLatency::SetUp() {
hsa_status_t err;
SetWorkItemNum();
set_kernel_file_name("flush_latency.o");
set_kernel_name("&main");
if (HSA_STATUS_SUCCESS != rocrtst::InitAndSetupHSA(this)) {
return;
}
hsa_agent_t* gpu_dev = gpu_device1();
//Create a queue with max number size
hsa_queue_t* q;
rocrtst::CreateQueue(*gpu_dev, &q);
set_main_queue(q);
//Enable profiling
err = hsa_amd_profiling_set_profiler_enabled(main_queue(), 1);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
rocrtst::LoadKernelFromObjFile(this);
uint32_t total_work_items = kWorkItem * 0.3;
//Fill up part of aql
rocrtst::InitializeAQLPacket(this, &aql());
aql().workgroup_size_x = group_size();
aql().grid_size_x = total_work_items;
return;
}
// Run the test
void FlushLatency::Run() {
hsa_status_t err;
hsa_amd_memory_pool_t cpu_pool;
if (!rocrtst::CheckProfile(this)) {
return;
}
hsa_agent_t* gpu_dev = gpu_device1();
hsa_agent_t* cpu_dev = cpu_device();
err = hsa_amd_agent_iterate_memory_pools(*gpu_dev, rocrtst::FindStandardPool,
&device_pool());
ASSERT_EQ(err, HSA_STATUS_INFO_BREAK);
ASSERT_NE(device_pool().handle, 0);
cpu_pool.handle = 0;
err = hsa_amd_agent_iterate_memory_pools(*cpu_dev, rocrtst::FindGlobalPool,
&cpu_pool);
ASSERT_EQ(err, HSA_STATUS_INFO_BREAK);
ASSERT_NE(cpu_pool.handle, 0);
#if DEBUG
std::cout << "Device Pool Properties:" << std::endl;
err = rocrtst::DumpMemoryPoolInfo(device_pool());
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
std::cout << "Global Pool Properties:" << std::endl;
err = rocrtst::DumpMemoryPoolInfo(cpu_pool);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
#endif
uint32_t out_data_size = 1024 * 1024 * sizeof(uint32_t);
std::vector<double> time_none;
std::vector<double> time_release;
std::vector < uint64_t > time_none_stamp;
std::vector < uint64_t > time_release_stamp;
//Query system timestamp frequency
uint64_t freq;
err = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &freq);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
void* out = NULL;
uint32_t* out_data;
const uint32_t queue_mask = main_queue()->size - 1;
typedef struct local_args_t {
void* arg0;
} args;
// Warm up
uint16_t header = 0;
header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
aql().header = header;
err = hsa_amd_memory_pool_allocate(device_pool(), out_data_size, 0,
(void**) &out_data);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
args* kern_ptr = NULL;
err = hsa_amd_memory_pool_allocate(cpu_pool, sizeof(args), 0,
(void**) &kern_ptr);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
kern_ptr->arg0 = out_data;
aql().kernarg_address = kern_ptr;
// Obtain the current queue write index
int64_t index = hsa_queue_add_write_index_relaxed(main_queue(), 1);
void *q_base_addr = main_queue()->base_address;
// Write the aql packet at the calculated queue index address.
((hsa_kernel_dispatch_packet_t*)q_base_addr)[index & queue_mask] = aql();
hsa_signal_store_screlease(main_queue()->doorbell_signal, index);
// Wait on the dispatch signal until the kernel is finished.
while (hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_LT, 1,
(uint64_t) - 1, HSA_WAIT_STATE_ACTIVE))
;
hsa_signal_store_screlease(signal(), 1);
for (int i = 0; i < 1000; i++) {
err = hsa_amd_memory_pool_allocate(device_pool(), out_data_size, 0,
(void**) &out_data);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
args* kern_ptr = NULL;
err = hsa_amd_memory_pool_allocate(cpu_pool, sizeof(args), 0,
(void**) &kern_ptr);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
kern_ptr->arg0 = out_data;
aql().kernarg_address = kern_ptr;
// Obtain the current queue write index
int64_t index = hsa_queue_add_write_index_relaxed(main_queue(), 1);
// Write the aql packet at the calculated queue index address.
((hsa_kernel_dispatch_packet_t*)q_base_addr)[index & queue_mask] = aql();
hsa_signal_store_screlease(main_queue()->doorbell_signal, index);
// Wait on the dispatch signal until the kernel is finished.
while (hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_LT, 1,
(uint64_t) - 1, HSA_WAIT_STATE_ACTIVE))
;
hsa_amd_profiling_dispatch_time_t dispatch_time;
err = hsa_amd_profiling_get_dispatch_time(*gpu_dev, signal(),
&dispatch_time);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
uint64_t sys_start = 0;
uint64_t sys_end = 0;
err = hsa_amd_profiling_convert_tick_to_system_domain(*gpu_dev,
dispatch_time.start, &sys_start);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
err = hsa_amd_profiling_convert_tick_to_system_domain(*gpu_dev,
dispatch_time.end, &sys_end);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
uint64_t stamp = dispatch_time.end - dispatch_time.start;
double execution_time = (double) stamp / freq * 1e6; // convert to us.
time_none.push_back(execution_time);
time_none_stamp.push_back(stamp);
hsa_signal_store_screlease(signal(), 1);
if (out != NULL) {
err = hsa_memory_free(out);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}
out = out_data;
out_data = NULL;
}
header = 0;
header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
aql().header = header;
for (int i = 0; i < 1000; i++) {
err = hsa_amd_memory_pool_allocate(device_pool(), out_data_size, 0,
(void**) &out_data);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
args* kern_ptr = NULL;
err = hsa_amd_memory_pool_allocate(cpu_pool, sizeof(args), 0,
(void**) &kern_ptr);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
kern_ptr->arg0 = out_data;
aql().kernarg_address = kern_ptr;
// Obtain the current queue write index
uint64_t index = hsa_queue_add_write_index_relaxed(main_queue(), 1);
// Write the aql packet at the calculated queue index address.
((hsa_kernel_dispatch_packet_t*)q_base_addr)[index & queue_mask] = aql();
hsa_signal_store_screlease(main_queue()->doorbell_signal, index);
// Wait on the dispatch signal until the kernel is finished.
while (hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_LT, 1,
(uint64_t) - 1, HSA_WAIT_STATE_ACTIVE))
;
hsa_signal_store_screlease(signal(), 1);
hsa_amd_profiling_dispatch_time_t dispatch_time;
err = hsa_amd_profiling_get_dispatch_time(*gpu_dev, signal(),
&dispatch_time);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
uint64_t sys_start = 0;
uint64_t sys_end = 0;
err = hsa_amd_profiling_convert_tick_to_system_domain(*gpu_dev,
dispatch_time.start, &sys_start);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
err = hsa_amd_profiling_convert_tick_to_system_domain(*gpu_dev,
dispatch_time.end, &sys_end);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
uint64_t stamp = dispatch_time.end - dispatch_time.start;
double execution_time = (double) stamp / freq * 1e6; // convert to us.
time_release.push_back(execution_time);
time_release_stamp.push_back(stamp);
if (out != NULL) {
err = hsa_memory_free(out);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}
out = out_data;
out_data = NULL;
}
std::sort(time_none.begin(), time_none.end());
std::sort(time_release.begin(), time_release.end());
time_none.erase(time_none.begin(), time_none.begin() + 50);
time_none.erase(time_none.end() - 50, time_none.end());
time_release.erase(time_release.begin(), time_release.begin() + 50);
time_release.erase(time_release.end() - 50, time_release.end());
mean_ = rocrtst::CalcMean(time_none, time_release);
return;
}
void FlushLatency::Close() {
hsa_status_t err;
err = rocrtst::CommonCleanUp(this);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}
// Prints the value stored in mean_ by Run(), framed by separator lines.
// Nothing is printed when the profile check fails.
void FlushLatency::DisplayResults() const {
  if (rocrtst::CheckProfile(this)) {
    std::cout << std::endl
              << "=======================================" << std::endl
              << "Average cache flush overhead:    " << mean_ << "uS"
              << std::endl
              << "=======================================" << std::endl;
  }
}
@@ -0,0 +1,122 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef __ROCRTST_SRC_INC_FLUSH_LATENCY_H__
#define __ROCRTST_SRC_INC_FLUSH_LATENCY_H__
#include "perf_common/perf_base.h"
#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include <stdio.h>
// Performance test built on rocrtst::BaseRocR and PerfBase that reports a
// cache flush overhead. Only the work-item configuration is implemented
// inline; every other member function is defined outside this declaration.
class FlushLatency: public rocrtst::BaseRocR, public PerfBase {
 public:
  //@Brief: Constructor
  FlushLatency();

  //@Brief: Destructor
  ~FlushLatency();

  //@Brief: Set up the testing environment
  virtual void SetUp();

  //@Brief: Run the test case
  virtual void Run();

  //@Brief: Close and clean up the test environment
  virtual void Close();

  //@Brief: Display the results of the test
  virtual void DisplayResults() const;

  //@Brief: Set work-item configuration
  // When INTERACTIVE is defined the four settings are read from stdin; a
  // value that cannot be parsed leaves the corresponding setting untouched.
  // Otherwise a fixed configuration is applied.
  void SetWorkItemNum() {
#ifdef INTERACTIVE
    ReadUint32("Please input the number of CUs you want to try:\n",
               &num_cus_);
    ReadUint32("Please input the number of groups you want to try:\n",
               &num_group_);

    // The group size lives in a base class, so it goes through its setter
    // and is only forwarded when a value was actually read.
    uint32_t requested_group_size = 0;
    if (ReadUint32("Please input the size of each group:\n",
                   &requested_group_size)) {
      set_group_size(requested_group_size);
    }

    ReadUint32("Please input the number of kernel loop you want to try:\n",
               &kernel_loop_count_);
#else
    num_cus_ = 32;
    num_group_ = 128;
    // Must go through the setter: group_size() is what the dispatch setup
    // reads. Assigning a member of this class named group_size_ (as was done
    // before) never reached it.
    set_group_size(256);
    kernel_loop_count_ = 16;
#endif
    return;
  }

 private:
#ifdef INTERACTIVE
  //@Brief: Print a prompt and read one unsigned decimal value from stdin
  // Returns true and stores the value in *value when parsing succeeds;
  // returns false and leaves *value unchanged otherwise. The value is read
  // with "%u" into an unsigned int so the conversion specifier matches the
  // argument type.
  static bool ReadUint32(const char* prompt, uint32_t* value) {
    printf("%s", prompt);
    unsigned int parsed = 0;
    if (scanf("%u", &parsed) != 1) {
      return false;
    }
    *value = parsed;
    return true;
  }
#endif

  //@Brief: number of group
  uint32_t num_group_;

  //@Brief: number of CUs
  uint32_t num_cus_;

  //@Brief: number of kernel loop
  uint32_t kernel_loop_count_;

  //@Brief: Mean execution time
  double mean_;

  //@Brief: data size for test
  uint64_t data_size_;
};
#endif
@@ -0,0 +1,502 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "gtest/gtest.h"
#include "hsa_info.h"
// Callback passed to hsa_iterate_agents() by HsaInfo::Run().
static hsa_status_t get_agent_info(hsa_agent_t agent, void* data);

// Memory-pool counterpart of get_agent_info().
static hsa_status_t get_pool_info(hsa_amd_memory_pool_t pool, void* data);

// Running count of agents; incremented by get_agent_info().
static int agent_number{0};

// True when the HSA_VENDOR_AMD environment variable starts with '1'
// (set in HsaInfo::SetUp()).
static bool output_amd{false};
//@Brief: Map to store the peak FLOPS for different agent
// Keys must match the entries of agent_names exactly. Two keys had stray
// spaces ("S pectre" here, "Carri zo CPU" in agent_names), which made the
// two tables disagree.
std::map<std::string, double> flops_table = {
  {"Kaveri CPU", 118.4},
  {"Spectre", 737.0},
  {"Carrizo CPU", 67.2},
  {"Carrizo GPU", 819.2}
};

//@Brief: Vector to store the agent_names
// Every entry has a corresponding key in flops_table.
std::vector<std::string> agent_names = {
  "Kaveri CPU",
  "Spectre",
  "Carrizo CPU",
  "Carrizo GPU"
};
// Constructor: only the BaseRocR base is explicitly initialized.
HsaInfo::HsaInfo() : BaseRocR() {}
// Destructor: nothing to release.
HsaInfo::~HsaInfo() {}
// Reads the HSA_VENDOR_AMD environment variable to decide whether AMD
// specific information is printed, then initializes HSA.
void HsaInfo::SetUp() {
  // Only a set variable changes the flag; it is enabled when the value
  // starts with '1'.
  const char* vendor_env = rocrtst::GetEnv("HSA_VENDOR_AMD");

  if (vendor_env != NULL) {
    output_amd = (*vendor_env == '1');
  }

  // Nothing follows the initialization, so its status needs no handling here.
  if (rocrtst::InitAndSetupHSA(this) != HSA_STATUS_SUCCESS) {
    return;
  }
}
// Queries the system-wide HSA attributes, prints them, and then iterates
// over every agent with get_agent_info().
void HsaInfo::Run() {
  hsa_status_t err;

  if (!rocrtst::CheckProfile(this)) {
    return;
  }

  // Runtime version
  uint16_t version_major, version_minor;
  err = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR, &version_major);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  err = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &version_minor);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Timestamp frequency
  uint64_t timestamp_frequency = 0;
  err = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY,
                            &timestamp_frequency);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Maximum duration of a signal wait operation
  uint64_t max_wait = 0;
  err = hsa_system_get_info(HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT, &max_wait);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Endianness of the system
  hsa_endianness_t endianness;
  err = hsa_system_get_info(HSA_SYSTEM_INFO_ENDIANNESS, &endianness);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Machine model
  hsa_machine_model_t machine_model;
  err = hsa_system_get_info(HSA_SYSTEM_INFO_MACHINE_MODEL, &machine_model);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Report
  std::cout << "HSA System Info:" << std::endl;
  std::cout << "Runtime Version:               " << version_major << "."
            << version_minor << std::endl;
  std::cout << "System Timestamp Frequency:    " << timestamp_frequency / 1e6
            << "MHz" << std::endl;
  std::cout << "Signal Max Wait Duration:      " << max_wait
            << "(number of timestamp)" << std::endl;

  // An unrecognized value prints nothing after the label (not even a line
  // break), for both the machine model and the endianness.
  std::cout << "Machine Model:                 ";

  switch (machine_model) {
    case HSA_MACHINE_MODEL_SMALL:
      std::cout << "SMALL" << std::endl;
      break;

    case HSA_MACHINE_MODEL_LARGE:
      std::cout << "LARGE" << std::endl;
      break;

    default:
      break;
  }

  std::cout << "System Endianness:             ";

  switch (endianness) {
    case HSA_ENDIANNESS_LITTLE:
      std::cout << "LITTLE" << std::endl;
      break;

    case HSA_ENDIANNESS_BIG:
      std::cout << "BIG" << std::endl;
      break;

    default:
      break;
  }

  std::cout << std::endl;

  // Iterate every agent and get their info
  err = hsa_iterate_agents(get_agent_info, NULL);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}
// If `err` is not HSA_STATUS_SUCCESS, prints the line and file of the call
// site and returns `err` from the enclosing function.
// The expansion is a bare brace block and the call sites in this file omit the
// trailing semicolon, so the macro must keep this shape. `err` is evaluated
// twice on the failure path; pass a plain variable, not an expression with
// side effects.
#define RET_IF_HSA_INFO_ERR(err) { \
if ((err) != HSA_STATUS_SUCCESS) { \
std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
__FILE__ << std::endl; \
return (err); \
} \
}
// Agent-iteration callback: queries and prints the attributes of one agent,
// then iterates over the agent's memory pools and prints each via
// get_pool_info.
//
// agent - the agent being visited.
// data  - unused by this callback.
//
// Returns HSA_STATUS_SUCCESS when every query succeeds; otherwise the first
// failing status is returned through RET_IF_HSA_INFO_ERR. Everything is
// queried first and printed afterwards, so a failing query in the first phase
// prints nothing for that agent.
static hsa_status_t get_agent_info(hsa_agent_t agent, void* data) {
// Counter handed to get_pool_info so pools are numbered per agent.
int pool_number = 0;
hsa_status_t err;
{
// Increase the number of agent
agent_number++;
// Get agent name and vendor
char name[64];
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, name);
RET_IF_HSA_INFO_ERR(err)
char vendor_name[64];
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_VENDOR_NAME, &vendor_name);
RET_IF_HSA_INFO_ERR(err)
// Get agent feature
hsa_agent_feature_t agent_feature;
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_FEATURE, &agent_feature);
RET_IF_HSA_INFO_ERR(err)
// Get profile supported by the agent
hsa_profile_t agent_profile;
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_profile);
RET_IF_HSA_INFO_ERR(err)
// Get floating-point rounding mode
hsa_default_float_rounding_mode_t float_rounding_mode;
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE,
&float_rounding_mode);
RET_IF_HSA_INFO_ERR(err)
// Get max number of queue
uint32_t max_queue = 0;
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUES_MAX, &max_queue);
RET_IF_HSA_INFO_ERR(err)
// Get queue min size
uint32_t queue_min_size = 0;
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MIN_SIZE,
&queue_min_size);
RET_IF_HSA_INFO_ERR(err)
// Get queue max size
uint32_t queue_max_size = 0;
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE,
&queue_max_size);
RET_IF_HSA_INFO_ERR(err)
// Get queue type
hsa_queue_type_t queue_type;
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_TYPE, &queue_type);
RET_IF_HSA_INFO_ERR(err)
// Get agent node
uint32_t node;
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NODE, &node);
RET_IF_HSA_INFO_ERR(err)
// Get device type
hsa_device_type_t device_type;
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
RET_IF_HSA_INFO_ERR(err)
// Get cache size (one entry per cache level, printed below as L1..L4)
uint32_t cache_size[4];
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_CACHE_SIZE, cache_size);
RET_IF_HSA_INFO_ERR(err)
// The HSA_AMD_* attributes below are AMD extensions; they are cast to
// hsa_agent_info_t to go through the core query entry point.
// Get chip id
uint32_t chip_id = 0;
err = hsa_agent_get_info(agent,
(hsa_agent_info_t) HSA_AMD_AGENT_INFO_CHIP_ID,
&chip_id);
RET_IF_HSA_INFO_ERR(err)
// Get cacheline size
uint32_t cacheline_size = 0;
err = hsa_agent_get_info(agent,
(hsa_agent_info_t) HSA_AMD_AGENT_INFO_CACHELINE_SIZE,
&cacheline_size);
RET_IF_HSA_INFO_ERR(err)
// Get Max clock frequency
uint32_t max_clock_freq = 0;
err = hsa_agent_get_info(agent,
(hsa_agent_info_t) HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY,
&max_clock_freq);
RET_IF_HSA_INFO_ERR(err)
// Get Agent BDFID
// NOTE(review): confirm the width the runtime writes for this attribute
// against hsa_ext_amd.h; if it is wider than 16 bits this buffer is too small.
uint16_t bdf_id = 1;
err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_BDFID,
&bdf_id);
RET_IF_HSA_INFO_ERR(err)
// Get number of Compute Unit
uint32_t compute_unit = 0;
err = hsa_agent_get_info(agent,
(hsa_agent_info_t) HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT,
&compute_unit);
RET_IF_HSA_INFO_ERR(err)
// Print out the common results
std::cout << std::endl;
std::cout << "Agent #" << agent_number << ":" << std::endl;
std::cout << "Agent Name: " << name <<
std::endl;
std::cout << "Agent Vendor Name: " <<
vendor_name << std::endl;
// agent_feature is a bit mask; report which dispatch kinds are set.
if (agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH
&& agent_feature & HSA_AGENT_FEATURE_AGENT_DISPATCH)
std::cout << "Agent Feature: KERNEL_DISPATCH & AGENT_DISPATCH"
<< std::endl;
else if (agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH) {
std::cout << "Agent Feature: KERNEL_DISPATCH" << std::endl;
}
else if (agent_feature & HSA_AGENT_FEATURE_AGENT_DISPATCH) {
std::cout << "Agent Feature: AGENT_DISPATCH" << std::endl;
}
else {
std::cout << "Agent Feature: Not Supported" << std::endl;
}
if (HSA_PROFILE_BASE == agent_profile) {
std::cout << "Agent Profile: BASE_PROFILE" << std::endl;
}
else if (HSA_PROFILE_FULL == agent_profile) {
std::cout << "Agent Profile: FULL_PROFILE" << std::endl;
}
else {
std::cout << "Agent Profile: Not Supported" << std::endl;
}
if (HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO == float_rounding_mode) {
std::cout << "Agent Floating Rounding Mode: ZERO" << std::endl;
}
else if (HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR == float_rounding_mode) {
std::cout << "Agent Floating Rounding Mode: NEAR" << std::endl;
}
else {
std::cout << "Agent Floating Rounding Mode: Not Supported" << std::endl;
}
std::cout << "Agent Max Queue Number: " << max_queue << std::endl;
std::cout << "Agent Queue Min Size: " << queue_min_size << std::endl;
std::cout << "Agent Queue Max Size: " << queue_max_size << std::endl;
if (HSA_QUEUE_TYPE_MULTI == queue_type) {
std::cout << "Agent Queue Type: MULTI" << std::endl;
}
else if (HSA_QUEUE_TYPE_SINGLE == queue_type) {
std::cout << "Agent Queue Type: SINGLE" << std::endl;
}
else {
std::cout << "Agent Queue Type: Not Supported" << std::endl;
}
std::cout << "Agent Node: " << node << std::endl;
if (HSA_DEVICE_TYPE_CPU == device_type) {
std::cout << "Agent Device Type: CPU" << std::endl;
}
else if (HSA_DEVICE_TYPE_GPU == device_type) {
std::cout << "Agent Device Type: GPU" << std::endl;
// Get ISA info
// NOTE(review): the ISA handle is queried but never printed or used.
hsa_isa_t agent_isa;
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &agent_isa);
RET_IF_HSA_INFO_ERR(err)
}
else {
// Every device type other than CPU and GPU is reported as DSP.
std::cout << "Agent Device Type: DSP" << std::endl;
}
std::cout << "Agent Cache Info:" << std::endl;
// Only cache levels with a non-zero size are listed.
for (int i = 0; i < 4; i++) {
if (cache_size[i]) {
std::cout << " $L" << i + 1 << ": " << cache_size[i] / 1024
<< "KB" << std::endl;
}
}
std::cout << "Agent Chip ID: " << chip_id << std::endl;
std::cout << "Agent Cacheline Size: " << cacheline_size << std::endl;
std::cout << "Agent Max Clock Frequency: " << max_clock_freq << "MHz"
<< std::endl;
std::cout << "Agent BDFID: " << bdf_id << std::endl;
std::cout << "Agent Compute Unit: " << compute_unit << std::endl;
// Output Peak FLOPS and Peak Bandwidth if Env var is set
// TODO: Fan, need to add BW
if (output_amd) {
std::string agent_name = name;
// Only names listed in agent_names are looked up in flops_table.
for (size_t i = 0; i < agent_names.size(); i++) {
if (agent_name.compare(agent_names[i]) == 0)
std::cout << "Agent Peak GFLOPS: " << flops_table[agent_name]
<< std::endl;
}
}
// Check if the agent is kernel agent
if (agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH) {
// Get flag of fast_f16 operation
bool fast_f16;
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_FAST_F16_OPERATION,
&fast_f16);
RET_IF_HSA_INFO_ERR(err)
// Get wavefront size
uint32_t wavefront_size = 0;
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE,
&wavefront_size);
RET_IF_HSA_INFO_ERR(err)
// Get max total number of work-items in a workgroup
uint32_t workgroup_max_size = 0;
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_SIZE,
&workgroup_max_size);
RET_IF_HSA_INFO_ERR(err)
// Get max number of work-items of each dimension of a work-group
uint16_t workgroup_max_dim[3];
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_DIM,
&workgroup_max_dim);
RET_IF_HSA_INFO_ERR(err)
// Get max number of a grid per dimension
hsa_dim3_t grid_max_dim;
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_DIM,
&grid_max_dim);
RET_IF_HSA_INFO_ERR(err)
// Get max total number of work-items in a grid
uint32_t grid_max_size = 0;
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_SIZE,
&grid_max_size);
RET_IF_HSA_INFO_ERR(err)
// Get max number of fbarriers per work group
uint32_t fbarrier_max_size = 0;
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_FBARRIER_MAX_SIZE,
&fbarrier_max_size);
RET_IF_HSA_INFO_ERR(err)
// Print info for kernel agent
// The fast-F16 line is printed only when the flag is true.
if (true == fast_f16) {
std::cout << "Agent Fast F16 Operation: TRUE" <<
std::endl;
}
std::cout << "Agent Wavefront Size: " <<
wavefront_size << std::endl;
std::cout << "Agent Workgroup Max Size: " <<
workgroup_max_size << std::endl;
std::cout <<
"Agent Workgroup Max Size Per Dimension: " <<
std::endl;
for (int i = 0; i < 3; i++) {
std::cout << " Dim[" << i <<
"]: " << workgroup_max_dim[i] <<
std::endl;
}
std::cout << "Agent Grid Max Size: " <<
grid_max_size << std::endl;
// Stop using the above kmt functions as per SWDEV-97044
//
uint32_t waves_per_cu = 0;
err = hsa_agent_get_info(agent,
(hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU,
&waves_per_cu);
RET_IF_HSA_INFO_ERR(err)
std::cout << "Agent Waves Per CU: " <<
waves_per_cu << std::endl;
std::cout << "Agent Max Work-item Per CU: "
<< wavefront_size* waves_per_cu << std::endl;
std::cout << "Agent Grid Max Size per Dimension:" << std::endl;
// hsa_dim3_t is read here as three consecutive uint32_t values.
for (int i = 0; i < 3; i++) {
std::cout << " Dim[" << i <<
"] "
<< reinterpret_cast<uint32_t*>(&grid_max_dim)[i] << std::endl;
}
std::cout << "Agent Max number Of fbarriers Per Workgroup: "
<< fbarrier_max_size << std::endl;
}
}
// Get pool info
std::cout << "Agent Pool Info:" << std::endl;
err = hsa_amd_agent_iterate_memory_pools(agent, get_pool_info, &pool_number);
RET_IF_HSA_INFO_ERR(err)
return HSA_STATUS_SUCCESS;
}
// Memory-pool iteration callback. `data` points at an int pool counter owned
// by the caller; it is incremented and used to label the pool before the
// pool's details are dumped. Returns the status of the dump.
hsa_status_t get_pool_info(hsa_amd_memory_pool_t pool, void* data) {
  int& pool_count = *reinterpret_cast<int*>(data);
  ++pool_count;

  std::cout << " Pool #" << pool_count << ":" << std::endl;

  const hsa_status_t status = rocrtst::DumpMemoryPoolInfo(pool, 4);
  RET_IF_HSA_INFO_ERR(status)
  return status;
}
#undef RET_IF_HSA_INFO_ERR
// Run() prints everything as it goes, so only the profile check is left here.
void HsaInfo::DisplayResults() const {
  (void) rocrtst::CheckProfile(this);
}
void HsaInfo::Close() {
hsa_status_t err;
err = rocrtst::CommonCleanUp(this);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
return;
}
@@ -0,0 +1,85 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef __ROCRTST_SRC_HSA_INFO_H__
#define __ROCRTST_SRC_HSA_INFO_H__
#include "perf_common/perf_base.h"
#include "common/base_rocr.h"
#include "common/common.h"
#include "common/os.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#include <iostream>
#include <map>
#include <string>
#include <vector>
//@Brief: Test that prints HSA system, agent and memory-pool attributes,
// in the spirit of the OpenCL clinfo tool
class HsaInfo: public rocrtst::BaseRocR, public PerfBase {
public:
//@Brief: Constructor
HsaInfo();
//@Brief: Destructor
virtual ~HsaInfo();
//@Brief: Read the HSA_VENDOR_AMD environment variable and initialize the
// runtime
virtual void SetUp();
//@Brief: Query and print the system and per-agent information
virtual void Run();
//@Brief: Nothing to display; all output is produced by Run()
virtual void DisplayResults() const;
//@Brief: Clean up and close the runtime
virtual void Close();
};
#endif
@@ -0,0 +1,328 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include "image_bandwidth.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_image.h"
#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
// Sets up a 2D RGBA image format with unsigned 8-bit channels, zeroes the
// three bandwidth tables and marks the test as requiring the full profile.
// NOTE(review): `num` is not used in this constructor; presumably it was meant
// to set the iteration count. Confirm against PerfBase.
ImageBandwidth::ImageBandwidth(size_t num) :
  BaseRocR(),
  import_bandwidth_ {0.0},
  export_bandwidth_ {0.0},
  copy_bandwidth_ {0.0} {
  set_requires_profile(HSA_PROFILE_FULL);

  geometry_ = HSA_EXT_IMAGE_GEOMETRY_2D;
  format_.channel_type = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8;
  format_.channel_order = HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA;
}
// Nothing to release here; runtime teardown happens in Close().
ImageBandwidth::~ImageBandwidth() {}
// Edge length, in pixels, of each square test image.
const size_t ImageBandwidth::Size[10] = {
  32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384
};
// Label for the matching entry of Size: the byte size of a Size x Size image
// with 4-byte pixels.
const char* const ImageBandwidth::Str[10] = {
  "4K", "16K", "64K", "256K", "1M", "4M", "16M", "64M", "256M", "1G"
};
void ImageBandwidth::SetUp() {
hsa_status_t err;
if (HSA_STATUS_SUCCESS != rocrtst::InitAndSetupHSA(this)) {
return;
}
hsa_agent_t* gpu_dev = gpu_device1();
// Find the global region
err = hsa_amd_agent_iterate_memory_pools(*gpu_dev, rocrtst::FindGlobalPool,
&cpu_pool());
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}
// Measures host-to-image import, image-to-host export and image-to-image copy
// bandwidth for each of the ten square image sizes in Size[], storing the
// results in import_bandwidth_, export_bandwidth_ and copy_bandwidth_.
// Every HSA failure aborts the test through ASSERT_*; all buffers and image
// handles created for a size are released before moving to the next size.
void ImageBandwidth::Run() {
  hsa_status_t err;

  if (!rocrtst::CheckProfile(this)) {
    return;
  }

  hsa_agent_t* gpu_dev = gpu_device1();

  // Pattern written to the host buffer; every export must read it back.
  const uint32_t kFillPattern = 0x10101010;

  for (int i = 0; i < 10; i++) {
    const size_t pixel_count = Size[i] * Size[i];
    const size_t buffer_bytes = pixel_count * sizeof(uint32_t);

    // Create timer for import, export and copy tests
    rocrtst::PerfTimer import_timer;
    rocrtst::PerfTimer export_timer;
    rocrtst::PerfTimer copy_timer;
    std::vector<double> import_image;
    std::vector<double> export_image;
    std::vector<double> copy_image;

    // Allocate image buffer in host memory
    uint32_t* image_buffer = NULL;
    err = hsa_amd_memory_pool_allocate(cpu_pool(), buffer_bytes, 0,
                                       (void**) &image_buffer);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    // Initialize the image buffer with the test pattern
    for (size_t j = 0; j < pixel_count; j++) {
      image_buffer[j] = kFillPattern;
    }

    // Prepare for 2D image creation
    hsa_ext_image_t image_handle;
    hsa_ext_image_descriptor_t image_descriptor;
    image_descriptor.geometry = geometry_;
    image_descriptor.width = Size[i];
    image_descriptor.height = Size[i];
    image_descriptor.depth = 1;
    image_descriptor.array_size = 0;
    image_descriptor.format = format_;

    // Check if device_ supports at least read and write operation on
    // image format
    uint32_t capability_mask;
    err = hsa_ext_image_get_capability(*gpu_dev, HSA_EXT_IMAGE_GEOMETRY_2D,
                                       &format_, &capability_mask);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    if (!(capability_mask & HSA_EXT_IMAGE_CAPABILITY_READ_WRITE)) {
      std::cout <<
                "Device does not support read and write operation on this kind of image!"
                << std::endl;
      ASSERT_NE(capability_mask & HSA_EXT_IMAGE_CAPABILITY_READ_WRITE, 0);
    }

    // Get image info
    hsa_ext_image_data_info_t image_info;
    err = hsa_ext_image_data_get_info(*gpu_dev, &image_descriptor,
                                      HSA_ACCESS_PERMISSION_RW, &image_info);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    // hsa_amd_memory_fill takes a count of uint32_t elements, not bytes.
    // Any trailing bytes beyond a multiple of four are left untouched.
    const size_t image_words = image_info.size / sizeof(uint32_t);

    // Allocate memory for image, with slack for manual alignment
    uintptr_t ptr_temp = 0;
    err = hsa_amd_memory_pool_allocate(cpu_pool(),
                                       image_info.size + image_info.alignment,
                                       0, (void**) &ptr_temp);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    // Align the image address (always rounds up to the next boundary)
    uintptr_t mul = ptr_temp / image_info.alignment;
    void* ptr_image = (void*) ((mul + 1) * image_info.alignment);

    // Clear the image to 0
    err = hsa_amd_memory_fill(ptr_image, 0, image_words);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    // Create image handle
    err = hsa_ext_image_create(*gpu_dev, &image_descriptor, ptr_image,
                               HSA_ACCESS_PERMISSION_RW, &image_handle);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    // Set import image region
    hsa_dim3_t range = {(uint32_t) Size[i], (uint32_t) Size[i], 1};
    hsa_ext_image_region_t image_region;
    hsa_dim3_t image_offset = {0, 0, 0};
    image_region.offset = image_offset;
    image_region.range = range;

    size_t iterations = RealIterationNum();

    for (size_t it = 0; it < iterations; it++) {
      // Create a timer
      int index = import_timer.CreateTimer();
      // Stamp at the beginning
      import_timer.StartTimer(index);
      // Import image from host
      err = hsa_ext_image_import(*gpu_dev, image_buffer, 0, 0, image_handle,
                                 &image_region);
      ASSERT_EQ(err, HSA_STATUS_SUCCESS);
      // Stamp in the end
      import_timer.StopTimer(index);
      import_image.push_back(import_timer.ReadTimer(index));
    }

    // Reset image_buffer so the export below has to rewrite every pixel
    err = hsa_amd_memory_fill(image_buffer, 0, pixel_count);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    for (size_t it = 0; it < iterations; it++) {
      // Export image
      // Stamp at the beginning
      int index = export_timer.CreateTimer();
      export_timer.StartTimer(index);
      err = hsa_ext_image_export(*gpu_dev, image_handle, image_buffer, 0, 0,
                                 &image_region);
      ASSERT_EQ(err, HSA_STATUS_SUCCESS);
      export_timer.StopTimer(index);
      export_image.push_back(export_timer.ReadTimer(index));

      // Check if the value is correct
      for (size_t j = 0; j < pixel_count; j++) {
        ASSERT_EQ(image_buffer[j], kFillPattern);
      }
    }

    // Create another image for copy
    // Allocate memory for image
    uintptr_t ptr_temp2 = 0;
    err = hsa_amd_memory_pool_allocate(cpu_pool(),
                                       image_info.size + image_info.alignment,
                                       0, (void**) &ptr_temp2);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    // Align the image address
    mul = ptr_temp2 / image_info.alignment;
    void* ptr_image2 = (void*) ((mul + 1) * image_info.alignment);

    // Clear the image to 0
    err = hsa_amd_memory_fill(ptr_image2, 0, image_words);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    // Create image handle
    hsa_ext_image_t image_handle_copy;
    err = hsa_ext_image_create(*gpu_dev, &image_descriptor, ptr_image2,
                               HSA_ACCESS_PERMISSION_RW, &image_handle_copy);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    for (size_t it = 0; it < iterations; it++) {
      // Stamp at the beginning
      int index = copy_timer.CreateTimer();
      copy_timer.StartTimer(index);
      err = hsa_ext_image_copy(*gpu_dev, image_handle, &image_offset,
                               image_handle_copy, &image_offset, &range);
      ASSERT_EQ(err, HSA_STATUS_SUCCESS);
      // Stamp in the end
      copy_timer.StopTimer(index);
      copy_image.push_back(copy_timer.ReadTimer(index));

      // Check if image data is correct: clear the host buffer, export the
      // copied image into it and compare against the pattern
      err = hsa_amd_memory_fill(image_buffer, 0, pixel_count);
      ASSERT_EQ(err, HSA_STATUS_SUCCESS);

      // Export image
      err = hsa_ext_image_export(*gpu_dev, image_handle_copy, image_buffer,
                                 0, 0, &image_region);
      ASSERT_EQ(err, HSA_STATUS_SUCCESS);

      // Check if the value is correct
      for (size_t j = 0; j < pixel_count; j++) {
        ASSERT_EQ(image_buffer[j], kFillPattern);
      }
    }

    // Calculate Bandwidth
    import_bandwidth_[i] = CalculateBandwidth(import_image, Size[i]);
    export_bandwidth_[i] = CalculateBandwidth(export_image, Size[i]);
    copy_bandwidth_[i] = CalculateBandwidth(copy_image, Size[i]);

    // Release everything created for this size; the largest size needs
    // roughly 1 GB per buffer, so nothing may be carried into the next pass.
    err = hsa_ext_image_destroy(*gpu_dev, image_handle_copy);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    err = hsa_ext_image_destroy(*gpu_dev, image_handle);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    err = hsa_amd_memory_pool_free((void*) ptr_temp2);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    err = hsa_amd_memory_pool_free((void*) ptr_temp);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    err = hsa_amd_memory_pool_free(image_buffer);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  }
}
// Computes the bandwidth in GB/s for a square image of `size` x `size` pixels
// at 4 bytes per pixel from the timing samples in `vec`.
//
// vec  - per-iteration timings; modified in place: the first (warm-up) sample
//        is dropped, the rest are sorted ascending, and only the fastest
//        num_iteration() samples are kept.
// size - edge length of the image in pixels.
//
// Returns 0.0 when no samples remain to average.
double ImageBandwidth::CalculateBandwidth(std::vector<double>& vec,
    size_t size) {
  if (vec.empty()) {
    return 0.0;
  }

  // Delete the first timer result, which is warm up test
  vec.erase(vec.begin());

  // Sort the results
  std::sort(vec.begin(), vec.end());

  // Delete the slowest results, never erasing past the end of the vector
  const size_t keep = std::min<size_t>(num_iteration(), vec.size());
  vec.erase(vec.begin() + keep, vec.end());

  if (vec.empty()) {
    return 0.0;
  }

  double total = 0.0;

  for (size_t index = 0; index < vec.size(); index++) {
    total += vec[index];
  }

  const double mean = total / vec.size();

  return (double) size * size * 4 / mean / 1024 / 1024 / 1024;
}
// Prints one row per image size with the import, export and copy bandwidth,
// followed by a separator line after each row. Prints nothing when the
// profile check fails.
void ImageBandwidth::DisplayResults() const {
  if (rocrtst::CheckProfile(this)) {
    fputs("==================================================="
          "=========================\n", stdout);
    fputs(" Size Import Export Copy\n", stdout);

    for (int row = 0; row < 10; ++row) {
      fprintf(stdout,
              " %s %f(GB/s) %f(GB/s) %f(GB/s)\n",
              Str[row], import_bandwidth_[row], export_bandwidth_[row],
              copy_bandwidth_[row]);
      fputs("================================================="
            "===========================\n", stdout);
    }
  }
}
void ImageBandwidth::Close() {
hsa_status_t err;
err = rocrtst::CommonCleanUp(this);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}
// Number of timed passes actually executed: 20% more than num_iteration()
// plus one, leaving room for the warm-up sample and the slowest samples that
// CalculateBandwidth discards.
size_t ImageBandwidth::RealIterationNum() {
  const double padded = num_iteration() * 1.2 + 1;
  return padded;
}
@@ -0,0 +1,99 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef __ROCRTST_SRC_IMAGE_BANDWIDTH_H__
#define __ROCRTST_SRC_IMAGE_BANDWIDTH_H__
#include "perf_common/perf_base.h"
#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_image.h"
#include <vector>
//@Brief: Measures image import, export and copy bandwidth for ten square
// image sizes
class ImageBandwidth: public rocrtst::BaseRocR, public PerfBase {
public:
//@Brief: Constructor for test case of ImageBandwidth
// NOTE(review): presumably `num` is the iteration count — confirm against the
// implementation
ImageBandwidth(size_t num = 100);
//@Brief: Destructor
virtual ~ImageBandwidth();
//@Brief: Setup the environment for measurement
virtual void SetUp();
//@Brief: Core measurement execution
virtual void Run();
//@Brief: Clean up and release the resources
virtual void Close();
//@Brief: Display results
virtual void DisplayResults() const;
private:
//@Brief: Define image size and corresponding string
static const size_t Size[10];
static const char* const Str[10];
//@Brief: Get actual iteration number
size_t RealIterationNum();
//@Brief: Calculate Bandwidth from the timing samples in vec for an image of
// size x size pixels
double CalculateBandwidth(std::vector<double>& vec, size_t size);
protected:
//@Brief: bandwidth data, one entry per image size
double import_bandwidth_[10];
double export_bandwidth_[10];
double copy_bandwidth_[10];
//@Brief: Image format
hsa_ext_image_format_t format_;
//@Brief: Image geometry
hsa_ext_image_geometry_t geometry_;
};
#endif
@@ -0,0 +1,270 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include "image_load_bandwidth.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/hsatimer.h"
#include "common/helper_funcs.h"
#include "gtest/gtest.h"
#include "hsa/hsa_ext_image.h"
#include <stdio.h>
#include <vector>
// Starts with an empty result (zero image size, zero bandwidth) and marks the
// test as requiring the full profile.
ImageLoadBandwidth::ImageLoadBandwidth() : BaseRocR() {
  image_size_ = 0;
  load_bandwidth_ = 0.0;

  set_requires_profile(HSA_PROFILE_FULL);
}
// Nothing to release here; runtime teardown happens in Close().
ImageLoadBandwidth::~ImageLoadBandwidth() {}
// Set up the environment
void ImageLoadBandwidth::SetUp() {
hsa_agent_t* gpu_dev = gpu_device1();
set_kernel_file_name("load_2d_image.o");
set_kernel_name("&__OpenCL_load_2d_image_kernel");
if (HSA_STATUS_SUCCESS != rocrtst::InitAndSetupHSA(this)) {
return;
}
//Create a queue with max number size
hsa_queue_t* q = main_queue();
rocrtst::CreateQueue(*gpu_dev, &q);
rocrtst::LoadKernelFromObjFile(this);
//Fill up part of aql
rocrtst::InitializeAQLPacket(this, &aql());
aql().setup = 0;
aql().setup |= 2 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
return;
}
// Runs the image-load kernel num_iteration() times against a freshly created
// 256x256 RGBA/uint8 read-only image, timing each dispatch, and stores the
// resulting bandwidth (image bytes divided by the mean dispatch time, in GB/s)
// in load_bandwidth_. The first sample is treated as warm-up and discarded.
// Every HSA failure aborts the test through ASSERT_*.
void ImageLoadBandwidth::Run() {
  hsa_agent_t* gpu_dev = gpu_device1();
  hsa_agent_t* cpu_dev = cpu_device();
  hsa_status_t err;

  if (!rocrtst::CheckProfile(this)) {
    return;
  }

  hsa_ext_image_descriptor_t image_descriptor;
  image_descriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
  image_descriptor.width = 256;
  image_descriptor.height = 256;
  image_descriptor.depth = 1;
  image_descriptor.array_size = 0;
  image_descriptor.format.channel_type =
    HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8;
  image_descriptor.format.channel_order = HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA;

  hsa_ext_image_format_t image_format;
  image_format.channel_type = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8;
  image_format.channel_order = HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA;

  // Check if device_ supports at least read only operation on image format
  uint32_t capability_mask;
  err = hsa_ext_image_get_capability(*gpu_dev, HSA_EXT_IMAGE_GEOMETRY_2D,
                                     &image_format, &capability_mask);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  if (!(capability_mask & HSA_EXT_IMAGE_CAPABILITY_READ_ONLY)) {
    ASSERT_FALSE(
      "Device does not support read and write operation on this kind of image!");
  }

  // Get image info
  hsa_ext_image_data_info_t image_info;
  err = hsa_ext_image_data_get_info(*gpu_dev, &image_descriptor,
                                    HSA_ACCESS_PERMISSION_RO, &image_info);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  image_size_ = image_info.size;

  // Layout of the kernel argument block written below.
  typedef struct args_t {
    uint64_t arg0;
    int* arg1;
    int istart;
    int iend;
    int istep;
  } args;

  std::vector<double> time;

  for (uint32_t i = 0; i < num_iteration(); i++) {
#ifdef DEBUG
    std::cout << ".";
    fflush(stdout);
#endif
    // Allocate memory space for image
    // Find the global region
    err = hsa_amd_agent_iterate_memory_pools(*cpu_dev, rocrtst::FindGlobalPool,
          &cpu_pool());
    ASSERT_EQ(err, HSA_STATUS_INFO_BREAK);

    uintptr_t ptr_temp = 0;
    err = hsa_amd_memory_pool_allocate(cpu_pool(),
                                       image_info.size + image_info.alignment,
                                       0, (void**) &ptr_temp);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    err = hsa_amd_agents_allow_access(1, gpu_dev, NULL, (void*) ptr_temp);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    // Align the image address (always rounds up to the next boundary)
    uintptr_t mul = ptr_temp / image_info.alignment;
    void* ptr_image = (void*) ((mul + 1) * image_info.alignment);

    // Initialize the image memory to 1. hsa_amd_memory_fill takes a count of
    // uint32_t elements, not bytes, so convert the byte size first.
    err = hsa_amd_memory_fill(ptr_image, 1,
                              image_info.size / sizeof(uint32_t));
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    // Create image handle
    hsa_ext_image_t image_handle;
    err = hsa_ext_image_create(*gpu_dev, &image_descriptor, ptr_image,
                               HSA_ACCESS_PERMISSION_RO, &image_handle);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    // Allocate and initialize the kernel argument
    int local_out = 5;
    int istart = 0;
    int iend = 64;
    int istep = 1;

    args* kern_ptr = NULL;
    err = hsa_amd_memory_pool_allocate(cpu_pool(), sizeof(args), 0,
                                       (void**) &kern_ptr);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    kern_ptr->arg0 = image_handle.handle;
    kern_ptr->arg1 = &local_out;
    kern_ptr->istart = istart;
    kern_ptr->iend = iend;
    kern_ptr->istep = istep;

    aql().kernarg_address = kern_ptr;

    // Obtain the current queue write index
    uint64_t index = hsa_queue_add_write_index_relaxed(main_queue(), 1);

    void *q_base_addr = main_queue()->base_address;
    // Write the aql packet at the calculated queue index address.
    const uint32_t queue_mask = main_queue()->size - 1;
    ((hsa_kernel_dispatch_packet_t*)q_base_addr)[index & queue_mask] = aql();

    rocrtst::PerfTimer p_timer;
    int id = p_timer.CreateTimer();
    p_timer.StartTimer(id);

    // Setting the packet type and ringing the doorbell starts the dispatch.
    ((hsa_kernel_dispatch_packet_t*)q_base_addr)[index & queue_mask].header |=
      HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
    hsa_signal_store_release(main_queue()->doorbell_signal, index);

    // Wait on the dispatch signal until the kernel is finished.
    while (hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_LT, 1,
                                     (uint64_t) - 1, HSA_WAIT_STATE_ACTIVE))
      ;

    p_timer.StopTimer(id);
    time.push_back(p_timer.ReadTimer(id));

    // Re-arm the completion signal for the next dispatch
    hsa_signal_store_release(signal(), 1);

    err = hsa_ext_image_destroy(*gpu_dev, image_handle);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    err = hsa_memory_deregister(ptr_image, image_info.size);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    // The kernel has completed, so both the image backing store and the
    // kernel argument block can be released.
    err = hsa_amd_memory_pool_free(kern_ptr);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    err = hsa_amd_memory_pool_free((void*) ptr_temp);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  }

  // Calculate the mean load time, discarding the warm-up sample
  if (!time.empty()) {
    time.erase(time.begin());
  }

  if (time.empty()) {
    // No samples left to average; leave load_bandwidth_ unchanged.
    return;
  }

#ifdef DEBUG

  for (uint32_t i = 0; i < time.size(); i++) {
    std::cout << time[i] << std::endl;
  }

#endif

  double mean_time = rocrtst::CalcMean(time);
  load_bandwidth_ = image_size_ / mean_time / 1024 / 1024 / 1024;
}
void ImageLoadBandwidth::Close() {
hsa_status_t err;
err = rocrtst::CommonCleanUp(this);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}
// Print the recorded image size and load bandwidth as a small table.
// Nothing is printed when rocrtst::CheckProfile rejects this test.
void ImageLoadBandwidth::DisplayResults() const {
  if (rocrtst::CheckProfile(this)) {
    // Separator, column headings, then the single result row; every row is
    // terminated with std::endl so the stream is flushed after each line.
    std::cout << "======================================"
                 "======================================" << std::endl
              << " Image Size(bytes): LoadBandwidth(GB/S): " << std::endl
              << " " << image_size_ << " " << load_bandwidth_ << std::endl;
  }
}
@@ -0,0 +1,82 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef __ROCRTST_SRC_INC_IMAGE_LOAD_BANDWIDTH_H__
#define __ROCRTST_SRC_INC_IMAGE_LOAD_BANDWIDTH_H__
#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "perf_common/perf_base.h"
// Performance test for image loads. Run() records the image data size and a
// bandwidth figure derived from the mean dispatch time; DisplayResults()
// prints both under the headings "Image Size(bytes)" and
// "LoadBandwidth(GB/S)".
class ImageLoadBandwidth: public rocrtst::BaseRocR, public PerfBase {
public:
//@Brief: Constructor
ImageLoadBandwidth();
//@Brief: Destructor
~ImageLoadBandwidth();
//@Brief: Set up the test environment; must be called before Run()
virtual void SetUp();
//@Brief: Run the actual testing; fills in image_size_ and load_bandwidth_
virtual void Run();
//@Brief: Clean up the test environment (calls rocrtst::CommonCleanUp)
virtual void Close();
//@Brief: Display results; prints nothing if rocrtst::CheckProfile fails
virtual void DisplayResults() const;
private:
//@Brief: Image Load Bandwidth, computed in Run() as
// image_size_ / mean dispatch time / 1024^3
double load_bandwidth_;
//@Brief: Image size in bytes, as reported by hsa_ext_image_data_get_info
size_t image_size_;
};
#endif //__ROCRTST_SRC_INC_IMAGE_LOAD_BANDWIDTH_H__
@@ -0,0 +1,271 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include "image_store_bandwidth.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa_ext_image.h"
#include <stdio.h>
#include <vector>
// Constructor of the class
// Constructor of the class.
// Starts with zeroed measurements (each member initialized exactly once, in
// declaration order) and marks the test as requiring the HSA full profile.
ImageStoreBandwidth::ImageStoreBandwidth() :
    BaseRocR(),
    store_bandwidth_(0.0),
    image_size_(0) {
  set_requires_profile(HSA_PROFILE_FULL);
}
// Destructor of the class
// Destructor of the class. The test owns nothing that needs explicit release
// here; teardown of the HSA state happens in Close().
ImageStoreBandwidth::~ImageStoreBandwidth() = default;
// Set up the environment
void ImageStoreBandwidth::SetUp() {
set_kernel_file_name("store_2d_image.o");
set_kernel_name("&__OpenCL_store_2d_image_kernel");
if (HSA_STATUS_SUCCESS != rocrtst::InitAndSetupHSA(this)) {
return;
}
hsa_agent_t* gpu_dev = gpu_device1();
//Create a queue with max number size
hsa_queue_t* q = nullptr;
rocrtst::CreateQueue(*gpu_dev, &q);
set_main_queue(q);
rocrtst::LoadKernelFromObjFile(this);
//Fill up part of aql
rocrtst::InitializeAQLPacket(this, &aql());
aql().setup = 0;
aql().setup |= 2 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
return;
}
// Run the test
void ImageStoreBandwidth::Run() {
hsa_status_t err;
if (!rocrtst::CheckProfile(this)) {
return;
}
hsa_agent_t* gpu_dev = gpu_device1();
hsa_agent_t* cpu_dev = cpu_device();
hsa_ext_image_descriptor_t image_descriptor;
image_descriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
image_descriptor.width = 256;
image_descriptor.height = 256;
image_descriptor.depth = 1;
image_descriptor.array_size = 0;
image_descriptor.format.channel_type =
HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8;
image_descriptor.format.channel_order = HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA;
hsa_ext_image_format_t image_format;
image_format.channel_type = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8;
image_format.channel_order = HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA;
// Check if device_ supports at least read only operation on image format
uint32_t capability_mask;
err = hsa_ext_image_get_capability(*gpu_dev, HSA_EXT_IMAGE_GEOMETRY_2D,
&image_format, &capability_mask);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
if (!(capability_mask & HSA_EXT_IMAGE_CAPABILITY_READ_ONLY)) {
std::cout <<
"Device does not support read and write operation on this kind of image!"
<< std::endl;
ASSERT_NE(capability_mask & HSA_EXT_IMAGE_CAPABILITY_READ_ONLY, 0);
}
// Get image info
hsa_ext_image_data_info_t image_info;
err = hsa_ext_image_data_get_info(*gpu_dev, &image_descriptor,
HSA_ACCESS_PERMISSION_RW, &image_info);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
image_size_ = image_info.size;
std::vector<double> time;
for (uint32_t i = 0; i < num_iteration(); i++) {
#ifdef DEBUG
std::cout << ".";
fflush(stdout);
#endif
// Allocate memory space for image
err = hsa_amd_agent_iterate_memory_pools(*cpu_dev, rocrtst::FindGlobalPool,
&cpu_pool());
ASSERT_EQ(err, HSA_STATUS_INFO_BREAK);
uintptr_t ptr_temp = 0;
err = hsa_amd_memory_pool_allocate(cpu_pool(),
image_info.size + image_info.alignment,
0, (void**) &ptr_temp);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Align the image address
uintptr_t mul = ptr_temp / image_info.alignment;
void* ptr_image = (void*) ((mul + 1) * image_info.alignment);
// rocrtst::CommonCleanUp the image memory to 0
err = hsa_amd_memory_fill(ptr_image, 0, image_info.size);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Create image handle
hsa_ext_image_t image_handle;
err = hsa_ext_image_create(*gpu_dev, &image_descriptor, ptr_image,
HSA_ACCESS_PERMISSION_RO, &image_handle);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Allocate and initialize the kernel argument
typedef struct args_t {
uint64_t arg0;
int istart;
int iend;
int istep;
} args;
//int local_out = 5;
int istart = 0;
int iend = 64;
int istep = 1;
args* kern_ptr = NULL;
err = hsa_amd_memory_pool_allocate(cpu_pool(), sizeof(args), 0,
(void**) &kern_ptr);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
kern_ptr->arg0 = image_handle.handle;
kern_ptr->istart = istart;
kern_ptr->iend = iend;
kern_ptr->istep = istep;
aql().kernarg_address = kern_ptr;
// Obtain the current queue write index
uint64_t index = hsa_queue_add_write_index_relaxed(main_queue(), 1);
void *q_base_addr = main_queue()->base_address;
// Write the aql packet at the calculated queue index address.
const uint32_t queue_mask = main_queue()->size - 1;
((hsa_kernel_dispatch_packet_t*)q_base_addr)[index & queue_mask] = aql();
rocrtst::PerfTimer p_timer;
int id = p_timer.CreateTimer();
p_timer.StartTimer(id);
((hsa_kernel_dispatch_packet_t*)q_base_addr)[index & queue_mask].header |=
HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
hsa_signal_store_release(main_queue()->doorbell_signal, index);
// Wait on the dispatch signal until the kernel is finished.
while (hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_LT, 1,
(uint64_t) - 1, HSA_WAIT_STATE_ACTIVE))
;
p_timer.StopTimer(id);
time.push_back(p_timer.ReadTimer(id));
hsa_signal_store_release(signal(), 1);
err = hsa_ext_image_destroy(*gpu_dev, image_handle);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
err = hsa_memory_deregister(ptr_image, image_info.size);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
hsa_amd_memory_pool_free(reinterpret_cast<void*>(ptr_temp));
}
// Calculte the mean load time
time.erase(time.begin());
#ifdef DEBUG
for (size_t i = 0; i < time.size(); i++) {
std::cout << time[i] << std::endl;
}
#endif
double mean_time = rocrtst::CalcMean(time);
std::cout << "mean time: " << mean_time << std::endl;
store_bandwidth_ = image_size_ / mean_time / 1024 / 1024 / 1024;
}
void ImageStoreBandwidth::Close() {
hsa_status_t err;
err = rocrtst::CommonCleanUp(this);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}
void ImageStoreBandwidth::DisplayResults() const {
if (!rocrtst::CheckProfile(this)) {
return;
}
std::cout << "============================================="
"===============================" << std::endl;
std::cout << " Image Size(bytes): StoreBandwidth(GB/S): "
<< std::cout;
std::cout << " " << image_size_ << " "
<< store_bandwidth_ << std::endl;
}
@@ -0,0 +1,82 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef __ROCRTST_SRC_INC_IMAGE_STORE_BANDWIDTH_H__
#define __ROCRTST_SRC_INC_IMAGE_STORE_BANDWIDTH_H__
#include "perf_common/perf_base.h"
#include "common/base_rocr.h"
#include "hsa/hsa.h"
// Performance test for image stores. Run() dispatches the 2D image store
// kernel repeatedly, records the image data size and a bandwidth figure
// derived from the mean dispatch time; DisplayResults() prints both under the
// headings "Image Size(bytes)" and "StoreBandwidth(GB/S)".
class ImageStoreBandwidth: public rocrtst::BaseRocR, public PerfBase {
public:
//@Brief: Constructor; zeroes the results and requires HSA_PROFILE_FULL
ImageStoreBandwidth();
//@Brief: Destructor
~ImageStoreBandwidth();
//@Brief: Set up the test environment (HSA init, queue, kernel, AQL packet)
virtual void SetUp();
//@Brief: Run the actual testing; fills in image_size_ and store_bandwidth_
virtual void Run();
//@Brief: Clean up the test environment (calls rocrtst::CommonCleanUp)
virtual void Close();
//@Brief: Display results; prints nothing if rocrtst::CheckProfile fails
virtual void DisplayResults() const;
private:
//@Brief: Image Store Bandwidth, computed in Run() as
// image_size_ / mean dispatch time / 1024^3
double store_bandwidth_;
//@Brief: Image size in bytes, as reported by hsa_ext_image_data_get_info
size_t image_size_;
};
#endif //__ROCRTST_SRC_INC_IMAGE_STORE_BANDWIDTH_H__
@@ -0,0 +1,12 @@
module &m:1:0:$full:$large:$default;
extension "amd:gcn";
extension "IMAGE";
decl prog function &abort()();
// Kernel that takes no arguments and returns immediately without doing any
// work.
prog kernel &__Empty_kernel()
{
ret;
};
@@ -0,0 +1,88 @@
module &m:1:0:$full:$large:$default;
/* Copyright 2014 HSA Foundation Inc. All Rights Reserved.
*
* HSAF is granting you permission to use this software and documentation (if
* any) (collectively, the "Materials") pursuant to the terms and conditions
* of the Software License Agreement included with the Materials. If you do
* not have a copy of the Software License Agreement, contact the HSA Foundation for a copy.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
*/
extension "amd:gcn";
extension "IMAGE";
decl prog function &abort()();
/**
* @brief HSAIL kernel in which every work-item stores the value 1 into one
* 32-bit slot of a caller-supplied buffer.
* The slot is selected by the flat absolute work-item id: the work-item with
* id N writes to byte address outAddr + N * sizeof(uint32_t).
* So given a global work grid of 16 work-items, the work-item with absolute
* id 4 writes the 32-bit value at byte offset 16.
*
* @NOTE: The kernel performs no bounds check, so the buffer has to provide at
* least one uint32_t per work-item. A dispatch of 8 work-items needs a buffer
* of at least 8 * sizeof(uint32_t) bytes.
*
* @NOTE(review): the "AMD RTI" pragmas below still name __SysMemLoad although
* the kernel is called &main - presumably left over from the kernel this one
* was derived from; confirm whether the names matter to the finalizer.
*
* @param outAddr base byte address of the buffer that is written by the
* kernel work-items
*
*/
prog kernel &main(kernarg_u64 %outAddr) {
pragma "AMD RTI", "ARGSTART:__SysMemLoad";
pragma "AMD RTI", "version:3:1:104";
pragma "AMD RTI", "device:generic";
pragma "AMD RTI", "uniqueid:1024";
pragma "AMD RTI", "function:1:0";
pragma "AMD RTI", "memory:64bitABI";
pragma "AMD RTI", "uavid:8";
pragma "AMD RTI", "privateid:8";
pragma "AMD RTI", "ARGEND:__SysMemLoad";
// Load the base address of the output buffer
ld_kernarg_u64 $d0, [%outAddr];
// Compute the absolute id of current work-item
// and shift it left by two (multiply by 4) to get
// the byte offset of its 32-bit slot in the buffer
workitemflatabsid_u32 $s0;
shl_u32 $s0, $s0, 2;
cvt_u64_u32 $d4, $s0;
// Add the offset to the base address of the buffer to obtain
// the effective address for the store
add_u64 $d0, $d0, $d4;
// Store the constant 1 into the work-item's slot
mov_u32 $s2, 1;
st_global_u32 $s2, [$d0];
};
@@ -0,0 +1,88 @@
module &m:1:0:$base:$large:$default;
/* Copyright 2014 HSA Foundation Inc. All Rights Reserved.
*
* HSAF is granting you permission to use this software and documentation (if
* any) (collectively, the "Materials") pursuant to the terms and conditions
* of the Software License Agreement included with the Materials. If you do
* not have a copy of the Software License Agreement, contact the HSA Foundation for a copy.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
*/
extension "amd:gcn";
extension "IMAGE";
decl prog function &abort()();
/**
* @brief HSAIL kernel in which every work-item stores the value 1 into one
* 32-bit slot of a caller-supplied buffer.
* The slot is selected by the flat absolute work-item id: the work-item with
* id N writes to byte address outAddr + N * sizeof(uint32_t).
* So given a global work grid of 16 work-items, the work-item with absolute
* id 4 writes the 32-bit value at byte offset 16.
*
* @NOTE: The kernel performs no bounds check, so the buffer has to provide at
* least one uint32_t per work-item. A dispatch of 8 work-items needs a buffer
* of at least 8 * sizeof(uint32_t) bytes.
*
* @NOTE(review): the "AMD RTI" pragmas below still name __SysMemLoad although
* the kernel is called &main - presumably left over from the kernel this one
* was derived from; confirm whether the names matter to the finalizer.
*
* @param outAddr base byte address of the buffer that is written by the
* kernel work-items
*
*/
prog kernel &main(kernarg_u64 %outAddr) {
pragma "AMD RTI", "ARGSTART:__SysMemLoad";
pragma "AMD RTI", "version:3:1:104";
pragma "AMD RTI", "device:generic";
pragma "AMD RTI", "uniqueid:1024";
pragma "AMD RTI", "function:1:0";
pragma "AMD RTI", "memory:64bitABI";
pragma "AMD RTI", "uavid:8";
pragma "AMD RTI", "privateid:8";
pragma "AMD RTI", "ARGEND:__SysMemLoad";
// Load the base address of the output buffer
ld_kernarg_u64 $d0, [%outAddr];
// Compute the absolute id of current work-item
// and shift it left by two (multiply by 4) to get
// the byte offset of its 32-bit slot in the buffer
workitemflatabsid_u32 $s0;
shl_u32 $s0, $s0, 2;
cvt_u64_u32 $d4, $s0;
// Add the offset to the base address of the buffer to obtain
// the effective address for the store
add_u64 $d0, $d0, $d4;
// Store the constant 1 into the work-item's slot
mov_u32 $s2, 1;
st_global_u32 $s2, [$d0];
};
@@ -0,0 +1,109 @@
module &m:1:0:$full:$large:$default;
extension "amd:gcn";
extension "IMAGE";
decl prog function &abort()();
// Image load benchmark kernel.
//
// Every work-item repeatedly reads texels of the 2D image %input. One pass of
// the loop issues eight image loads; after each of the first seven loads both
// coordinates are advanced by 64 and wrapped with a 255 mask. One channel of
// every loaded texel is added to the accumulator $s9, which is written to the
// address held in %result at the end of every pass (presumably so that the
// loads cannot be optimized away).
//
// %input   read-write image handle to load from
// %result  address that receives the accumulated 32-bit sum; all work-items
//          store to this same address
// %istart  initial value of the loop counter
// %iend    the loop repeats while the counter is below this value
// %istep   amount added to the loop counter at the top of every pass
//
// The loop condition is tested at the bottom, so the body runs at least once.
prog kernel &__OpenCL_load_2d_image_kernel(
kernarg_rwimg %input,
kernarg_u64 %result,
kernarg_u32 %istart,
kernarg_u32 %iend,
kernarg_u32 %istep)
{
pragma "AMD RTI", "ARGSTART:__OpenCL_load_2d_image_kernel";
pragma "AMD RTI", "version:3:1:104";
pragma "AMD RTI", "device:generic";
pragma "AMD RTI", "uniqueid:1024";
pragma "AMD RTI", "function:1:0";
pragma "AMD RTI", "memory:64bitABI";
pragma "AMD RTI", "uavid:8";
pragma "AMD RTI", "privateid:8";
pragma "AMD RTI", "ARGEND:__OpenCL_load_2d_image_kernel";
@__OpenCL_load_2d_image_kernel_entry:
// BB#0: // %entry
// $s0 = absolute work-item id in dimension 1 (image row coordinate)
// $s1 = absolute work-item id in dimension 0 (image column coordinate)
workitemabsid_u32 $s0, 1;
workitemabsid_u32 $s1, 0;
ld_kernarg_rwimg $d5, [%input];
ld_kernarg_u32 $s2, [%istart];
ld_kernarg_u32 $s3, [%iend];
ld_kernarg_u32 $s4, [%istep];
add_u32 $s9, 0, 0; // reset s9 to zero
@loop:
// Advance the loop counter first; it is compared against %iend at the bottom
add_u32 $s2, $s2, $s4;
ldimage_v4_2d_u32_rwimg_u32 ($s5, $s6, $s7, $s8), $d5, ($s1, $s0); //(coordWidth, coordHeight)
add_u32 $s9, $s9, $s5;
//force to retrieve different image elements
add_u32 $s1, $s1, 64;
and_b32 $s1, $s1, 255;
add_u32 $s0, $s0, 64;
and_b32 $s0, $s0, 255;
ldimage_v4_2d_u32_rwimg_u32 ($s5, $s6, $s7, $s8), $d5, ($s1, $s0);
add_u32 $s9, $s9, $s6;
//force to retrieve different image elements
add_u32 $s1, $s1, 64;
and_b32 $s1, $s1, 255;
add_u32 $s0, $s0, 64;
and_b32 $s0, $s0, 255;
ldimage_v4_2d_u32_rwimg_u32 ($s5, $s6, $s7, $s8), $d5, ($s1, $s0);
add_u32 $s9, $s9, $s7;
//force to retrieve different image elements
add_u32 $s1, $s1, 64;
and_b32 $s1, $s1, 255;
add_u32 $s0, $s0, 64;
and_b32 $s0, $s0, 255;
ldimage_v4_2d_u32_rwimg_u32 ($s5, $s6, $s7, $s8), $d5, ($s1, $s0);
add_u32 $s9, $s9, $s8;
//force to retrieve different image elements
add_u32 $s1, $s1, 64;
and_b32 $s1, $s1, 255;
add_u32 $s0, $s0, 64;
and_b32 $s0, $s0, 255;
ldimage_v4_2d_u32_rwimg_u32 ($s5, $s6, $s7, $s8), $d5, ($s1, $s0);
add_u32 $s9, $s9, $s5;
//force to retrieve different image elements
add_u32 $s1, $s1, 64;
and_b32 $s1, $s1, 255;
add_u32 $s0, $s0, 64;
and_b32 $s0, $s0, 255;
ldimage_v4_2d_u32_rwimg_u32 ($s5, $s6, $s7, $s8), $d5, ($s1, $s0);
add_u32 $s9, $s9, $s6;
//force to retrieve different image elements
add_u32 $s1, $s1, 64;
and_b32 $s1, $s1, 255;
add_u32 $s0, $s0, 64;
and_b32 $s0, $s0, 255;
ldimage_v4_2d_u32_rwimg_u32 ($s5, $s6, $s7, $s8), $d5, ($s1, $s0);
add_u32 $s9, $s9, $s7;
//force to retrieve different image elements
add_u32 $s1, $s1, 64;
and_b32 $s1, $s1, 255;
add_u32 $s0, $s0, 64;
and_b32 $s0, $s0, 255;
ldimage_v4_2d_u32_rwimg_u32 ($s5, $s6, $s7, $s8), $d5, ($s1, $s0);
// Publish the running sum to the result address on every pass
ld_kernarg_align(8)_width(all)_u64 $d4, [%result];
add_u32 $s9, $s9, $s8;
st_u32 $s9, [$d4];
//loop until we hit condition
cmp_lt_b1_u32 $c0, $s2, $s3;
cbr_b1 $c0, @loop;
};
@@ -0,0 +1,37 @@
module &m:1:0:$full:$large:$default;
extension "amd:gcn";
extension "IMAGE";
decl prog function &abort()();
/* This function takes in 2 memory locations, one storing a number of
iterations to execute, and the other a place to store a result.
The function iterates through a loop "iteration" times, and stores
the number of iterations executed in the "results" location.
A successful run is when the value stored in %iteration is the
same as the value stored in %results.

The loop condition is tested after the counter has been incremented, so
the body runs at least once: an iteration count of 0 yields a result of 1.

NOTE(review): the kernel body starts with an unconditional "ret;", which
precedes the loads, the loop and the final store. As written, the
description above therefore appears to apply only to the code after that
"ret;" - confirm whether the early return is intentional (for example to
turn this into a minimal-work kernel) or a leftover.
*/
prog kernel &__simple_kernel(
kernarg_u64 %iteration,
kernarg_u64 %results)
{
// NOTE(review): early return, see the note in the header comment
ret;
// $d1 = address of the iteration count, $d2 = address of the result
ld_kernarg_align(8)_width(all)_u64 $d1, [%iteration];
ld_kernarg_align(8)_width(all)_u64 $d2, [%results];
// $s1 = requested number of iterations, $s2 = iterations executed so far
ld_global_u32 $s1, [$d1];
mov_u32 $s2, 0;
@loop:
add_u32 $s2, $s2, 1;
cmp_lt_b1_u32 $c0, $s2, $s1;
cbr_b1 $c0, @loop;
// Report the number of iterations that were executed
st_global_u32 $s2, [$d2];
ret;
};
@@ -0,0 +1,28 @@
module &m:1:0:$base:$large:$default;
extension "amd:gcn";
extension "IMAGE";
decl prog function &abort()();
// Counting-loop kernel.
// %iteration holds the address of a 32-bit iteration count and %results the
// address of a 32-bit result. The kernel increments a counter until it is no
// longer below the iteration count and stores the counter to the result
// address. The condition is tested after the increment, so the loop body runs
// at least once: an iteration count of 0 yields a result of 1.
prog kernel &__simple_kernel(
kernarg_u64 %iteration,
kernarg_u64 %results)
{
// $d1 = address of the iteration count, $d2 = address of the result
ld_kernarg_align(8)_width(all)_u64 $d1, [%iteration];
ld_kernarg_align(8)_width(all)_u64 $d2, [%results];
// $s1 = requested number of iterations, $s2 = iterations executed so far
ld_global_u32 $s1, [$d1];
mov_u32 $s2, 0;
@loop:
add_u32 $s2, $s2, 1;
cmp_lt_b1_u32 $c0, $s2, $s1;
cbr_b1 $c0, @loop;
// Report the number of iterations that were executed
st_global_u32 $s2, [$d2];
ret;
};
@@ -0,0 +1,105 @@
module &m:1:0:$full:$large:$default;
extension "amd:gcn";
extension "IMAGE";
decl prog function &abort()();
// Image store benchmark kernel.
//
// Every work-item repeatedly writes texels of the 2D image %output. One pass
// of the loop issues eight image stores; after each of the first seven stores
// both coordinates are advanced by 64 and wrapped with a 255 mask. The value
// written to all four channels is taken from $s5, which is changed before
// every store so that consecutive stores write different data.
//
// %output  read-write image handle to store to
// %istart  initial value of the loop counter
// %iend    the loop repeats while the counter is below this value
// %istep   amount added to the loop counter at the top of every pass
//
// The loop condition is tested at the bottom, so the body runs at least once.
prog kernel &__OpenCL_store_2d_image_kernel(
kernarg_rwimg %output,
kernarg_u32 %istart,
kernarg_u32 %iend,
kernarg_u32 %istep)
{
pragma "AMD RTI", "ARGSTART:__OpenCL_store_2d_image_kernel";
pragma "AMD RTI", "version:3:1:104";
pragma "AMD RTI", "device:generic";
pragma "AMD RTI", "uniqueid:1024";
pragma "AMD RTI", "function:1:0";
pragma "AMD RTI", "memory:64bitABI";
pragma "AMD RTI", "uavid:8";
pragma "AMD RTI", "privateid:8";
pragma "AMD RTI", "ARGEND:__OpenCL_store_2d_image_kernel";
@__OpenCL_store_2d_image_kernel_entry:
// BB#0: // %entry
// $s0 = absolute work-item id in dimension 1 (image row coordinate)
// $s1 = absolute work-item id in dimension 0 (image column coordinate)
workitemabsid_u32 $s0, 1;
workitemabsid_u32 $s1, 0;
ld_kernarg_rwimg $d5, [%output];
ld_kernarg_u32 $s2, [%istart];
ld_kernarg_u32 $s3, [%iend];
ld_kernarg_u32 $s4, [%istep];
// $s5 holds the value that is written to the image
mov_b32 $s5, 0;
@loop:
// Advance the loop counter first; it is compared against %iend at the bottom
add_u32 $s2, $s2, $s4;
add_u32 $s5, $s5, 1;
stimage_v4_2d_u32_rwimg_u32 ($s5, $s5, $s5, $s5), $d5, ($s1, $s0);
//force to access different image elements
add_u32 $s1, $s1, 64;
and_b32 $s1, $s1, 255;
add_u32 $s0, $s0, 64;
and_b32 $s0, $s0, 255;
add_u32 $s5, $s5, $s2;
stimage_v4_2d_u32_rwimg_u32 ($s5, $s5, $s5, $s5), $d5, ($s1, $s0);
//force to access different image elements
add_u32 $s1, $s1, 64;
and_b32 $s1, $s1, 255;
add_u32 $s0, $s0, 64;
and_b32 $s0, $s0, 255;
add_u32 $s5, $s5, $s2;
stimage_v4_2d_u32_rwimg_u32 ($s5, $s5, $s5, $s5), $d5, ($s1, $s0);
//force to access different image elements
add_u32 $s1, $s1, 64;
and_b32 $s1, $s1, 255;
add_u32 $s0, $s0, 64;
and_b32 $s0, $s0, 255;
add_u32 $s5, $s5, $s2;
stimage_v4_2d_u32_rwimg_u32 ($s5, $s5, $s5, $s5), $d5, ($s1, $s0);
//force to access different image elements
add_u32 $s1, $s1, 64;
and_b32 $s1, $s1, 255;
add_u32 $s0, $s0, 64;
and_b32 $s0, $s0, 255;
add_u32 $s5, $s5, $s2;
stimage_v4_2d_u32_rwimg_u32 ($s5, $s5, $s5, $s5), $d5, ($s1, $s0);
//force to access different image elements
add_u32 $s1, $s1, 64;
and_b32 $s1, $s1, 255;
add_u32 $s0, $s0, 64;
and_b32 $s0, $s0, 255;
add_u32 $s5, $s5, $s2;
stimage_v4_2d_u32_rwimg_u32 ($s5, $s5, $s5, $s5), $d5, ($s1, $s0);
//force to access different image elements
add_u32 $s1, $s1, 64;
and_b32 $s1, $s1, 255;
add_u32 $s0, $s0, 64;
and_b32 $s0, $s0, 255;
add_u32 $s5, $s5, $s2;
stimage_v4_2d_u32_rwimg_u32 ($s5, $s5, $s5, $s5), $d5, ($s1, $s0);
//force to access different image elements
add_u32 $s1, $s1, 64;
and_b32 $s1, $s1, 255;
add_u32 $s0, $s0, 64;
and_b32 $s0, $s0, 255;
add_u32 $s5, $s5, $s2;
stimage_v4_2d_u32_rwimg_u32 ($s5, $s5, $s5, $s5), $d5, ($s1, $s0);
//loop until we hit condition
cmp_lt_b1_u32 $c0, $s2, $s3;
cbr_b1 $c0, @loop;
ret;
};
@@ -0,0 +1,237 @@
module &m:1:0:$full:$large:$default;
/* Copyright 2014 HSA Foundation Inc. All Rights Reserved.
*
* HSAF is granting you permission to use this software and documentation (if
* any) (collectively, the "Materials") pursuant to the terms and conditions
* of the Software License Agreement included with the Materials. If you do
* not have a copy of the Software License Agreement, contact the HSA Foundation for a copy.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
*/
extension "amd:gcn";
extension "IMAGE";
decl prog function &abort()();
/**
* @brief HSAIL kernel to benchmark READ accesses to system memory.
* The kernel is given an input buffer from which each thread will
* read. The thread will read from multiple locations of the input buffer.
* The locations to read from is determined by the work-item Id, the function
* being work-item Id modulo total number of work-items in the global work grid.
* So given a global work grid of 16 work-items the reads by a thread with absolute
* id 4 would be 4, 20, 36, 52, etc.
*
* @NOTE: A constraint imposed by the kernel is that the buffer size be large
* enough to support 16 reads by each thread. So a dispatch of 8 work-items
* should allocate enough buffer for 8 * 16 * sizeof(uint32_t).
*
* @param bufStart beginning byte address of user buffer in system memory
* from which kernel threads could read
*
* @param bufEnd byte address that follows the end of user buffer. Accessing
* memory at bufEnd is illegal
*
* @param addrStep size by which to increment byte address following each read
* operation. The value represents total number of work-items * sizeof(uint32_t)
*
* @param outAddr argument that is passed by the user to be updated with values
* read by the kernel threads. This ensures that the compiler and finalizer do not
* eliminate code because the values being read are not used in any meaningful way.
*
*/
prog kernel &__SysMemLoad(kernarg_u64 %bufStart,
kernarg_u64 %bufEnd,
kernarg_u64 %addrStep,
kernarg_u64 %outAddr) {
pragma "AMD RTI", "ARGSTART:__SysMemLoad";
pragma "AMD RTI", "version:3:1:104";
pragma "AMD RTI", "device:generic";
pragma "AMD RTI", "uniqueid:1024";
pragma "AMD RTI", "function:1:0";
pragma "AMD RTI", "memory:64bitABI";
pragma "AMD RTI", "uavid:8";
pragma "AMD RTI", "privateid:8";
pragma "AMD RTI", "ARGEND:__SysMemLoad";
// Retrieve the values of input arguments
// bufStart refers to the starting byte address
// bufEnd refers to the end of byte address
// addrStep refers to the product of total number
// of work-items in the grid * sizeof(uint32_t)
ld_kernarg_u64 $d0, [%bufStart];
ld_kernarg_u64 $d1, [%bufEnd];
ld_kernarg_u64 $d2, [%addrStep];
ld_kernarg_u64 $d3, [%outAddr];
// Compute the absolute id of current thread
// and shift it by two to get index into user
// buffer to access for Read operation
workitemflatabsid_u32 $s0;
shl_u32 $s0, $s0, 2;
cvt_u64_u32 $d4, $s0;
// Add index to base address of user buffer to obtain
// effective address for access
add_u64 $d0, $d0, $d4;
add_u64 $d3, $d3, $d4;
// Initialize thread's read accumulator to zero
mov_u32 $s2, 0;
@loop:
// Read sixteeen values with a stride that is
// determined by the total number of work-items
// in the global grid
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
// Update output buffer with values read
// from input buffer
st_global_u32 $s2, [$d3];
};
@@ -0,0 +1,237 @@
module &m:1:0:$base:$large:$default;
/* Copyright 2014 HSA Foundation Inc. All Rights Reserved.
*
* HSAF is granting you permission to use this software and documentation (if
* any) (collectively, the "Materials") pursuant to the terms and conditions
* of the Software License Agreement included with the Materials. If you do
* not have a copy of the Software License Agreement, contact the HSA Foundation for a copy.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
*/
extension "amd:gcn";
extension "IMAGE";
decl prog function &abort()();
/**
* @brief HSAIL kernel to benchmark READ accesses to system memory.
* The kernel is given an input buffer from which each thread will
* read. Each thread reads from multiple locations of the input buffer.
* The first location is selected by the work-item's flattened absolute id
* and every subsequent location lies addrStep bytes after the previous one.
* So given a global work grid of 16 work-items, the reads by the thread with
* absolute id 4 would be elements 4, 20, 36, 52, etc.
*
* @NOTE: The body below is fully unrolled into 32 loads per thread and never
* compares against bufEnd. Assuming addrStep is as documented, a dispatch of
* 8 work-items therefore touches 8 * 32 * sizeof(uint32_t) bytes.
* NOTE(review): this comment previously stated 16 reads per thread; confirm
* that the host-side buffer allocation matches the 32 loads issued here.
*
* @param bufStart beginning byte address of user buffer in system memory
* from which kernel threads could read
*
* @param bufEnd byte address that follows the end of user buffer. Accessing
* memory at bufEnd is illegal. The value is loaded into a register but is
* not otherwise used by this kernel.
*
* @param addrStep size by which to increment byte address following each read
* operation. The value represents total number of work-items * sizeof(uint32_t)
*
* @param outAddr base byte address of an output buffer; each thread stores the
* sum of the values it read at outAddr + (absolute id * sizeof(uint32_t)).
* This ensures the compiler and finalizer do not eliminate the loads because
* the values being read are not used in any meaningful way.
*
*/
prog kernel &__SysMemLoad(kernarg_u64 %bufStart,
kernarg_u64 %bufEnd,
kernarg_u64 %addrStep,
kernarg_u64 %outAddr) {
// "AMD RTI" metadata pragmas for this kernel (kept verbatim)
pragma "AMD RTI", "ARGSTART:__SysMemLoad";
pragma "AMD RTI", "version:3:1:104";
pragma "AMD RTI", "device:generic";
pragma "AMD RTI", "uniqueid:1024";
pragma "AMD RTI", "function:1:0";
pragma "AMD RTI", "memory:64bitABI";
pragma "AMD RTI", "uavid:8";
pragma "AMD RTI", "privateid:8";
pragma "AMD RTI", "ARGEND:__SysMemLoad";
// Retrieve the values of input arguments
// $d0 <- bufStart: starting byte address of the input buffer
// $d1 <- bufEnd:   byte address following the end of the buffer
//                  (not referenced again in this kernel)
// $d2 <- addrStep: product of total number of work-items
//                  in the grid * sizeof(uint32_t)
// $d3 <- outAddr:  base byte address of the output buffer
ld_kernarg_u64 $d0, [%bufStart];
ld_kernarg_u64 $d1, [%bufEnd];
ld_kernarg_u64 $d2, [%addrStep];
ld_kernarg_u64 $d3, [%outAddr];
// Compute the flattened absolute id of the current thread
// and shift it left by two (id * 4) to get the byte offset
// of this thread's first 32-bit element. The shift is done
// in 32 bits before the value is widened to 64 bits.
workitemflatabsid_u32 $s0;
shl_u32 $s0, $s0, 2;
cvt_u64_u32 $d4, $s0;
// Add the byte offset to the base addresses of the input and
// output buffers to obtain this thread's effective addresses
add_u64 $d0, $d0, $d4;
add_u64 $d3, $d3, $d4;
// Initialize thread's read accumulator to zero
mov_u32 $s2, 0;
// No instruction in this kernel branches to @loop; the reads
// below execute exactly once.
@loop:
// Read thirty-two values with a stride that is
// determined by the total number of work-items
// in the global grid. Each step loads one value into $s1,
// adds it to the accumulator $s2 and advances $d0 by addrStep.
// Reads 1-8
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
// Reads 9-16
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
// Reads 17-24
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
// Reads 25-32
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
ld_global_u32 $s1, [$d0];
add_u32 $s2, $s1, $s2;
add_u64 $d0, $d0, $d2;
// Update this thread's slot of the output buffer with the
// sum of the values read from the input buffer
st_global_u32 $s2, [$d3];
};
@@ -0,0 +1,105 @@
module &m:1:0:$full:$large:$default;
extension "amd:gcn";
extension "IMAGE";
decl prog function &abort()();
// HSAIL kernel to benchmark WRITE accesses to a user buffer.
// Each thread starts at bufStart + (flattened absolute id * 4) and
// repeatedly stores a 32-bit value, advancing its address by addrStep
// bytes after every store. Stores are issued in unrolled batches of
// sixteen; the address is compared against bufEnd only after each
// batch, so at least sixteen stores always happen and a batch is never
// cut short. The buffer therefore needs to cover whole batches
// (a multiple of 16 * addrStep bytes) for all stores to stay in bounds.
//
// bufStart  beginning byte address of the buffer to write
// bufEnd    byte address that follows the end of the buffer
// addrStep  byte increment applied after each store (per the comments
//           below: number of work-items in the grid * sizeof(uint32_t))
// deadArg   loaded into $d3 but not otherwise used by this kernel
prog kernel &__SysMemStore(kernarg_u64 %bufStart,
kernarg_u64 %bufEnd,
kernarg_u64 %addrStep,
kernarg_u64 %deadArg) {
// Directives for Compiler
pragma "AMD RTI", "ARGSTART:__SysMemStore";
pragma "AMD RTI", "version:3:1:104";
pragma "AMD RTI", "device:generic";
pragma "AMD RTI", "uniqueid:1024";
pragma "AMD RTI", "function:1:0";
pragma "AMD RTI", "memory:64bitABI";
pragma "AMD RTI", "uavid:8";
pragma "AMD RTI", "privateid:8";
pragma "AMD RTI", "ARGEND:__SysMemStore";
// Retrieve the values of input arguments
// bufStart refers to the starting byte address
// bufEnd refers to the byte address following the end of the buffer
// addrStep refers to the product of total number
// of work-items in the grid * sizeof(uint32_t)
ld_kernarg_u64 $d0, [%bufStart];
ld_kernarg_u64 $d1, [%bufEnd];
ld_kernarg_u64 $d2, [%addrStep];
ld_kernarg_u64 $d3, [%deadArg];
// Compute the flattened absolute id of the current thread
// and shift it left by two (id * 4) to get the byte offset
// into the user buffer for the Write operation. The same
// value ($s0) is also the data written by every store below.
workitemflatabsid_u32 $s0;
shl_u32 $s0, $s0, 2;
// Convert the byte offset into a 64-bit number
// and add it to the starting address of user
// buffer to obtain effective address for access
cvt_u64_u32 $d4, $s0;
add_u64 $d0, $d0, $d4;
@loop:
// Write sixteen values with a stride that is
// determined by the total number of work-items
// in the global grid
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
// Loop until we hit end of buffer [%bufEnd]: branch back while the
// next address ($d0) is still below bufEnd ($d1), unsigned compare
cmp_lt_b1_u64 $c0, $d0, $d1;
cbr_b1 $c0, @loop;
};
@@ -0,0 +1,105 @@
module &m:1:0:$base:$large:$default;
extension "amd:gcn";
extension "IMAGE";
decl prog function &abort()();
// HSAIL kernel to benchmark WRITE accesses to a user buffer.
// Each thread starts at bufStart + (flattened absolute id * 4) and
// repeatedly stores a 32-bit value, advancing its address by addrStep
// bytes after every store. Stores are issued in unrolled batches of
// sixteen; the address is compared against bufEnd only after each
// batch, so at least sixteen stores always happen and a batch is never
// cut short. The buffer therefore needs to cover whole batches
// (a multiple of 16 * addrStep bytes) for all stores to stay in bounds.
//
// bufStart  beginning byte address of the buffer to write
// bufEnd    byte address that follows the end of the buffer
// addrStep  byte increment applied after each store (per the comments
//           below: number of work-items in the grid * sizeof(uint32_t))
// deadArg   loaded into $d3 but not otherwise used by this kernel
prog kernel &__SysMemStore(kernarg_u64 %bufStart,
kernarg_u64 %bufEnd,
kernarg_u64 %addrStep,
kernarg_u64 %deadArg) {
// Directives for Compiler
pragma "AMD RTI", "ARGSTART:__SysMemStore";
pragma "AMD RTI", "version:3:1:104";
pragma "AMD RTI", "device:generic";
pragma "AMD RTI", "uniqueid:1024";
pragma "AMD RTI", "function:1:0";
pragma "AMD RTI", "memory:64bitABI";
pragma "AMD RTI", "uavid:8";
pragma "AMD RTI", "privateid:8";
pragma "AMD RTI", "ARGEND:__SysMemStore";
// Retrieve the values of input arguments
// bufStart refers to the starting byte address
// bufEnd refers to the byte address following the end of the buffer
// addrStep refers to the product of total number
// of work-items in the grid * sizeof(uint32_t)
ld_kernarg_u64 $d0, [%bufStart];
ld_kernarg_u64 $d1, [%bufEnd];
ld_kernarg_u64 $d2, [%addrStep];
ld_kernarg_u64 $d3, [%deadArg];
// Compute the flattened absolute id of the current thread
// and shift it left by two (id * 4) to get the byte offset
// into the user buffer for the Write operation. The same
// value ($s0) is also the data written by every store below.
workitemflatabsid_u32 $s0;
shl_u32 $s0, $s0, 2;
// Convert the byte offset into a 64-bit number
// and add it to the starting address of user
// buffer to obtain effective address for access
cvt_u64_u32 $d4, $s0;
add_u64 $d0, $d0, $d4;
@loop:
// Write sixteen values with a stride that is
// determined by the total number of work-items
// in the global grid
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
st_global_u32 $s0, [$d0];
add_u64 $d0, $d0, $d2;
// Loop until we hit end of buffer [%bufEnd]: branch back while the
// next address ($d0) is still below bufEnd ($d1), unsigned compare
cmp_lt_b1_u64 $c0, $d0, $d1;
cbr_b1 $c0, @loop;
};
@@ -0,0 +1,53 @@
module &m:1:0:$full:$large:$default;
extension "amd:gcn";
extension "IMAGE";
decl prog function &abort()();
// Kernel that counts the 32-bit word at %buf up to %num.
// If the word is already >= num (signed compare) every work-item
// returns immediately. Otherwise the work-item whose absolute id in
// dimension 0 is zero increments the word by one per iteration until
// it is no longer below num, while all other work-items repeatedly
// re-read the word and leave once it is no longer below num.
//
// buf  address of the 32-bit counter in global memory
// num  signed 32-bit value the counter is compared against
//
// NOTE(review): the polling and updating accesses are plain
// ld_global/st_global with no explicit ordering modifiers; confirm the
// intended visibility guarantees between work-items.
prog kernel &__OpenCL_vec_assign_kernel(
kernarg_u64 %buf,
kernarg_u32 %num)
{
pragma "AMD RTI", "ARGSTART:__OpenCL_vec_assign_kernel";
pragma "AMD RTI", "version:3:1:104";
pragma "AMD RTI", "device:generic";
pragma "AMD RTI", "uniqueid:1024";
pragma "AMD RTI", "function:1:0";
pragma "AMD RTI", "memory:64bitABI";
pragma "AMD RTI", "uavid:8";
pragma "AMD RTI", "privateid:8";
pragma "AMD RTI", "ARGEND:__OpenCL_vec_assign_kernel";
@__OpenCL_vec_assign_kernel_entry:
// BB#0: // %entry
// $d0 = buf, $s1 = *buf, $s0 = num; skip everything if *buf >= num
ld_kernarg_align(8)_width(all)_u64 $d0, [%buf];
ld_global_u32 $s1, [$d0];
ld_kernarg_align(4)_width(all)_u32 $s0, [%num];
cmp_ge_b1_s32 $c0, $s1, $s0;
cbr_b1 $c0, @BB0_4;
// BB#1: // %while.body.lr.ph
// Work-item with absolute id 0 (dimension 0) takes the incrementing
// loop at @BB0_2; every other work-item falls through to @BB0_3
workitemabsid_u32 $s1, 0;
cmp_eq_b1_s32 $c0, $s1, 0;
cbr_b1 $c0, @BB0_2;
@BB0_3:
// %while.cond.backedge
// Polling loop: re-read *buf until it is no longer below num
ld_global_u32 $s1, [$d0];
cmp_lt_b1_s32 $c0, $s1, $s0;
cbr_b1 $c0, @BB0_3;
br @BB0_4;
@BB0_2:
// %while.cond.backedge.us
// Incrementing loop: *buf = *buf + 1, then re-read and repeat
// while the stored value is still below num
ld_global_u32 $s1, [$d0];
add_u32 $s1, $s1, 1;
st_global_u32 $s1, [$d0];
ld_global_u32 $s1, [$d0];
cmp_lt_b1_s32 $c0, $s1, $s0;
cbr_b1 $c0, @BB0_2;
@BB0_4:
// %while.end
ret;
};
@@ -0,0 +1,108 @@
module &m:1:0:$full:$large:$default;
extension "amd:gcn";
extension "IMAGE";
decl prog function &abort()();
// Matrix transpose kernel that stages elements through the group-segment
// buffer %localBuf. With lx/ly = work-item id in dimensions 0/1,
// gx/gy = absolute work-item id in dimensions 0/1 (plus the matching
// global offset) and wgx/wgy = work-group id in dimensions 0/1, each
// work-item performs:
//   localBuf[lx * blockSize + ly] = inBuf[gy * width + gx];
//   barrier;
//   outBuf[(wgx * blockSize + ly) * height + (wgy * blockSize + lx)]
//       = localBuf[ly * blockSize + lx];
// All elements are 32-bit values (indices are scaled by 4 bytes).
//
// __global_offset_0/1  added to the absolute ids of dimensions 0 and 1
// __global_offset_2, __printf_buffer, __vqueue_pointer,
// __aqlwrap_pointer    declared but never loaded by this kernel body
// inBuf, outBuf        global-memory source and destination buffers
// localBuf             group-segment staging buffer
// blockSize            multiplier used for the staging-buffer indices and
//                      the work-group contribution to the output index
// width, height        row pitches (in elements) used for the input and
//                      output index computations respectively
prog kernel &__OpenCL_matrixTranspose_kernel(
kernarg_u64 %__global_offset_0,
kernarg_u64 %__global_offset_1,
kernarg_u64 %__global_offset_2,
kernarg_u64 %__printf_buffer,
kernarg_u64 %__vqueue_pointer,
kernarg_u64 %__aqlwrap_pointer,
kernarg_u64 %inBuf,
kernarg_u64 %outBuf,
kernarg_u64 %localBuf,
kernarg_u32 %blockSize,
kernarg_u32 %width,
kernarg_u32 %height)
{
pragma "AMD RTI", "ARGSTART:__OpenCL_matrixTranspose_kernel";
pragma "AMD RTI", "version:3:1:104";
pragma "AMD RTI", "device:generic";
pragma "AMD RTI", "uniqueid:1024";
pragma "AMD RTI", "memory:private:0";
pragma "AMD RTI", "memory:region:0";
pragma "AMD RTI", "memory:local:0";
pragma "AMD RTI", "value:__global_offset_0:u64:1:1:0";
pragma "AMD RTI", "value:__global_offset_1:u64:1:1:16";
pragma "AMD RTI", "value:__global_offset_2:u64:1:1:32";
pragma "AMD RTI", "pointer:__printf_buffer:u8:1:1:48:uav:7:1:RW:0:0:0";
pragma "AMD RTI", "value:__vqueue_pointer:u64:1:1:64";
pragma "AMD RTI", "value:__aqlwrap_pointer:u64:1:1:80";
pragma "AMD RTI", "pointer:inBuf:u32:1:1:96:uav:7:4:RW:0:1:0";
pragma "AMD RTI", "pointer:outBuf:u32:1:1:112:uav:7:4:RW:0:1:0";
pragma "AMD RTI", "pointer:localBuf:u32:1:1:128:l:7:4:RW:0:0:0";
pragma "AMD RTI", "value:blockSize:u32:1:1:144";
pragma "AMD RTI", "value:width:u32:1:1:160";
pragma "AMD RTI", "value:height:u32:1:1:176";
pragma "AMD RTI", "function:1:0";
pragma "AMD RTI", "memory:64bitABI";
pragma "AMD RTI", "privateid:8";
pragma "AMD RTI", "enqueue_kernel:0";
pragma "AMD RTI", "kernel_index:0";
pragma "AMD RTI", "reflection:0:size_t";
pragma "AMD RTI", "reflection:1:size_t";
pragma "AMD RTI", "reflection:2:size_t";
pragma "AMD RTI", "reflection:3:size_t";
pragma "AMD RTI", "reflection:4:size_t";
pragma "AMD RTI", "reflection:5:size_t";
pragma "AMD RTI", "reflection:6:uint*";
pragma "AMD RTI", "reflection:7:uint*";
pragma "AMD RTI", "reflection:8:uint*";
pragma "AMD RTI", "reflection:9:uint";
pragma "AMD RTI", "reflection:10:uint";
pragma "AMD RTI", "reflection:11:uint";
pragma "AMD RTI", "ARGEND:__OpenCL_matrixTranspose_kernel";
@__OpenCL_matrixTranspose_kernel_entry:
// BB#0: // %entry
// $s0 = ly, $s1 = blockSize, $s2 = lx
workitemid_u32 $s0, 1;
ld_kernarg_align(4)_width(all)_u32 $s1, [%blockSize];
workitemid_u32 $s2, 0;
// $d1 = lx * blockSize + ly (staging-buffer index for the store)
mad_u32 $s3, $s2, $s1, $s0;
cvt_u64_u32 $d1, $s3;
// $d0 = gx = absolute id (dim 0) + __global_offset_0
workitemabsid_u32 $s3, 0;
cvt_u64_u32 $d0, $s3;
ld_kernarg_align(8)_width(all)_u64 $d2, [%__global_offset_0];
add_u64 $d0, $d0, $d2;
// $s5 = absolute id (dim 1), $s4 = wgx, $s3 = wgy
workitemabsid_u32 $s5, 1;
workgroupid_u32 $s4, 0;
workgroupid_u32 $s3, 1;
// Scale the staging-buffer store index to a byte offset
shl_u64 $d1, $d1, 2;
// $s3 = wgy * blockSize + lx, $s4 = wgx * blockSize + ly
mad_u32 $s3, $s3, $s1, $s2;
mad_u32 $s4, $s4, $s1, $s0;
// $s5 = low 32 bits of gx; $s6 = low 32 bits of
// gy = absolute id (dim 1) + __global_offset_1
cvt_u64_u32 $d2, $s5;
ld_kernarg_align(8)_width(all)_u64 $d3, [%__global_offset_1];
cvt_u32_u64 $s5, $d0;
add_u64 $d0, $d2, $d3;
cvt_u32_u64 $s6, $d0;
ld_kernarg_align(4)_width(all)_u32 $s7, [%width];
ld_kernarg_align(8)_width(all)_u64 $d0, [%localBuf];
ld_kernarg_align(4)_width(all)_u32 $s8, [%height];
// $s3 = output index = (wgx * blockSize + ly) * height + (wgy * blockSize + lx)
mad_u32 $s3, $s4, $s8, $s3;
// $s4 = 32-bit group-segment address of localBuf[lx * blockSize + ly]
add_u64 $d1, $d0, $d1;
cvt_u32_u64 $s4, $d1;
// $s5 = input index = gy * width + gx, then scaled to a byte offset
mad_u32 $s5, $s6, $s7, $s5;
cvt_u64_u32 $d1, $s5;
shl_u64 $d2, $d1, 2;
ld_kernarg_align(8)_width(all)_u64 $d1, [%outBuf];
ld_kernarg_align(8)_width(all)_u64 $d3, [%inBuf];
add_u64 $d2, $d3, $d2;
// Copy one element from inBuf into the staging buffer
ld_global_align(4)_u32 $s5, [$d2];
st_group_align(4)_u32 $s5, [$s4];
// $d1 = byte address of the destination element in outBuf
cvt_u64_u32 $d2, $s3;
shl_u64 $d2, $d2, 2;
add_u64 $d1, $d1, $d2;
// $s0 = 32-bit group-segment address of localBuf[ly * blockSize + lx]
mad_u32 $s0, $s0, $s1, $s2;
cvt_u64_u32 $d2, $s0;
shl_u64 $d2, $d2, 2;
add_u64 $d0, $d0, $d2;
cvt_u32_u64 $s0, $d0;
// Barrier between the staging-buffer stores above and the
// staging-buffer load below
barrier;
// Read the transposed staging-buffer element and write it to outBuf
ld_group_align(4)_u32 $s0, [$s0];
st_global_align(4)_u32 $s0, [$d1];
ret;
};
@@ -0,0 +1,34 @@
module &m:1:0:$full:$large:$default;
extension "amd:gcn";
extension "IMAGE";
decl prog function &abort()();
// Element-wise copy kernel: each work-item copies one 32-bit element,
// b[i] = a[i], where i is the work-item's absolute id in dimension 0.
//
// a  base address of the source buffer in global memory
// b  base address of the destination buffer in global memory
prog kernel &__vector_copy_kernel(
kernarg_u64 %a,
kernarg_u64 %b)
{
pragma "AMD RTI", "ARGSTART:__vector_copy_kernel";
pragma "AMD RTI", "version:3:1:104";
pragma "AMD RTI", "device:generic";
pragma "AMD RTI", "uniqueid:1024";
pragma "AMD RTI", "function:1:0";
pragma "AMD RTI", "memory:64bitABI";
pragma "AMD RTI", "uavid:8";
pragma "AMD RTI", "privateid:8";
pragma "AMD RTI", "ARGEND:__vector_copy_kernel";
@__vector_copy_kernel_entry:
// BB#0: // %entry
// $d0 = byte offset of element i: the id is sign-extended from
// 32 to 64 bits and multiplied by 4 (shift left by two)
workitemabsid_u32 $s0, 0;
cvt_s64_s32 $d0, $s0;
shl_u64 $d0, $d0, 2;
// $d1 = address of b[i]
ld_kernarg_align(8)_width(all)_u64 $d1, [%b];
add_u64 $d1, $d1, $d0;
// $d0 = address of a[i]
ld_kernarg_align(8)_width(all)_u64 $d2, [%a];
add_u64 $d0, $d2, $d0;
// Copy the element
ld_global_u32 $s0, [$d0];
st_global_u32 $s0, [$d1];
ret;
};
@@ -0,0 +1,64 @@
module &m:1:0:$base:$large:$default;
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
decl prog function &abort()();
// Element-wise copy kernel: each work-item copies one 32-bit element,
// out[i] = in[i], where i is the work-item's absolute id in dimension 0.
//
// in   base address of the source buffer in global memory
// out  base address of the destination buffer in global memory
prog kernel &__vector_copy_kernel(
kernarg_u64 %in,
kernarg_u64 %out)
{
@__vector_copy_kernel_entry:
// BB#0: // %entry
// $d0 = byte offset of element i: the id is sign-extended from
// 32 to 64 bits and multiplied by 4 (shift left by two)
workitemabsid_u32 $s0, 0;
cvt_s64_s32 $d0, $s0;
shl_u64 $d0, $d0, 2;
// $d1 = address of out[i]
ld_kernarg_align(8)_width(all)_u64 $d1, [%out];
add_u64 $d1, $d1, $d0;
// $d0 = address of in[i]
ld_kernarg_align(8)_width(all)_u64 $d2, [%in];
add_u64 $d0, $d2, $d0;
// Copy the element
ld_global_u32 $s0, [$d0];
st_global_u32 $s0, [$d1];
ret;
};
@@ -0,0 +1,64 @@
module &m:1:0:$full:$large:$default;
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
decl prog function &abort()();
// Element-wise copy kernel: each work-item copies one 32-bit element,
// out[i] = in[i], where i is the work-item's absolute id in dimension 0.
//
// in   base address of the source buffer in global memory
// out  base address of the destination buffer in global memory
prog kernel &__vector_copy_kernel(
kernarg_u64 %in,
kernarg_u64 %out)
{
@__vector_copy_kernel_entry:
// BB#0: // %entry
// $d0 = byte offset of element i: the id is sign-extended from
// 32 to 64 bits and multiplied by 4 (shift left by two)
workitemabsid_u32 $s0, 0;
cvt_s64_s32 $d0, $s0;
shl_u64 $d0, $d0, 2;
// $d1 = address of out[i]
ld_kernarg_align(8)_width(all)_u64 $d1, [%out];
add_u64 $d1, $d1, $d0;
// $d0 = address of in[i]
ld_kernarg_align(8)_width(all)_u64 $d2, [%in];
add_u64 $d0, $d2, $d0;
// Copy the element
ld_global_u32 $s0, [$d0];
st_global_u32 $s0, [$d1];
ret;
};
@@ -0,0 +1,280 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include "cp_process_time.h"
#include "cu_masking.h"
#include "device_load_bandwidth.h"
#include "device_store_bandwidth.h"
#include "dispatch_time.h"
#include "flush_latency.h"
#include "gtest/gtest.h"
#include "hsa_info.h"
#include "image_bandwidth.h"
#include "image_load_bandwidth.h"
#include "image_store_bandwidth.h"
#include "matrix_transpose.h"
#include "memory_copy.h"
#include "memory_allocation.h"
#include "memory_async_copy.h"
#include "queue_concurrency.h"
#include "queue_create_destroy_latency.h"
#include "system_load_bandwidth.h"
#include "system_store_bandwidth.h"
#include "vector_copy.h"
/**
* Try to order tests from fastest running to slowest running.
*/
// Display HSA system information first.
// Drives HsaInfo through SetUp -> Run -> Close; no DisplayResults step.
TEST(rocrtst, Feature_Hsa_Info) {
  HsaInfo hsa_info;

  hsa_info.SetUp();
  hsa_info.Run();
  hsa_info.Close();
}
// Image store bandwidth measurement. Requires HSA_PROFILE_FULL.
// Sequence: SetUp -> Run -> DisplayResults -> Close.
TEST(rocrtst, Perf_Image_Store_Bandwidth) {
  ImageStoreBandwidth image_store;

  image_store.SetUp();
  image_store.Run();
  image_store.DisplayResults();
  image_store.Close();
}
// Image load bandwidth measurement. Requires HSA_PROFILE_FULL.
// Sequence: SetUp -> Run -> DisplayResults -> Close.
TEST(rocrtst, Perf_Image_Load_Bandwidth) {
  ImageLoadBandwidth image_load;

  image_load.SetUp();
  image_load.Run();
  image_load.DisplayResults();
  image_load.Close();
}
// Image bandwidth measurement. Requires HSA_PROFILE_FULL.
// Sequence: SetUp -> Run -> DisplayResults -> Close.
TEST(rocrtst, Perf_Image_Bandwidth) {
  ImageBandwidth image_bandwidth;

  image_bandwidth.SetUp();
  image_bandwidth.Run();
  image_bandwidth.DisplayResults();
  image_bandwidth.Close();
}
// Queue concurrency measurement. Requires HSA_PROFILE_FULL.
// Sequence: SetUp -> Run -> DisplayResults -> Close.
TEST(rocrtst, Perf_Queue_Concurrency) {
  QueueConcurrency queue_concurrency;

  queue_concurrency.SetUp();
  queue_concurrency.Run();
  queue_concurrency.DisplayResults();
  queue_concurrency.Close();
}
// CU masking feature test: SetUp -> Run -> Close, no DisplayResults step.
TEST(rocrtst, Feature_Cu_Masking) {
  CuMasking cu_masking;

  cu_masking.SetUp();
  cu_masking.Run();
  cu_masking.Close();
}
// Flush latency measurement: SetUp -> Run -> DisplayResults -> Close.
TEST(rocrtst, Perf_Flush_Latency) {
  FlushLatency flush_latency;

  flush_latency.SetUp();
  flush_latency.Run();
  flush_latency.DisplayResults();
  flush_latency.Close();
}
// Disabled: this test appears to overwrite memory outside the bounds of
// its out_data_ buffer. Skipping the free of out_data_ hides the
// problem; with the free in place a crash occurs either immediately
// or at some later point.
TEST(rocrtst, DISABLED_Perf_Device_Memory_Store_Bandwidth) {
  DeviceStoreBandwidth device_store;

  device_store.SetUp();
  device_store.Run();
  device_store.DisplayResults();
  device_store.Close();
}
// Disabled: this test appears to overwrite memory outside the bounds of
// its out_data_ buffer. Skipping the free of out_data_ hides the
// problem; with the free in place a crash occurs either immediately
// or at some later point.
TEST(rocrtst, DISABLED_Perf_Device_Memory_Load_Bandwidth) {
  DeviceLoadBandwidth device_load;

  device_load.SetUp();
  device_load.Run();
  device_load.DisplayResults();
  device_load.Close();
}
// Dispatch time, single-kernel launch with the default signal enabled
// (the "SpinWait" variant per the test name). Runs 100 iterations.
TEST(rocrtst, Perf_Dispatch_Time_Single_SpinWait) {
  DispatchTime dispatch;

  // Configure before SetUp.
  dispatch.set_num_iteration(100);
  dispatch.UseDefaultSignal(true);
  dispatch.LaunchSingleKernel(true);

  dispatch.SetUp();
  dispatch.Run();
  dispatch.DisplayResults();
  dispatch.Close();
}
// Dispatch time, single-kernel launch with the default signal disabled
// (the "Interrupt" variant per the test name).
TEST(rocrtst, Perf_Dispatch_Time_Single_Interrupt) {
  DispatchTime dispatch;

  // Configure before SetUp.
  dispatch.UseDefaultSignal(false);
  dispatch.LaunchSingleKernel(true);

  dispatch.SetUp();
  dispatch.Run();
  dispatch.DisplayResults();
  dispatch.Close();
}
// Dispatch time, multi-kernel launch with the default signal enabled
// (the "SpinWait" variant per the test name).
TEST(rocrtst, Perf_Dispatch_Time_Multi_SpinWait) {
  DispatchTime dispatch;

  // Configure before SetUp.
  dispatch.UseDefaultSignal(true);
  dispatch.LaunchSingleKernel(false);

  dispatch.SetUp();
  dispatch.Run();
  dispatch.DisplayResults();
  dispatch.Close();
}
// Dispatch time, multi-kernel launch with the default signal disabled
// (the "Interrupt" variant per the test name).
TEST(rocrtst, Perf_Dispatch_Time_Multi_Interrupt) {
  DispatchTime dispatch;

  // Configure before SetUp.
  dispatch.UseDefaultSignal(false);
  dispatch.LaunchSingleKernel(false);

  dispatch.SetUp();
  dispatch.Run();
  dispatch.DisplayResults();
  dispatch.Close();
}
// CP process time measurement over 10 iterations. Currently disabled
// through the DISABLED_ test-name prefix.
TEST(rocrtst, DISABLED_Perf_CpProcessTime) {
  CpProcessTime cp_time;

  cp_time.set_num_iteration(10);

  cp_time.SetUp();
  cp_time.Run();
  cp_time.DisplayResults();
  cp_time.Close();
}
// Memory allocation measurement. The test object is constructed with
// the argument 10 (its meaning is defined by MemoryAllocation).
TEST(rocrtst, Perf_Memory_Allocation) {
  MemoryAllocation allocation(10);

  allocation.SetUp();
  allocation.Run();
  allocation.DisplayResults();
  allocation.Close();
}
#if MEM_POOL_FILL_BUG
// Queue latency measurement over 10 iterations.
// Only compiled when MEM_POOL_FILL_BUG evaluates to non-zero.
TEST(rocrtst, Perf_Queue_Latency) {
  QueueLatency queue_latency;

  queue_latency.set_num_iteration(10);

  queue_latency.SetUp();
  queue_latency.Run();
  queue_latency.DisplayResults();
  queue_latency.Close();
}
// System memory load bandwidth measurement.
// Only compiled when MEM_POOL_FILL_BUG evaluates to non-zero.
TEST(rocrtst, Perf_System_Memory_Load_Bandwidth) {
  SystemLoadBandwidth system_load;

  system_load.SetUp();
  system_load.Run();
  system_load.DisplayResults();
  system_load.Close();
}
// System memory store bandwidth measurement.
// Only compiled when MEM_POOL_FILL_BUG evaluates to non-zero.
TEST(rocrtst, Perf_System_Memory_Store_Bandwidth) {
  SystemStoreBandwidth system_store;

  system_store.SetUp();
  system_store.Run();
  system_store.DisplayResults();
  system_store.Close();
}
// Memory copy measurement over 10 iterations.
// Only compiled when MEM_POOL_FILL_BUG evaluates to non-zero.
TEST(rocrtst, Perf_Memory_Copy) {
  MemoryCopy memory_copy;

  memory_copy.set_num_iteration(10);

  memory_copy.SetUp();
  memory_copy.Run();
  memory_copy.DisplayResults();
  memory_copy.Close();
}
#endif
#if 0
// These tests were not complete. Needs research/work.
// Vector copy feature test: SetUp -> Run -> Close.
// Sits inside an "#if 0" region, so it is not compiled at present.
TEST(rocrtst, Feature_Vector_Copy) {
  VectorCopy vector_copy;

  vector_copy.SetUp();
  vector_copy.Run();
  vector_copy.Close();
}
// Matrix transpose measurement: SetUp -> Run -> DisplayResults -> Close.
// Sits inside an "#if 0" region, so it is not compiled at present.
TEST(rocrtst, Perf_Matrix_Transpose) {
  MatrixTranspose transpose;

  transpose.SetUp();
  transpose.Run();
  transpose.DisplayResults();
  transpose.Close();
}
#endif
//#if NEED_TO_MAKE_BATCH
// Asynchronous memory copy measurement over 10 iterations.
TEST(rocrtst, Perf_Memory_Async_Copy) {
  MemoryAsyncCopy async_copy;

  async_copy.set_num_iteration(10);

  async_copy.SetUp();
  async_copy.Run();
  async_copy.DisplayResults();
  async_copy.Close();
}
//#endif
// Test-runner entry point: hands the command line to GoogleTest and
// returns the result of RUN_ALL_TESTS() as the process exit status.
int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_status = RUN_ALL_TESTS();
  return exit_status;
}
@@ -0,0 +1,289 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include "matrix_transpose.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#include "hsa/hsa_ext_finalize.h"
#include "gtest/gtest.h"
#include <stdlib.h>
#include <algorithm>
// Tables of block sizes and matrix dimensions. In this file they are only
// referenced from the commented-out selection hints in InitializeData().
static constexpr unsigned int NUM_BLOCK_SIZES = 2;
static constexpr unsigned int blockSizes[NUM_BLOCK_SIZES] = {8, 16};

static constexpr unsigned int NUM_MATRIX_DIMS = 2;
static constexpr unsigned int matrixDims[NUM_MATRIX_DIMS] = {1024, 64};
// Starts with every buffer pointer null and every dimension / timing value
// zeroed. Initializers are listed in member declaration order.
MatrixTranspose::MatrixTranspose(void)
    : BaseRocR(),
      in_buffer_(NULL),
      out_buffer_(NULL),
      in_buffer_sys_(NULL),
      out_buffer_sys_(NULL),
      width_(0),
      height_(0),
      buf_size_(0),
      block_size_(0),
      time_mean_(0.0) {
}
// Releases the host-side buffers that InitializeData() obtains from
// aligned_alloc(). Both pointers are NULL until InitializeData() runs, and
// free(NULL) is a no-op, so this is safe even if SetUp() was never called.
MatrixTranspose::~MatrixTranspose(void) {
  free(in_buffer_sys_);
  in_buffer_sys_ = NULL;

  free(out_buffer_sys_);
  out_buffer_sys_ = NULL;
}
// Prepares everything Run() needs: host data, HSA state, two pool buffers
// made accessible to the GPU agent, a queue, the loaded kernel, and an AQL
// dispatch packet with its 2-D geometry filled in.
void MatrixTranspose::SetUp(void) {
hsa_status_t err;
// Choose block/matrix dimensions and allocate + fill the host buffers.
InitializeData();
set_kernel_file_name("transpose_kernel.o");
set_kernel_name("&__OpenCL_matrixTranspose_kernel");
// Nothing below can work without HSA; return without reporting a failure.
if (HSA_STATUS_SUCCESS != rocrtst::InitAndSetupHSA(this)) {
return;
}
hsa_agent_t* gpu_dev = gpu_device1();
hsa_agent_t* cpu_dev = cpu_device();
// FindGlobalPool stores its result in cpu_pool(); the iteration is expected
// to stop early, which is reported as HSA_STATUS_INFO_BREAK.
err = hsa_amd_agent_iterate_memory_pools(*cpu_dev, rocrtst::FindGlobalPool,
&cpu_pool());
ASSERT_EQ(err, HSA_STATUS_INFO_BREAK);
// Allocate input and output buffers of buf_size_ bytes from the CPU pool.
err = hsa_amd_memory_pool_allocate(cpu_pool(), buf_size_, 0,
(void**) &in_buffer_);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
err = hsa_amd_memory_pool_allocate(cpu_pool(), buf_size_, 0,
(void**) &out_buffer_);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Grant the GPU agent access to both pool buffers.
err = hsa_amd_agents_allow_access(1, gpu_dev, NULL, in_buffer_);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
err = hsa_amd_agents_allow_access(1, gpu_dev, NULL, out_buffer_);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Create a queue
// NOTE(review): the results of CreateQueue and LoadKernelFromObjFile are not
// inspected here - confirm whether they report errors some other way.
hsa_queue_t* q = nullptr;
rocrtst::CreateQueue(*gpu_dev, &q);
set_main_queue(q);
rocrtst::LoadKernelFromObjFile(this);
// Fill up aql packet
rocrtst::InitializeAQLPacket(this, &aql());
// Two-dimensional dispatch: block_size_ x block_size_ work-groups over a
// width_ x height_ grid.
aql().setup = 0;
aql().setup |= 2 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
aql().workgroup_size_x = block_size_;
aql().workgroup_size_y = block_size_;
aql().grid_size_x = width_;
aql().grid_size_y = height_;
// Group segment sized for one uint per work-item of a work-group.
aql().group_segment_size = sizeof(uint) * block_size_ * block_size_;
// Debug
#ifdef DEBUG
std::cout << "workgroup size: " << block_size_ << ", " << block_size_
<< ", " << 1 << std::endl;
std::cout << "grid size: " << aql().grid_size_x << ", " <<
aql().grid_size_y << ", " << aql().grid_size_z << std::endl;
std::cout << "group segment size: " << aql().group_segment_size << std::endl;
#endif
}
void MatrixTranspose::Run(void) {
hsa_status_t err;
hsa_agent_t* gpu_dev = gpu_device1();
if (!rocrtst::CheckProfile(this)) {
return;
}
// Allocate kernel parameter
typedef struct args_t {
uint* offset_0;
uint* offset_1;
uint* offset_2;
uint* printf_buffer;
uint* vqueue_buffer;
uint* aqlwrap_pointer;
uint* in_buf;
uint* out_buf;
uint* local_buf;
uint iblock_size;
uint iwidth;
uint iheight;
} args;
args* kern_ptr = NULL;
err = hsa_amd_memory_pool_allocate(cpu_pool(), sizeof(args), 0,
(void**) &kern_ptr);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
err = hsa_amd_agents_allow_access(1, gpu_dev, NULL, kern_ptr);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
kern_ptr->offset_0 = 0;
kern_ptr->offset_1 = 0;
kern_ptr->offset_2 = 0;
kern_ptr->printf_buffer = 0;
kern_ptr->vqueue_buffer = 0;
kern_ptr->aqlwrap_pointer = 0;
kern_ptr->in_buf = in_buffer_sys_;
kern_ptr->out_buf = out_buffer_sys_;
kern_ptr->local_buf = 0;
kern_ptr->iblock_size = block_size_;
kern_ptr->iwidth = width_;
kern_ptr->iheight = height_;
aql().kernarg_address = kern_ptr;
//Obtain the current queue write index.
uint64_t idx = hsa_queue_add_write_index_relaxed(main_queue(), 1);
((hsa_kernel_dispatch_packet_t*)(main_queue()->base_address))[idx] = aql();
rocrtst::PerfTimer p_timer;
int id = p_timer.CreateTimer();
p_timer.StartTimer(id);
((hsa_kernel_dispatch_packet_t*)(main_queue()->base_address))[idx].header |=
HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
hsa_signal_store_release(main_queue()->doorbell_signal, idx);
//Wait on the dispatch signal until the kernel is finished.
hsa_signal_wait_scacquire(signal(), HSA_SIGNAL_CONDITION_LT, 1,
(uint64_t) - 1, HSA_WAIT_STATE_ACTIVE);
p_timer.StopTimer(id);
hsa_amd_profiling_dispatch_time_t dispatch_time;
err = hsa_amd_profiling_get_dispatch_time(*gpu_dev, signal(), &dispatch_time);
uint64_t stamp = dispatch_time.end - dispatch_time.start;
uint64_t freq;
err = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &freq);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
std::cout << "Kernel time is: " <<
(double) stamp / (double) freq * 1000.0 << std::endl;
hsa_signal_store_release(signal(), 1);
// Verify Results
VerifyResults (out_buffer_sys_);
// Abandon the first result which is warm up
time_mean_ = p_timer.ReadTimer(id); //rocrtst::CalcMean(timer);
}
// Prints the time recorded by Run(); prints nothing when CheckProfile()
// rejects this test.
void MatrixTranspose::DisplayResults(void) const {
  if (rocrtst::CheckProfile(this)) {
    std::cout << "============================================" << std::endl;
    std::cout << "Matrix Transpose Mean Time: " << time_mean_ << std::endl;
  }
}
void MatrixTranspose::Close(void) {
hsa_status_t err;
err = rocrtst::CommonCleanUp(this);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}
void MatrixTranspose::InitializeData(void) {
// int openTest = 1;
block_size_ = 16; //blockSizes[openTest % NUM_BLOCK_SIZES];
width_ = 1920; //matrixDims[openTest / NUM_BLOCK_SIZES];
height_ = width_;
buf_size_ = width_ * height_ * sizeof(uint);
in_buffer_sys_ = (uint*) aligned_alloc(256, buf_size_);
SetData (in_buffer_sys_);
out_buffer_sys_ = (uint*) aligned_alloc(256, buf_size_);
FillData(out_buffer_sys_, 0xdeadbeef);
return;
}
// Writes each element's own row-major linear index into it, for a
// height_ x width_ layout.
void MatrixTranspose::SetData(uint* buffer) {
  for (unsigned int row = 0; row < height_; ++row) {
    for (unsigned int col = 0; col < width_; ++col) {
      const unsigned int linear = row * width_ + col;
      buffer[linear] = linear;
    }
  }
}
// Sets the first width_ * height_ elements of buffer to val.
void MatrixTranspose::FillData(uint* buffer, unsigned int val) {
  const unsigned int count = width_ * height_;
  std::fill_n(buffer, count, val);
}
// Checks that buffer holds the transpose of the pattern SetData() writes:
// element (i, j) of the width_ x height_ result must equal j * width_ + i.
// The first mismatch raises a fatal gtest failure and returns, so "PASSED!"
// is printed only when every element matches.
void MatrixTranspose::VerifyResults(uint* buffer) {
  for (unsigned int i = 0; i < width_; ++i) {
    for (unsigned int j = 0; j < height_; ++j) {
      ASSERT_EQ(*(buffer + i * height_ + j), j * width_ + i);
    }
  }

  std::cout << "PASSED!" << std::endl;
}
@@ -0,0 +1,101 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef __ROCRTST_SRC_MATRIX_TRANSPOSE_H__
#define __ROCRTST_SRC_MATRIX_TRANSPOSE_H__
#include "perf_common/perf_base.h"
#include "common/base_rocr.h"
#include "hsa/hsa.h"
// Matrix-transpose test object. Follows the SetUp() -> Run() ->
// DisplayResults() -> Close() lifecycle used by the tests in this suite.
class MatrixTranspose: public rocrtst::BaseRocR, public PerfBase {
public:
// Default constructor.
MatrixTranspose();
// Destructor.
~MatrixTranspose();
// Prepares data, HSA state, queue, kernel and dispatch packet.
virtual void SetUp();
// Dispatches the kernel, measures it and verifies the output.
virtual void Run();
// Performs the common cleanup.
virtual void Close();
// Prints the measured time.
virtual void DisplayResults() const;
private:
// Fills buffer with the input pattern.
virtual void SetData(uint* buffer);
// Sets every element of buffer to val.
virtual void FillData(uint* buffer, unsigned int val);
// Checks buffer against the expected transposed pattern.
virtual void VerifyResults(uint* buffer);
// Chooses the problem size and allocates/initializes the host buffers.
virtual void InitializeData();
// Input / output buffers allocated from the CPU memory pool in SetUp().
uint* in_buffer_;
uint* out_buffer_;
// Input / output host buffers allocated in InitializeData().
uint* in_buffer_sys_;
uint* out_buffer_sys_;
// Matrix dimensions, in elements.
unsigned int width_;
unsigned int height_;
// Size of one matrix buffer, in bytes.
unsigned int buf_size_;
// Work-group edge length used for both the x and y dimensions.
unsigned int block_size_;
// Time recorded by Run() and printed by DisplayResults().
double time_mean_;
// NOTE(review): not referenced in matrix_transpose.cpp - confirm it is
// still needed.
hsa_barrier_and_packet_t bpkt;
};
#endif //__ROCRTST_SRC_MATRIX_TRANSPOSE_H__
@@ -0,0 +1,198 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include "memory_allocation.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "hsa/hsa.h"
#include "gtest/gtest.h"
#include <algorithm>
// Creates the test with zeroed timing results and no outstanding
// allocation.
//
// num_iters: number of measured iterations per allocation size. It is
// forwarded to set_num_iteration() so that RealIterationNum() and
// GetMeanTime(), which both read num_iteration(), honour the
// caller's request instead of silently using the default.
MemoryAllocation::MemoryAllocation(uint32_t num_iters) :
  BaseRocR(), allocation_time_ {0.0}, mem_pool_flag_(0) {
  ptr = NULL;
  set_num_iteration(num_iters);
}
// Nothing to release here: Run() frees each allocation right after timing
// it and resets ptr to NULL.
MemoryAllocation::~MemoryAllocation() {
}
// Human-readable labels for the 16 allocation sizes; entry i is printed by
// DisplayResults() next to the result for Size[i].
const char* MemoryAllocation::Str[16] = {"64K", "128K", "256K", "512K", "1M",
"2M", "4M", "8M", "16M", "32M",
"64M", "128M", "256M", "512M", "1G",
"2G"
};
// Allocation sizes in bytes, doubling from 64 KiB up to 2 GiB. Shifts by
// 10 / 20 / 30 bits stand for the KiB / MiB / GiB multipliers.
const size_t MemoryAllocation::Size[16] = {
  static_cast<size_t>(64) << 10,   // 64K
  static_cast<size_t>(128) << 10,  // 128K
  static_cast<size_t>(256) << 10,  // 256K
  static_cast<size_t>(512) << 10,  // 512K
  static_cast<size_t>(1) << 20,    // 1M
  static_cast<size_t>(2) << 20,    // 2M
  static_cast<size_t>(4) << 20,    // 4M
  static_cast<size_t>(8) << 20,    // 8M
  static_cast<size_t>(16) << 20,   // 16M
  static_cast<size_t>(32) << 20,   // 32M
  static_cast<size_t>(64) << 20,   // 64M
  static_cast<size_t>(128) << 20,  // 128M
  static_cast<size_t>(256) << 20,  // 256M
  static_cast<size_t>(512) << 20,  // 512M
  static_cast<size_t>(1) << 30,    // 1G
  static_cast<size_t>(2) << 30     // 2G
};
void MemoryAllocation::SetUp() {
hsa_status_t err;
if (HSA_STATUS_SUCCESS != rocrtst::InitAndSetupHSA(this)) {
return;
}
hsa_agent_t* cpu_dev = cpu_device();
err = hsa_amd_agent_iterate_memory_pools(*cpu_dev, rocrtst::FindGlobalPool,
&cpu_pool());
EXPECT_EQ(err, HSA_STATUS_INFO_BREAK);
if (err != HSA_STATUS_INFO_BREAK) {
std::cout << "Unable to find global pool. Test will not be run."
<< std::endl;
return;
}
//At this point, cpu_pool() should be in the global segment
err = hsa_amd_memory_pool_get_info(cpu_pool(),
(hsa_amd_memory_pool_info_t) HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS,
&mem_pool_flag_);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}
// For each of the 16 entries in Size[], times RealIterationNum() pool
// allocations (each freed immediately afterwards) and stores the value
// GetMeanTime() derives from the samples in allocation_time_[i].
// Does nothing when CheckProfile() rejects the test or no pool was found.
void MemoryAllocation::Run() {
  if (!rocrtst::CheckProfile(this) || cpu_pool().handle == 0) {
    return;
  }

  const size_t iterations = RealIterationNum();
  hsa_status_t err;

  // One pass per allocation size.
  for (int size_idx = 0; size_idx < 16; ++size_idx) {
    std::vector<double> samples;

    for (uint32_t pass = 0; pass < iterations; ++pass) {
#if DEBUG
      std::cout << "." << std::flush;
#endif
      // Only the allocate call sits between StartTimer and StopTimer.
      rocrtst::PerfTimer allocation_timer;
      const int timer_id = allocation_timer.CreateTimer();

      allocation_timer.StartTimer(timer_id);
      err = hsa_amd_memory_pool_allocate(cpu_pool(), Size[size_idx], 0, &ptr);
      allocation_timer.StopTimer(timer_id);
      ASSERT_EQ(err, HSA_STATUS_SUCCESS);

      // Release the allocation before the next sample.
      err = hsa_amd_memory_pool_free(ptr);
      ASSERT_EQ(err, HSA_STATUS_SUCCESS);
      ptr = NULL;

      // Record this sample.
      samples.push_back(allocation_timer.ReadTimer(timer_id));
    }

#if DEBUG
    std::cout << std::endl;
#endif
    // Reduce the samples to a single figure for this size.
    allocation_time_[size_idx] = GetMeanTime(samples);
  }
}
// Number of samples Run() collects per size: 20% more than num_iteration()
// plus one, truncated to an integer. GetMeanTime() discards the surplus.
size_t MemoryAllocation::RealIterationNum() {
  const double padded = num_iteration() * 1.2 + 1;
  return static_cast<size_t>(padded);
}
// Returns a trimmed mean of the samples in vec. vec is sorted and trimmed
// in place.
//
// After sorting ascending, the smallest sample is dropped, then the next
// num_iteration() * 0.1 smallest (truncated), and finally everything beyond
// the first num_iteration() remaining samples; the mean of what is left is
// returned.
//
// Every trim is clamped to the vector's current size, so a vector shorter
// than RealIterationNum() no longer erases past the end. If nothing is left
// to average, 0.0 is returned instead of dividing by zero.
double MemoryAllocation::GetMeanTime(std::vector<double>& vec) {
  if (vec.empty()) {
    return 0.0;
  }

  std::sort(vec.begin(), vec.end());

  // Drop the single smallest sample.
  vec.erase(vec.begin());

  // Drop the low tail (10% of the requested iteration count).
  const size_t low_trim = std::min(
      static_cast<size_t>(num_iteration() * 0.1), vec.size());
  vec.erase(vec.begin(), vec.begin() + low_trim);

  // Keep at most num_iteration() samples; the rest is the high tail.
  const size_t keep = static_cast<size_t>(num_iteration());
  if (vec.size() > keep) {
    vec.erase(vec.begin() + keep, vec.end());
  }

  if (vec.empty()) {
    return 0.0;
  }

  double mean = 0.0;
  for (size_t it = 0; it < vec.size(); ++it) {
    mean += vec[it];
  }
  mean /= vec.size();

  return mean;
}
// Prints one row per allocation size: the label, the stored allocation time
// and a bandwidth column computed as 2 * Size / time scaled down by 1024^3.
// Prints nothing when CheckProfile() rejects the test.
void MemoryAllocation::DisplayResults() const {
  if (rocrtst::CheckProfile(this)) {
    fprintf(stdout, "==============================================\n");
    fprintf(stdout, " Data Size Allocation_time BandWidth(GB/s)\n");

    for (int row = 0; row < 16; ++row) {
      const double bandwidth =
          2 * Size[row] / allocation_time_[row] / 1024 / 1024 / 1024;
      fprintf(stdout, " %9s %15.6f %15.6f\n", Str[row], allocation_time_[row],
              bandwidth);
    }

    fprintf(stdout, "==============================================\n");
  }
}
void MemoryAllocation::Close() {
hsa_status_t err;
err = rocrtst::CommonCleanUp(this);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
return;
}

Some files were not shown because too many files have changed in this diff Show More