Different Atomic operation tests on GPU and system memory

Change-Id: I04154b588086d49142a64c8fe4826d041ded2991


[ROCm/ROCR-Runtime commit: 044fb8dc27]
Этот коммит содержится в:
rohit pathania
2018-05-18 16:24:32 +05:30
родитель d77f587d15
Коммит a9638cd006
5 изменённых файлов: 817 добавлений и 0 удалений
+499
Просмотреть файл
@@ -0,0 +1,499 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2018, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include <fcntl.h>
#include <algorithm>
#include <iostream>
#include <vector>
#include <memory>
#include <string>
#include "suites/functional/memory_atomics.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_finalize.h"
static const uint32_t kNumBufferElements = 256;
static const int kValue = 5;
// This wrapper atomically writes the provided header and setup to the
// provided AQL packet. The provided AQL packet address should be in the
// queue memory space.
static inline void AtomicSetPacketHeader(uint16_t header, uint16_t setup,
hsa_kernel_dispatch_packet_t* queue_packet) {
__atomic_store_n(reinterpret_cast<uint32_t*>(queue_packet),
header | (setup << 16), __ATOMIC_RELEASE);
}
MemoryAtomic::MemoryAtomic(AtomicTest testtype) :
TestBase() {
set_num_iteration(10); // Number of iterations to execute of the main test;
// This is a default value which can be overridden
// on the command line.
testtype_ = testtype;
std::string name;
std::string desc;
name = "RocR Memory Atomic Test";
desc = "";
if (testtype_ == ADD) {
name += " For ADD";
desc += " This test will do Add kernel atomic"
" operation on GPU and system memory.";
} else if (testtype_ == SUB) {
name += " For Sub";
desc += " This test will do Sub kernel atomic"
" operation on GPU and system memory.";
} else if (testtype_ == AND) {
name += " For And";
desc += " This test will do AND kernel atomic"
" operation on GPU and system memory.";
} else if (testtype_ == OR) {
name += " For Or";
desc += " This test will do OR kernel atomic"
" operation on GPU and system memory.";
} else if (testtype_ == XOR) {
name += " For Xor";
desc += " This test will do XOR kernel atomic"
" operation on GPU and system memory.";
} else if (testtype_ == MIN) {
name += " For Minimum";
desc += " This test will do Minimum kernel atomic"
" operation on GPU and system memory.";
} else if (testtype_ == MAX) {
name += " For Maximum";
desc += " This test will do Maximum kernel atomic"
" operation on GPU and system memory.";
} else if (testtype_ == XCHG) {
name += " For Exchange";
desc += " This test will do Xchg kernel atomic"
" operation on GPU and system memory.";
} else if (testtype_ == INC) {
name += " For Increment";
desc += " This test will do Increment kernel atomic"
" operation on GPU and system memory.";
} else if (testtype_ == DEC) {
name += " For Decremnet";
desc += " This test will do decrement kernel atomic"
" operation on GPU and system memory.";
}
set_title(name);
set_description(desc);
memset(&aql(), 0, sizeof(hsa_kernel_dispatch_packet_t));
}
MemoryAtomic::~MemoryAtomic(void) {
}
// Any 1-time setup involving member variables used in the rest of the test
// should be done here.
void MemoryAtomic::SetUp(void) {
hsa_status_t err;
TestBase::SetUp();
err = rocrtst::SetDefaultAgents(this);
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
err = rocrtst::SetPoolsTypical(this);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Fill up the kernel packet except header
err = rocrtst::InitializeAQLPacket(this, &aql());
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
return;
}
void MemoryAtomic::Run(void) {
// Compare required profile for this test case with what we're actually
// running on
if (!rocrtst::CheckProfile(this)) {
return;
}
TestBase::Run();
}
void MemoryAtomic::DisplayTestInfo(void) {
TestBase::DisplayTestInfo();
}
void MemoryAtomic::DisplayResults(void) const {
// Compare required profile for this test case with what we're actually
// running on
if (!rocrtst::CheckProfile(this)) {
return;
}
return;
}
void MemoryAtomic::Close() {
// This will close handles opened within rocrtst utility calls and call
// hsa_shut_down(), so it should be done after other hsa cleanup
TestBase::Close();
}
void MemoryAtomic::WriteAQLPktToQueue(hsa_queue_t* q) {
void* queue_base = q->base_address;
const uint32_t queue_mask = q->size - 1;
uint64_t index = hsa_queue_add_write_index_relaxed(q, 1);
reinterpret_cast<hsa_kernel_dispatch_packet_t *>(
queue_base)[index & queue_mask] = aql();
}
typedef struct __attribute__ ((aligned(16))) args_t {
int *a;
int *b;
int *c;
int d;
int n;
} args;
static const char kSubTestSeparator[] = " **************************";
static const int kMemoryAllocSize = 10;
void MemoryAtomic::MemoryAtomicTest(hsa_agent_t cpuAgent,
hsa_agent_t gpuAgent) {
hsa_status_t err;
// Get Global Memory Pool on the gpuAgent to allocate gpu buffers
hsa_amd_memory_pool_t gpu_pool;
err = hsa_amd_agent_iterate_memory_pools(gpuAgent,
rocrtst::GetGlobalMemoryPool,
&gpu_pool);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
hsa_amd_memory_pool_access_t access;
hsa_amd_agent_memory_pool_get_info(cpuAgent, gpu_pool,
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS,
&access);
if (access != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
// hsa objects
hsa_queue_t *queue = NULL; // command queue
// get queue size
uint32_t queue_size = 0;
err = hsa_agent_get_info(gpuAgent,
HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// create queue
err = hsa_queue_create(gpuAgent,
queue_size, HSA_QUEUE_TYPE_MULTI,
NULL, NULL, 0, 0, &queue);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Find a memory pool that supports kernel arguments.
hsa_amd_memory_pool_t kernarg_pool;
err = hsa_amd_agent_iterate_memory_pools(cpuAgent,
rocrtst::GetKernArgMemoryPool,
&kernarg_pool);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Allocate the host side buffers
// (refSysdata,oldValues,oldrefdata,kernArg) on system memory
// this is ref sys data on which atomics operation need to done
int *refSysdata = NULL;
// This is oldrefdata which will be required to compare the returned old values after atomics operation
int *oldrefdata = NULL;
// This is returned old values
int *oldValues = NULL;
// This is expected data set
int *expecteddata = NULL;
// Get System Memory Pool on the cpuAgent to allocate host side buffers
hsa_amd_memory_pool_t global_pool;
err = hsa_amd_agent_iterate_memory_pools(cpuAgent,
rocrtst::GetGlobalMemoryPool,
&global_pool);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
err = hsa_amd_memory_pool_allocate(global_pool,
kMemoryAllocSize, 0,
reinterpret_cast<void **>(&oldValues));
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
err = hsa_amd_memory_pool_allocate(global_pool,
kMemoryAllocSize, 0,
reinterpret_cast<void **>(&refSysdata));
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
err = hsa_amd_memory_pool_allocate(global_pool,
kMemoryAllocSize, 0,
reinterpret_cast<void **>(&oldrefdata));
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
err = hsa_amd_memory_pool_allocate(global_pool,
kMemoryAllocSize, 0,
reinterpret_cast<void **>(&expecteddata));
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Allocate the kernel argument buffer from the kernarg_pool.
args *kernArguments = NULL;
err = hsa_amd_memory_pool_allocate(kernarg_pool, sizeof(args_t), 0,
reinterpret_cast<void **>(&kernArguments));
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
memset(oldValues, 0, kMemoryAllocSize * sizeof(int));
memset(expecteddata, 0, kMemoryAllocSize * sizeof(int));
// for the dGPU, we have coarse grained local memory,
// so allocate memory for it on the GPU's GLOBAL segment .
// Get local memory of GPU to allocate device side buffers on which atomics operation need to done
int *gpuRefData = NULL;
err = hsa_amd_memory_pool_allocate(gpu_pool, kMemoryAllocSize, 0,
reinterpret_cast<void **>(&gpuRefData));
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Allow cpuAgent access to all allocated GPU memory.
err = hsa_amd_agents_allow_access(1, &cpuAgent, NULL, gpuRefData);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
memset(gpuRefData, 0, kMemoryAllocSize * sizeof(int));
// initialize the host buffers & gpuRefData buffer
for (int i = 0; i < kMemoryAllocSize; ++i) {
unsigned int seed = time(NULL);
refSysdata[i] = 6 + rand_r(&seed) % 1;
gpuRefData[i] = 6 + rand_r(&seed) % 1;
oldrefdata[i] = refSysdata[i];
}
// Allow gpuAgent access to all allocated system memory.
err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, oldValues);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, refSysdata);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, oldrefdata);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, kernArguments);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
kernArguments->a = refSysdata;
kernArguments->b = gpuRefData;
kernArguments->c = oldValues;
if (testtype_ != INC && testtype_ != DEC) {
kernArguments->d = kValue;
}
// Create the executable, get symbol by name and load the code object
set_kernel_file_name("atomicOperations_kernels.hsaco");
if (testtype_ == ADD) {
set_kernel_name("test_atomic_add");
// set the expected data result set from kernel
for (int i = 0; i < kMemoryAllocSize; ++i) {
expecteddata[i] = oldrefdata[i] + kValue;
}
} else if (testtype_ == SUB) {
set_kernel_name("test_atomic_sub");
// set the expected data result set from kernel
for (int i = 0; i < kMemoryAllocSize; ++i) {
expecteddata[i] = oldrefdata[i] - kValue;
}
} else if (testtype_ == AND) {
set_kernel_name("test_atomic_and");
// set the expected data result set from kernel
for (int i = 0; i < kMemoryAllocSize; ++i) {
expecteddata[i] = oldrefdata[i] & kValue;
}
} else if (testtype_ == OR) {
set_kernel_name("test_atomic_or");
// set the expected data result set from kernel
for (int i = 0; i < kMemoryAllocSize; ++i) {
expecteddata[i] = oldrefdata[i] | kValue;
}
} else if (testtype_ == XOR) {
set_kernel_name("test_atomic_xor");
// set the expected data result set from kernel
for (int i = 0; i < kMemoryAllocSize; ++i) {
expecteddata[i] = oldrefdata[i] ^ kValue;
}
} else if (testtype_ == MIN) {
set_kernel_name("test_atomic_min");
// set the expected data result set from kernel
for (int i = 0; i < kMemoryAllocSize; ++i) {
expecteddata[i] = std::min(oldrefdata[i], kValue);
}
} else if (testtype_ == MAX) {
set_kernel_name("test_atomic_max");
// set the expected data result set from kernel
for (int i = 0; i < kMemoryAllocSize; ++i) {
expecteddata[i] = std::max(oldrefdata[i], kValue);
}
} else if (testtype_ == INC) {
set_kernel_name("test_atomic_inc");
// set the expected data result set from kernel
for (int i = 0; i < kMemoryAllocSize; ++i) {
expecteddata[i] = oldrefdata[i] + 4;
}
} else if (testtype_ == DEC) {
set_kernel_name("test_atomic_dec");
// set the expected data result set from kernel
for (int i = 0; i < kMemoryAllocSize; ++i) {
expecteddata[i] = oldrefdata[i] - 4;
}
} else if (testtype_ == XCHG) {
set_kernel_name("test_atomic_xchg");
// set the expected data result set from kernel
for (int i = 0; i < kMemoryAllocSize; ++i) {
expecteddata[i] = kValue;
}
} else {
if (verbosity() > 0) {
std::cout<< "No test specified" <<std::endl;
}
}
err = rocrtst::LoadKernelFromObjFile(this, &gpuAgent);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Fill up the kernel packet except header
err = rocrtst::InitializeAQLPacket(this, &aql());
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
aql().workgroup_size_x = 256;
aql().workgroup_size_y = 1;
aql().workgroup_size_z = 1;
aql().grid_size_x = kMemoryAllocSize;
aql().kernarg_address = kernArguments;
aql().kernel_object = kernel_object();
const uint32_t queue_mask = queue->size - 1;
// Load index for writing header later to command queue at same index
uint64_t index = hsa_queue_load_write_index_relaxed(queue);
// This function simply copies the data we've collected so far into our
// local AQL packet, except the the setup and header fields.
WriteAQLPktToQueue(queue);
aql().header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
aql().header |= HSA_FENCE_SCOPE_SYSTEM <<
HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
aql().header |= HSA_FENCE_SCOPE_SYSTEM <<
HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
void* q_base = queue->base_address;
// Set the Aql packet header
AtomicSetPacketHeader(aql().header, aql().setup,
&(reinterpret_cast<hsa_kernel_dispatch_packet_t*>
(q_base))[index & queue_mask]);
// ringdoor bell
hsa_signal_store_relaxed(queue->doorbell_signal, index);
// wait for the signal and reset it for future use
while (hsa_signal_wait_scacquire(aql().completion_signal, HSA_SIGNAL_CONDITION_LT, 1,
(uint64_t)-1, HSA_WAIT_STATE_ACTIVE)) { }
hsa_signal_store_relaxed(aql().completion_signal, 1);
// compare results with expected results
for (int i = 0; i < kMemoryAllocSize; ++i) {
ASSERT_EQ(refSysdata[i], expecteddata[i]);
ASSERT_EQ(gpuRefData[i], expecteddata[i]);
ASSERT_EQ(oldValues[i], oldrefdata[i]);
}
if (refSysdata) { hsa_memory_free(refSysdata); }
if (oldrefdata) { hsa_memory_free(oldrefdata); }
if (oldValues) {hsa_memory_free(oldValues); }
if (gpuRefData) {hsa_memory_free(gpuRefData); }
if (kernArguments) { hsa_memory_free(kernArguments); }
if (queue) { hsa_queue_destroy(queue); }
} else {
if (verbosity() > 0) {
std::cout<< "Test not applicable as system is not large bar."
"Skipping."<< std::endl;
std::cout << kSubTestSeparator << std::endl;
}
return;
}
}
void MemoryAtomic::MemoryAtomicTest(void) {
hsa_status_t err;
// find all cpu agents
std::vector<hsa_agent_t> cpus;
err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// find all gpu agents
std::vector<hsa_agent_t> gpus;
err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
for (unsigned int i = 0 ; i< gpus.size(); ++i) {
MemoryAtomicTest(cpus[0], gpus[i]);
}
}
+100
Просмотреть файл
@@ -0,0 +1,100 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2018, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef ROCRTST_SUITES_FUNCTIONAL_MEMORY_ATOMICS_H_
#define ROCRTST_SUITES_FUNCTIONAL_MEMORY_ATOMICS_H_
#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"
// Atomic Test type
enum AtomicTest {
ADD, // For add atomic operation
SUB, // For sub atomic operation
AND, // For and atomic operation
OR, // For or atomic operation
XOR, // For xor atomic operation
INC, // For inc atomic operation
DEC, // For dec atomic operation
MAX, // For max atomic operation
MIN, // For min atomic operation
XCHG, // For xchg atomic operation
NO_TEST};
class MemoryAtomic : public TestBase {
public:
explicit MemoryAtomic(AtomicTest testtype);
// @Brief: Destructor for test case of MemoryTest
virtual ~MemoryAtomic();
// @Brief: Setup the environment for measurement
virtual void SetUp();
// @Brief: Core measurement execution
virtual void Run();
// @Brief: Clean up and retrive the resource
virtual void Close();
// @Brief: Display results
virtual void DisplayResults() const;
// @Brief: Display information about what this test does
virtual void DisplayTestInfo(void);
void MemoryAtomicTest(void);
private:
void MemoryAtomicTest(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent);
void WriteAQLPktToQueue(hsa_queue_t* q);
AtomicTest testtype_;
};
#endif // ROCRTST_SUITES_FUNCTIONAL_MEMORY_ATOMICS_H_
+7
Просмотреть файл
@@ -316,6 +316,7 @@ set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
set(CL_FILE_LIST "${KERNELS_DIR}/gpuReadWrite_kernels.cl")
build_sample_for_devices("gpuReadWrite")
# Vector Add Debug Trap
set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
set(CL_FILE_LIST "${KERNELS_DIR}/vector_add_debug_trap_kernel.cl")
@@ -326,6 +327,12 @@ set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
set(CL_FILE_LIST "${KERNELS_DIR}/vector_add_memory_fault_kernel.cl")
build_sample_for_devices("vector_add_memory_fault")
# atomic_add_kernels
set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
set(CL_FILE_LIST "${KERNELS_DIR}/atomicOperations_kernels.cl")
build_sample_for_devices("atomicOperations")
# Build rules
add_executable(${ROCRTST} ${performanceSources} ${functionalSources} ${negativeSources} ${stressSources}
${common_srcs} ${testCommonSources})
+140
Просмотреть файл
@@ -0,0 +1,140 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2018, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
__kernel void test_atomic_add(volatile __global int *sysMemory,
volatile __global int *gpuMemory,
__global int *oldValues, int value) {
int tid = get_global_id(0);
oldValues[tid] = atomic_add(&sysMemory[tid], value);
atomic_add(&gpuMemory[tid], value);
}
__kernel void test_atomic_sub(volatile __global int *sysMemory,
volatile __global int *gpuMemory,
__global int *oldValues, int value) {
int tid = get_global_id(0);
oldValues[tid] = atomic_sub(&sysMemory[tid], value);
atomic_sub(&gpuMemory[tid], value);
}
__kernel void test_atomic_and(volatile __global int *sysMemory,
volatile __global int *gpuMemory,
__global int *oldValues, int value) {
int tid = get_global_id(0);
oldValues[tid] = atomic_and(&sysMemory[tid], value);
atomic_and(&gpuMemory[tid], value);
}
__kernel void test_atomic_or(volatile __global int *sysMemory,
volatile __global int *gpuMemory,
__global int *oldValues, int value) {
int tid = get_global_id(0);
oldValues[tid] = atomic_or(&sysMemory[tid], value);
atomic_or(&gpuMemory[tid], value);
}
__kernel void test_atomic_xor(volatile __global int *sysMemory,
volatile __global int *gpuMemory,
__global int *oldValues, int value) {
int tid = get_global_id(0);
oldValues[tid] = atomic_xor(&sysMemory[tid], value);
atomic_xor(&gpuMemory[tid], value);
}
__kernel void test_atomic_xchg(volatile __global int *sysMemory,
volatile __global int *gpuMemory,
__global int *oldValues, int value) {
int tid = get_global_id(0);
oldValues[tid] = atomic_xchg(&sysMemory[tid], value);
atomic_xchg(&gpuMemory[tid], value);
}
__kernel void test_atomic_inc(volatile __global int *sysMemory,
volatile __global int *gpuMemory,
__global int *oldValues) {
int tid = get_global_id(0);
oldValues[tid] = atomic_inc(&sysMemory[tid]);
atomic_inc(&sysMemory[tid]);
atomic_inc(&sysMemory[tid]);
atomic_inc(&sysMemory[tid]);
atomic_inc(&gpuMemory[tid]);
atomic_inc(&gpuMemory[tid]);
atomic_inc(&gpuMemory[tid]);
atomic_inc(&gpuMemory[tid]);
}
__kernel void test_atomic_dec(volatile __global int *sysMemory,
volatile __global int *gpuMemory,
__global int *oldValues) {
int tid = get_global_id(0);
oldValues[tid] = atomic_dec(&sysMemory[tid]);
atomic_dec(&sysMemory[tid]);
atomic_dec(&sysMemory[tid]);
atomic_dec(&sysMemory[tid]);
atomic_dec(&gpuMemory[tid]);
atomic_dec(&gpuMemory[tid]);
atomic_dec(&gpuMemory[tid]);
atomic_dec(&gpuMemory[tid]);
}
__kernel void test_atomic_max(volatile __global int *sysMemory,
volatile __global int *gpuMemory,
__global int *oldValues, int value) {
int tid = get_global_id(0);
oldValues[tid] = atomic_max(&sysMemory[tid], value);
atomic_max(&gpuMemory[tid], value);
}
__kernel void test_atomic_min(volatile __global int *sysMemory,
volatile __global int *gpuMemory,
__global int *oldValues, int value) {
int tid = get_global_id(0);
oldValues[tid] = atomic_min(&sysMemory[tid], value);
atomic_min(&gpuMemory[tid], value);
}
+71
Просмотреть файл
@@ -53,6 +53,7 @@
#include "suites/functional/memory_access.h"
#include "suites/functional/ipc.h"
#include "suites/functional/memory_alignment.h"
#include "suites/functional/memory_atomics.h"
#include "suites/performance/dispatch_time.h"
#include "suites/performance/memory_async_copy.h"
#include "suites/performance/memory_async_copy_numa.h"
@@ -176,6 +177,76 @@ TEST(rocrtstFunc, Memory_Max_Mem) {
RunCustomTestEpilog(&mt);
}
TEST(rocrtstFunc, Memory_Atomic_Add_Test) {
MemoryAtomic ma(ADD);
RunCustomTestProlog(&ma);
ma.MemoryAtomicTest();
RunCustomTestEpilog(&ma);
}
TEST(rocrtstFunc, Memory_Atomic_Sub_Test) {
MemoryAtomic ma(SUB);
RunCustomTestProlog(&ma);
ma.MemoryAtomicTest();
RunCustomTestEpilog(&ma);
}
TEST(rocrtstFunc, Memory_Atomic_And_Test) {
MemoryAtomic ma(AND);
RunCustomTestProlog(&ma);
ma.MemoryAtomicTest();
RunCustomTestEpilog(&ma);
}
TEST(rocrtstFunc, Memory_Atomic_Or_Test) {
MemoryAtomic ma(OR);
RunCustomTestProlog(&ma);
ma.MemoryAtomicTest();
RunCustomTestEpilog(&ma);
}
TEST(rocrtstFunc, Memory_Atomic_Xor_Test) {
MemoryAtomic ma(XOR);
RunCustomTestProlog(&ma);
ma.MemoryAtomicTest();
RunCustomTestEpilog(&ma);
}
TEST(rocrtstFunc, Memory_Atomic_Min_Test) {
MemoryAtomic ma(MIN);
RunCustomTestProlog(&ma);
ma.MemoryAtomicTest();
RunCustomTestEpilog(&ma);
}
TEST(rocrtstFunc, Memory_Atomic_Max_Test) {
MemoryAtomic ma(MAX);
RunCustomTestProlog(&ma);
ma.MemoryAtomicTest();
RunCustomTestEpilog(&ma);
}
TEST(rocrtstFunc, Memory_Atomic_Inc_Test) {
MemoryAtomic ma(INC);
RunCustomTestProlog(&ma);
ma.MemoryAtomicTest();
RunCustomTestEpilog(&ma);
}
TEST(rocrtstFunc, Memory_Atomic_Dec_Test) {
MemoryAtomic ma(DEC);
RunCustomTestProlog(&ma);
ma.MemoryAtomicTest();
RunCustomTestEpilog(&ma);
}
TEST(rocrtstFunc, Memory_Atomic_Xchg_Test) {
MemoryAtomic ma(XCHG);
RunCustomTestProlog(&ma);
ma.MemoryAtomicTest();
RunCustomTestEpilog(&ma);
}
TEST(rocrtstFunc, DebugBasicTests) {
DebugBasicTest mt;
RunCustomTestProlog(&mt);