From a9638cd006784e1afc0f89090c6ef9dd20612ee9 Mon Sep 17 00:00:00 2001 From: rohit pathania Date: Fri, 18 May 2018 16:24:32 +0530 Subject: [PATCH] Different Atomic operation tests on GPU and system memory Change-Id: I04154b588086d49142a64c8fe4826d041ded2991 [ROCm/ROCR-Runtime commit: 044fb8dc27c36b526f2e2b603b469d9349178cf3] --- .../suites/functional/memory_atomics.cc | 499 ++++++++++++++++++ .../suites/functional/memory_atomics.h | 100 ++++ .../rocrtst/suites/test_common/CMakeLists.txt | 7 + .../kernels/atomicOperations_kernels.cl | 140 +++++ .../rocrtst/suites/test_common/main.cc | 71 +++ 5 files changed, 817 insertions(+) create mode 100755 projects/rocr-runtime/rocrtst/suites/functional/memory_atomics.cc create mode 100755 projects/rocr-runtime/rocrtst/suites/functional/memory_atomics.h create mode 100755 projects/rocr-runtime/rocrtst/suites/test_common/kernels/atomicOperations_kernels.cl diff --git a/projects/rocr-runtime/rocrtst/suites/functional/memory_atomics.cc b/projects/rocr-runtime/rocrtst/suites/functional/memory_atomics.cc new file mode 100755 index 0000000000..943766bd74 --- /dev/null +++ b/projects/rocr-runtime/rocrtst/suites/functional/memory_atomics.cc @@ -0,0 +1,499 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2018, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + + +#include +#include +#include +#include +#include +#include + +#include "suites/functional/memory_atomics.h" +#include "common/base_rocr_utils.h" +#include "common/common.h" +#include "common/helper_funcs.h" +#include "common/hsatimer.h" +#include "gtest/gtest.h" +#include "hsa/hsa.h" +#include "hsa/hsa_ext_finalize.h" + +static const uint32_t kNumBufferElements = 256; +static const int kValue = 5; + + +// This wrapper atomically writes the provided header and setup to the +// provided AQL packet. The provided AQL packet address should be in the +// queue memory space. +static inline void AtomicSetPacketHeader(uint16_t header, uint16_t setup, + hsa_kernel_dispatch_packet_t* queue_packet) { + __atomic_store_n(reinterpret_cast(queue_packet), + header | (setup << 16), __ATOMIC_RELEASE); +} + + + +MemoryAtomic::MemoryAtomic(AtomicTest testtype) : + TestBase() { + set_num_iteration(10); // Number of iterations to execute of the main test; + // This is a default value which can be overridden + // on the command line. + testtype_ = testtype; + std::string name; + std::string desc; + + name = "RocR Memory Atomic Test"; + desc = ""; + + if (testtype_ == ADD) { + name += " For ADD"; + desc += " This test will do Add kernel atomic" + " operation on GPU and system memory."; + } else if (testtype_ == SUB) { + name += " For Sub"; + desc += " This test will do Sub kernel atomic" + " operation on GPU and system memory."; + } else if (testtype_ == AND) { + name += " For And"; + desc += " This test will do AND kernel atomic" + " operation on GPU and system memory."; + } else if (testtype_ == OR) { + name += " For Or"; + desc += " This test will do OR kernel atomic" + " operation on GPU and system memory."; + } else if (testtype_ == XOR) { + name += " For Xor"; + desc += " This test will do XOR kernel atomic" + " operation on GPU and system memory."; + } else if (testtype_ == MIN) { + name += " For Minimum"; + desc += " This test will do Minimum kernel atomic" + " operation on GPU and system memory."; + } else if (testtype_ == MAX) { + name += " For Maximum"; + desc += " This test will do Maximum kernel atomic" + " operation on GPU and system memory."; + } else if (testtype_ == XCHG) { + name += " For Exchange"; + desc += " This test will do Xchg kernel atomic" + " operation on GPU and system memory."; + } else if (testtype_ == INC) { + name += " For Increment"; + desc += " This test will do Increment kernel atomic" + " operation on GPU and system memory."; + } else if (testtype_ == DEC) { + name += " For Decremnet"; + desc += " This test will do decrement kernel atomic" + " operation on GPU and system memory."; + } + + set_title(name); + set_description(desc); + memset(&aql(), 0, sizeof(hsa_kernel_dispatch_packet_t)); +} + +MemoryAtomic::~MemoryAtomic(void) { +} + +// Any 1-time setup involving member variables used in the rest of the test +// should be done here. +void MemoryAtomic::SetUp(void) { + hsa_status_t err; + + TestBase::SetUp(); + + err = rocrtst::SetDefaultAgents(this); + ASSERT_EQ(HSA_STATUS_SUCCESS, err); + + err = rocrtst::SetPoolsTypical(this); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + + // Fill up the kernel packet except header + err = rocrtst::InitializeAQLPacket(this, &aql()); + ASSERT_EQ(HSA_STATUS_SUCCESS, err); + return; +} + +void MemoryAtomic::Run(void) { + // Compare required profile for this test case with what we're actually + // running on + if (!rocrtst::CheckProfile(this)) { + return; + } + + TestBase::Run(); +} + +void MemoryAtomic::DisplayTestInfo(void) { + TestBase::DisplayTestInfo(); +} + +void MemoryAtomic::DisplayResults(void) const { + // Compare required profile for this test case with what we're actually + // running on + if (!rocrtst::CheckProfile(this)) { + return; + } + + return; +} + +void MemoryAtomic::Close() { + // This will close handles opened within rocrtst utility calls and call + // hsa_shut_down(), so it should be done after other hsa cleanup + TestBase::Close(); +} + +void MemoryAtomic::WriteAQLPktToQueue(hsa_queue_t* q) { + void* queue_base = q->base_address; + const uint32_t queue_mask = q->size - 1; + uint64_t index = hsa_queue_add_write_index_relaxed(q, 1); + + reinterpret_cast( + queue_base)[index & queue_mask] = aql(); +} + + + + + +typedef struct __attribute__ ((aligned(16))) args_t { + int *a; + int *b; + int *c; + int d; + int n; + } args; + + + +static const char kSubTestSeparator[] = " **************************"; + + +static const int kMemoryAllocSize = 10; + +void MemoryAtomic::MemoryAtomicTest(hsa_agent_t cpuAgent, + hsa_agent_t gpuAgent) { + hsa_status_t err; + + // Get Global Memory Pool on the gpuAgent to allocate gpu buffers + hsa_amd_memory_pool_t gpu_pool; + err = hsa_amd_agent_iterate_memory_pools(gpuAgent, + rocrtst::GetGlobalMemoryPool, + &gpu_pool); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + + hsa_amd_memory_pool_access_t access; + hsa_amd_agent_memory_pool_get_info(cpuAgent, gpu_pool, + HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, + &access); + if (access != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) { + // hsa objects + hsa_queue_t *queue = NULL; // command queue + // get queue size + uint32_t queue_size = 0; + err = hsa_agent_get_info(gpuAgent, + HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + + // create queue + err = hsa_queue_create(gpuAgent, + queue_size, HSA_QUEUE_TYPE_MULTI, + NULL, NULL, 0, 0, &queue); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + + // Find a memory pool that supports kernel arguments. + hsa_amd_memory_pool_t kernarg_pool; + err = hsa_amd_agent_iterate_memory_pools(cpuAgent, + rocrtst::GetKernArgMemoryPool, + &kernarg_pool); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + + // Allocate the host side buffers + // (refSysdata,oldValues,oldrefdata,kernArg) on system memory + + // this is ref sys data on which atomics operation need to done + int *refSysdata = NULL; + // This is oldrefdata which will be required to compare the returned old values after atomics operation + int *oldrefdata = NULL; + // This is returned old values + int *oldValues = NULL; + // This is expected data set + int *expecteddata = NULL; + + // Get System Memory Pool on the cpuAgent to allocate host side buffers + hsa_amd_memory_pool_t global_pool; + err = hsa_amd_agent_iterate_memory_pools(cpuAgent, + rocrtst::GetGlobalMemoryPool, + &global_pool); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + + err = hsa_amd_memory_pool_allocate(global_pool, + kMemoryAllocSize, 0, + reinterpret_cast(&oldValues)); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + + err = hsa_amd_memory_pool_allocate(global_pool, + kMemoryAllocSize, 0, + reinterpret_cast(&refSysdata)); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + + err = hsa_amd_memory_pool_allocate(global_pool, + kMemoryAllocSize, 0, + reinterpret_cast(&oldrefdata)); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + + err = hsa_amd_memory_pool_allocate(global_pool, + kMemoryAllocSize, 0, + reinterpret_cast(&expecteddata)); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + + + // Allocate the kernel argument buffer from the kernarg_pool. + args *kernArguments = NULL; + err = hsa_amd_memory_pool_allocate(kernarg_pool, sizeof(args_t), 0, + reinterpret_cast(&kernArguments)); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + + + memset(oldValues, 0, kMemoryAllocSize * sizeof(int)); + memset(expecteddata, 0, kMemoryAllocSize * sizeof(int)); + + // for the dGPU, we have coarse grained local memory, + // so allocate memory for it on the GPU's GLOBAL segment . + + // Get local memory of GPU to allocate device side buffers on which atomics operation need to done + int *gpuRefData = NULL; + err = hsa_amd_memory_pool_allocate(gpu_pool, kMemoryAllocSize, 0, + reinterpret_cast(&gpuRefData)); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + + + // Allow cpuAgent access to all allocated GPU memory. + err = hsa_amd_agents_allow_access(1, &cpuAgent, NULL, gpuRefData); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + memset(gpuRefData, 0, kMemoryAllocSize * sizeof(int)); + + // initialize the host buffers & gpuRefData buffer + for (int i = 0; i < kMemoryAllocSize; ++i) { + unsigned int seed = time(NULL); + refSysdata[i] = 6 + rand_r(&seed) % 1; + gpuRefData[i] = 6 + rand_r(&seed) % 1; + oldrefdata[i] = refSysdata[i]; + } + + + // Allow gpuAgent access to all allocated system memory. + err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, oldValues); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, refSysdata); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, oldrefdata); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, kernArguments); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + + kernArguments->a = refSysdata; + kernArguments->b = gpuRefData; + kernArguments->c = oldValues; + if (testtype_ != INC && testtype_ != DEC) { + kernArguments->d = kValue; + } + + // Create the executable, get symbol by name and load the code object + set_kernel_file_name("atomicOperations_kernels.hsaco"); + + if (testtype_ == ADD) { + set_kernel_name("test_atomic_add"); + // set the expected data result set from kernel + for (int i = 0; i < kMemoryAllocSize; ++i) { + expecteddata[i] = oldrefdata[i] + kValue; + } + } else if (testtype_ == SUB) { + set_kernel_name("test_atomic_sub"); + // set the expected data result set from kernel + for (int i = 0; i < kMemoryAllocSize; ++i) { + expecteddata[i] = oldrefdata[i] - kValue; + } + } else if (testtype_ == AND) { + set_kernel_name("test_atomic_and"); + // set the expected data result set from kernel + for (int i = 0; i < kMemoryAllocSize; ++i) { + expecteddata[i] = oldrefdata[i] & kValue; + } + } else if (testtype_ == OR) { + set_kernel_name("test_atomic_or"); + // set the expected data result set from kernel + for (int i = 0; i < kMemoryAllocSize; ++i) { + expecteddata[i] = oldrefdata[i] | kValue; + } + } else if (testtype_ == XOR) { + set_kernel_name("test_atomic_xor"); + // set the expected data result set from kernel + for (int i = 0; i < kMemoryAllocSize; ++i) { + expecteddata[i] = oldrefdata[i] ^ kValue; + } + } else if (testtype_ == MIN) { + set_kernel_name("test_atomic_min"); + // set the expected data result set from kernel + for (int i = 0; i < kMemoryAllocSize; ++i) { + expecteddata[i] = std::min(oldrefdata[i], kValue); + } + } else if (testtype_ == MAX) { + set_kernel_name("test_atomic_max"); + // set the expected data result set from kernel + for (int i = 0; i < kMemoryAllocSize; ++i) { + expecteddata[i] = std::max(oldrefdata[i], kValue); + } + } else if (testtype_ == INC) { + set_kernel_name("test_atomic_inc"); + // set the expected data result set from kernel + for (int i = 0; i < kMemoryAllocSize; ++i) { + expecteddata[i] = oldrefdata[i] + 4; + } + } else if (testtype_ == DEC) { + set_kernel_name("test_atomic_dec"); + // set the expected data result set from kernel + for (int i = 0; i < kMemoryAllocSize; ++i) { + expecteddata[i] = oldrefdata[i] - 4; + } + } else if (testtype_ == XCHG) { + set_kernel_name("test_atomic_xchg"); + // set the expected data result set from kernel + for (int i = 0; i < kMemoryAllocSize; ++i) { + expecteddata[i] = kValue; + } + } else { + if (verbosity() > 0) { + std::cout<< "No test specified" <size - 1; + + // Load index for writing header later to command queue at same index + uint64_t index = hsa_queue_load_write_index_relaxed(queue); + + // This function simply copies the data we've collected so far into our + // local AQL packet, except the the setup and header fields. + WriteAQLPktToQueue(queue); + + + aql().header = HSA_PACKET_TYPE_KERNEL_DISPATCH; + aql().header |= HSA_FENCE_SCOPE_SYSTEM << + HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + aql().header |= HSA_FENCE_SCOPE_SYSTEM << + HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + + void* q_base = queue->base_address; + // Set the Aql packet header + AtomicSetPacketHeader(aql().header, aql().setup, + &(reinterpret_cast + (q_base))[index & queue_mask]); + + + // ringdoor bell + hsa_signal_store_relaxed(queue->doorbell_signal, index); + + // wait for the signal and reset it for future use + while (hsa_signal_wait_scacquire(aql().completion_signal, HSA_SIGNAL_CONDITION_LT, 1, + (uint64_t)-1, HSA_WAIT_STATE_ACTIVE)) { } + + hsa_signal_store_relaxed(aql().completion_signal, 1); + + // compare results with expected results + for (int i = 0; i < kMemoryAllocSize; ++i) { + ASSERT_EQ(refSysdata[i], expecteddata[i]); + ASSERT_EQ(gpuRefData[i], expecteddata[i]); + ASSERT_EQ(oldValues[i], oldrefdata[i]); + } + + if (refSysdata) { hsa_memory_free(refSysdata); } + if (oldrefdata) { hsa_memory_free(oldrefdata); } + if (oldValues) {hsa_memory_free(oldValues); } + if (gpuRefData) {hsa_memory_free(gpuRefData); } + if (kernArguments) { hsa_memory_free(kernArguments); } + if (queue) { hsa_queue_destroy(queue); } + } else { + if (verbosity() > 0) { + std::cout<< "Test not applicable as system is not large bar." + "Skipping."<< std::endl; + std::cout << kSubTestSeparator << std::endl; + } + return; + } +} + +void MemoryAtomic::MemoryAtomicTest(void) { + hsa_status_t err; + // find all cpu agents + std::vector cpus; + err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + + // find all gpu agents + std::vector gpus; + err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + + for (unsigned int i = 0 ; i< gpus.size(); ++i) { + MemoryAtomicTest(cpus[0], gpus[i]); + } +} + diff --git a/projects/rocr-runtime/rocrtst/suites/functional/memory_atomics.h b/projects/rocr-runtime/rocrtst/suites/functional/memory_atomics.h new file mode 100755 index 0000000000..5121b6b463 --- /dev/null +++ b/projects/rocr-runtime/rocrtst/suites/functional/memory_atomics.h @@ -0,0 +1,100 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2018, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ +#ifndef ROCRTST_SUITES_FUNCTIONAL_MEMORY_ATOMICS_H_ +#define ROCRTST_SUITES_FUNCTIONAL_MEMORY_ATOMICS_H_ + + +#include "common/base_rocr.h" +#include "hsa/hsa.h" +#include "suites/test_common/test_base.h" + +// Atomic Test type +enum AtomicTest { + ADD, // For add atomic operation + SUB, // For sub atomic operation + AND, // For and atomic operation + OR, // For or atomic operation + XOR, // For xor atomic operation + INC, // For inc atomic operation + DEC, // For dec atomic operation + MAX, // For max atomic operation + MIN, // For min atomic operation + XCHG, // For xchg atomic operation + NO_TEST}; + +class MemoryAtomic : public TestBase { + public: + explicit MemoryAtomic(AtomicTest testtype); + + // @Brief: Destructor for test case of MemoryTest + virtual ~MemoryAtomic(); + + // @Brief: Setup the environment for measurement + virtual void SetUp(); + + // @Brief: Core measurement execution + virtual void Run(); + + // @Brief: Clean up and retrive the resource + virtual void Close(); + + // @Brief: Display results + virtual void DisplayResults() const; + + // @Brief: Display information about what this test does + virtual void DisplayTestInfo(void); + + void MemoryAtomicTest(void); + + + private: + void MemoryAtomicTest(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent); + + void WriteAQLPktToQueue(hsa_queue_t* q); + + AtomicTest testtype_; +}; + +#endif // ROCRTST_SUITES_FUNCTIONAL_MEMORY_ATOMICS_H_ diff --git a/projects/rocr-runtime/rocrtst/suites/test_common/CMakeLists.txt b/projects/rocr-runtime/rocrtst/suites/test_common/CMakeLists.txt index b8480358f8..6ce934fa9b 100755 --- a/projects/rocr-runtime/rocrtst/suites/test_common/CMakeLists.txt +++ b/projects/rocr-runtime/rocrtst/suites/test_common/CMakeLists.txt @@ -316,6 +316,7 @@ set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}") set(CL_FILE_LIST "${KERNELS_DIR}/gpuReadWrite_kernels.cl") build_sample_for_devices("gpuReadWrite") + # Vector Add Debug Trap set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}") set(CL_FILE_LIST "${KERNELS_DIR}/vector_add_debug_trap_kernel.cl") @@ -326,6 +327,12 @@ set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}") set(CL_FILE_LIST "${KERNELS_DIR}/vector_add_memory_fault_kernel.cl") build_sample_for_devices("vector_add_memory_fault") +# atomic_add_kernels +set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}") +set(CL_FILE_LIST "${KERNELS_DIR}/atomicOperations_kernels.cl") +build_sample_for_devices("atomicOperations") + + # Build rules add_executable(${ROCRTST} ${performanceSources} ${functionalSources} ${negativeSources} ${stressSources} ${common_srcs} ${testCommonSources}) diff --git a/projects/rocr-runtime/rocrtst/suites/test_common/kernels/atomicOperations_kernels.cl b/projects/rocr-runtime/rocrtst/suites/test_common/kernels/atomicOperations_kernels.cl new file mode 100755 index 0000000000..1a3906c238 --- /dev/null +++ b/projects/rocr-runtime/rocrtst/suites/test_common/kernels/atomicOperations_kernels.cl @@ -0,0 +1,140 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2018, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ +#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable +__kernel void test_atomic_add(volatile __global int *sysMemory, + volatile __global int *gpuMemory, + __global int *oldValues, int value) { + int tid = get_global_id(0); + oldValues[tid] = atomic_add(&sysMemory[tid], value); + atomic_add(&gpuMemory[tid], value); +} + +__kernel void test_atomic_sub(volatile __global int *sysMemory, + volatile __global int *gpuMemory, + __global int *oldValues, int value) { + int tid = get_global_id(0); + oldValues[tid] = atomic_sub(&sysMemory[tid], value); + atomic_sub(&gpuMemory[tid], value); +} + +__kernel void test_atomic_and(volatile __global int *sysMemory, + volatile __global int *gpuMemory, + __global int *oldValues, int value) { + int tid = get_global_id(0); + oldValues[tid] = atomic_and(&sysMemory[tid], value); + atomic_and(&gpuMemory[tid], value); +} + +__kernel void test_atomic_or(volatile __global int *sysMemory, + volatile __global int *gpuMemory, + __global int *oldValues, int value) { + int tid = get_global_id(0); + oldValues[tid] = atomic_or(&sysMemory[tid], value); + atomic_or(&gpuMemory[tid], value); +} + +__kernel void test_atomic_xor(volatile __global int *sysMemory, + volatile __global int *gpuMemory, + __global int *oldValues, int value) { + int tid = get_global_id(0); + oldValues[tid] = atomic_xor(&sysMemory[tid], value); + atomic_xor(&gpuMemory[tid], value); +} + +__kernel void test_atomic_xchg(volatile __global int *sysMemory, + volatile __global int *gpuMemory, + __global int *oldValues, int value) { + int tid = get_global_id(0); + oldValues[tid] = atomic_xchg(&sysMemory[tid], value); + atomic_xchg(&gpuMemory[tid], value); +} + +__kernel void test_atomic_inc(volatile __global int *sysMemory, + volatile __global int *gpuMemory, + __global int *oldValues) { + int tid = get_global_id(0); + oldValues[tid] = atomic_inc(&sysMemory[tid]); + atomic_inc(&sysMemory[tid]); + atomic_inc(&sysMemory[tid]); + atomic_inc(&sysMemory[tid]); + + atomic_inc(&gpuMemory[tid]); + atomic_inc(&gpuMemory[tid]); + atomic_inc(&gpuMemory[tid]); + atomic_inc(&gpuMemory[tid]); +} + +__kernel void test_atomic_dec(volatile __global int *sysMemory, + volatile __global int *gpuMemory, + __global int *oldValues) { + int tid = get_global_id(0); + oldValues[tid] = atomic_dec(&sysMemory[tid]); + atomic_dec(&sysMemory[tid]); + atomic_dec(&sysMemory[tid]); + atomic_dec(&sysMemory[tid]); + + atomic_dec(&gpuMemory[tid]); + atomic_dec(&gpuMemory[tid]); + atomic_dec(&gpuMemory[tid]); + atomic_dec(&gpuMemory[tid]); +} + +__kernel void test_atomic_max(volatile __global int *sysMemory, + volatile __global int *gpuMemory, + __global int *oldValues, int value) { + int tid = get_global_id(0); + oldValues[tid] = atomic_max(&sysMemory[tid], value); + atomic_max(&gpuMemory[tid], value); +} + +__kernel void test_atomic_min(volatile __global int *sysMemory, + volatile __global int *gpuMemory, + __global int *oldValues, int value) { + int tid = get_global_id(0); + oldValues[tid] = atomic_min(&sysMemory[tid], value); + atomic_min(&gpuMemory[tid], value); +} + + diff --git a/projects/rocr-runtime/rocrtst/suites/test_common/main.cc b/projects/rocr-runtime/rocrtst/suites/test_common/main.cc index 3947457dd1..39f5971b63 100755 --- a/projects/rocr-runtime/rocrtst/suites/test_common/main.cc +++ b/projects/rocr-runtime/rocrtst/suites/test_common/main.cc @@ -53,6 +53,7 @@ #include "suites/functional/memory_access.h" #include "suites/functional/ipc.h" #include "suites/functional/memory_alignment.h" +#include "suites/functional/memory_atomics.h" #include "suites/performance/dispatch_time.h" #include "suites/performance/memory_async_copy.h" #include "suites/performance/memory_async_copy_numa.h" @@ -176,6 +177,76 @@ TEST(rocrtstFunc, Memory_Max_Mem) { RunCustomTestEpilog(&mt); } +TEST(rocrtstFunc, Memory_Atomic_Add_Test) { + MemoryAtomic ma(ADD); + RunCustomTestProlog(&ma); + ma.MemoryAtomicTest(); + RunCustomTestEpilog(&ma); +} + +TEST(rocrtstFunc, Memory_Atomic_Sub_Test) { + MemoryAtomic ma(SUB); + RunCustomTestProlog(&ma); + ma.MemoryAtomicTest(); + RunCustomTestEpilog(&ma); +} + +TEST(rocrtstFunc, Memory_Atomic_And_Test) { + MemoryAtomic ma(AND); + RunCustomTestProlog(&ma); + ma.MemoryAtomicTest(); + RunCustomTestEpilog(&ma); +} + +TEST(rocrtstFunc, Memory_Atomic_Or_Test) { + MemoryAtomic ma(OR); + RunCustomTestProlog(&ma); + ma.MemoryAtomicTest(); + RunCustomTestEpilog(&ma); +} + +TEST(rocrtstFunc, Memory_Atomic_Xor_Test) { + MemoryAtomic ma(XOR); + RunCustomTestProlog(&ma); + ma.MemoryAtomicTest(); + RunCustomTestEpilog(&ma); +} + +TEST(rocrtstFunc, Memory_Atomic_Min_Test) { + MemoryAtomic ma(MIN); + RunCustomTestProlog(&ma); + ma.MemoryAtomicTest(); + RunCustomTestEpilog(&ma); +} + +TEST(rocrtstFunc, Memory_Atomic_Max_Test) { + MemoryAtomic ma(MAX); + RunCustomTestProlog(&ma); + ma.MemoryAtomicTest(); + RunCustomTestEpilog(&ma); +} + +TEST(rocrtstFunc, Memory_Atomic_Inc_Test) { + MemoryAtomic ma(INC); + RunCustomTestProlog(&ma); + ma.MemoryAtomicTest(); + RunCustomTestEpilog(&ma); +} + +TEST(rocrtstFunc, Memory_Atomic_Dec_Test) { + MemoryAtomic ma(DEC); + RunCustomTestProlog(&ma); + ma.MemoryAtomicTest(); + RunCustomTestEpilog(&ma); +} + +TEST(rocrtstFunc, Memory_Atomic_Xchg_Test) { + MemoryAtomic ma(XCHG); + RunCustomTestProlog(&ma); + ma.MemoryAtomicTest(); + RunCustomTestEpilog(&ma); +} + TEST(rocrtstFunc, DebugBasicTests) { DebugBasicTest mt; RunCustomTestProlog(&mt);