2016-03-08 12:57:22 -06:00
|
|
|
/*
|
|
|
|
|
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
|
|
|
in the Software without restriction, including without limitation the rights
|
|
|
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
|
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
|
|
|
all copies or substantial portions of the Software.
|
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
|
THE SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
|
2016-09-27 17:24:33 +05:30
|
|
|
/* HIT_START
|
2018-09-17 15:26:45 +05:30
|
|
|
* BUILD: %t %s ../test_common.cpp NVCC_OPTIONS -std=c++11 --gpu-architecture=sm_60
|
2016-09-27 17:24:33 +05:30
|
|
|
* RUN: %t
|
|
|
|
|
* HIT_END
|
|
|
|
|
*/
|
|
|
|
|
|
2018-06-02 12:27:17 +01:00
|
|
|
// Includes HIP Runtime
|
|
|
|
|
#include "hip/hip_runtime.h"
|
|
|
|
|
#include <test_common.h>
|
|
|
|
|
|
2016-01-26 20:14:33 -06:00
|
|
|
// includes, system
|
2018-06-02 12:27:17 +01:00
|
|
|
#include <algorithm>
|
2016-01-26 20:14:33 -06:00
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
#include <string.h>
|
|
|
|
|
#include <math.h>
|
2018-06-02 12:27:17 +01:00
|
|
|
#include <type_traits>
|
2016-01-26 20:14:33 -06:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
#define EXIT_WAIVED 2
|
2016-01-26 20:14:33 -06:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
const char* sampleName = "hipSimpleAtomicsTest";
|
2016-01-26 20:14:33 -06:00
|
|
|
|
2018-06-02 12:27:17 +01:00
|
|
|
using namespace std;
|
|
|
|
|
|
2016-01-26 20:14:33 -06:00
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
// Auto-Verification Code
|
|
|
|
|
bool testResult = true;
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
2018-06-02 12:27:17 +01:00
|
|
|
bool computeGoldBitwise(...) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
2016-01-26 20:14:33 -06:00
|
|
|
|
2018-06-02 12:27:17 +01:00
|
|
|
template<typename T, typename enable_if<is_integral<T>{}>::type* = nullptr>
|
|
|
|
|
bool computeGoldBitwise(T* gpuData, int len) {
|
|
|
|
|
T val = 0xff;
|
2016-01-26 20:14:33 -06:00
|
|
|
|
2018-06-02 12:27:17 +01:00
|
|
|
for (int i = 0; i < len; ++i) {
|
|
|
|
|
// 9th element should be 1
|
|
|
|
|
val &= (2 * i + 7);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (val != gpuData[8]) {
|
|
|
|
|
printf("atomicAnd failed\n");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
val = 0;
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < len; ++i) {
|
|
|
|
|
// 10th element should be 0xff
|
|
|
|
|
val |= (1 << i);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (val != gpuData[9]) {
|
|
|
|
|
printf("atomicOr failed\n");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
val = 0xff;
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < len; ++i) {
|
|
|
|
|
// 11th element should be 0xff
|
|
|
|
|
val ^= i;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (val != gpuData[10]) {
|
|
|
|
|
printf("atomicXor failed\n");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template<typename T>
|
|
|
|
|
bool computeGold(T* gpuData, int len) {
|
|
|
|
|
T val = 0;
|
2016-01-26 20:14:33 -06:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
for (int i = 0; i < len; ++i) {
|
2016-01-26 20:14:33 -06:00
|
|
|
val += 10;
|
|
|
|
|
}
|
|
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
if (val != gpuData[0]) {
|
2016-01-26 20:14:33 -06:00
|
|
|
printf("atomicAdd failed\n");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
val = 0;
|
|
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
for (int i = 0; i < len; ++i) {
|
2016-01-26 20:14:33 -06:00
|
|
|
val -= 10;
|
|
|
|
|
}
|
|
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
if (val != gpuData[1]) {
|
2016-01-26 20:14:33 -06:00
|
|
|
printf("atomicSub failed\n");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool found = false;
|
|
|
|
|
|
2018-06-02 12:27:17 +01:00
|
|
|
for (T i = 0; i < len; ++i) {
|
2016-01-26 20:14:33 -06:00
|
|
|
// third element should be a member of [0, len)
|
2018-03-12 11:29:03 +05:30
|
|
|
if (i == gpuData[2]) {
|
2016-01-26 20:14:33 -06:00
|
|
|
found = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
if (!found) {
|
2016-01-26 20:14:33 -06:00
|
|
|
printf("atomicExch failed\n");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
val = -(1 << 8);
|
|
|
|
|
|
2018-06-02 12:27:17 +01:00
|
|
|
for (T i = 0; i < len; ++i) {
|
2016-01-26 20:14:33 -06:00
|
|
|
// fourth element should be len-1
|
|
|
|
|
val = max(val, i);
|
|
|
|
|
}
|
|
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
if (val != gpuData[3]) {
|
2016-01-26 20:14:33 -06:00
|
|
|
printf("atomicMax failed\n");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
val = 1 << 8;
|
|
|
|
|
|
2018-06-02 12:27:17 +01:00
|
|
|
for (T i = 0; i < len; ++i) {
|
2016-01-26 20:14:33 -06:00
|
|
|
val = min(val, i);
|
|
|
|
|
}
|
|
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
if (val != gpuData[4]) {
|
2016-01-26 20:14:33 -06:00
|
|
|
printf("atomicMin failed\n");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int limit = 17;
|
|
|
|
|
val = 0;
|
|
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
for (int i = 0; i < len; ++i) {
|
|
|
|
|
val = (val >= limit) ? 0 : val + 1;
|
2016-01-26 20:14:33 -06:00
|
|
|
}
|
|
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
if (val != gpuData[5]) {
|
2016-01-26 20:14:33 -06:00
|
|
|
printf("atomicInc failed\n");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
limit = 137;
|
|
|
|
|
val = 0;
|
|
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
for (int i = 0; i < len; ++i) {
|
|
|
|
|
val = ((val == 0) || (val > limit)) ? limit : val - 1;
|
2016-01-26 20:14:33 -06:00
|
|
|
}
|
|
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
if (val != gpuData[6]) {
|
2016-01-26 20:14:33 -06:00
|
|
|
printf("atomicDec failed\n");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
found = false;
|
|
|
|
|
|
2018-06-02 12:27:17 +01:00
|
|
|
for (T i = 0; i < len; ++i) {
|
2016-01-26 20:14:33 -06:00
|
|
|
// eighth element should be a member of [0, len)
|
2018-03-12 11:29:03 +05:30
|
|
|
if (i == gpuData[7]) {
|
2016-01-26 20:14:33 -06:00
|
|
|
found = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-03-12 11:29:03 +05:30
|
|
|
if (!found) {
|
2016-01-26 20:14:33 -06:00
|
|
|
printf("atomicCAS failed\n");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2018-06-02 12:27:17 +01:00
|
|
|
return computeGoldBitwise(gpuData, len);
|
2016-01-26 20:14:33 -06:00
|
|
|
}
|
|
|
|
|
|
2018-06-02 12:27:17 +01:00
|
|
|
__device__
|
|
|
|
|
void testKernelExch(...) {}
|
2016-01-26 20:14:33 -06:00
|
|
|
|
2018-06-02 12:27:17 +01:00
|
|
|
template<typename T, typename enable_if<!is_same<T, double>{}>::type* = nullptr>
|
|
|
|
|
__device__
|
|
|
|
|
void testKernelExch(T* g_odata) {
|
|
|
|
|
// access thread id
|
|
|
|
|
const T tid = blockDim.x * blockIdx.x + threadIdx.x;
|
2016-01-26 20:14:33 -06:00
|
|
|
|
2018-06-02 12:27:17 +01:00
|
|
|
// Atomic exchange
|
|
|
|
|
atomicExch(&g_odata[2], tid);
|
|
|
|
|
}
|
2016-01-26 20:14:33 -06:00
|
|
|
|
2018-06-02 12:27:17 +01:00
|
|
|
__device__
|
|
|
|
|
void testKernelSub(...) {}
|
2016-01-26 20:14:33 -06:00
|
|
|
|
2018-06-02 12:27:17 +01:00
|
|
|
template<
|
|
|
|
|
typename T,
|
|
|
|
|
typename enable_if<
|
|
|
|
|
is_same<T, int>{} || is_same<T, unsigned int>{}>::type* = nullptr>
|
2018-09-17 15:26:45 +05:30
|
|
|
__device__
|
2018-06-02 12:27:17 +01:00
|
|
|
void testKernelSub(T* g_odata) {
|
2016-01-26 20:14:33 -06:00
|
|
|
// Atomic subtraction (final should be 0)
|
|
|
|
|
atomicSub(&g_odata[1], 10);
|
2018-06-02 12:27:17 +01:00
|
|
|
}
|
2016-01-26 20:14:33 -06:00
|
|
|
|
2018-06-02 12:27:17 +01:00
|
|
|
__device__
|
|
|
|
|
void testKernelIntegral(...) {}
|
|
|
|
|
|
|
|
|
|
template<typename T, typename enable_if<is_integral<T>{}>::type* = nullptr>
|
|
|
|
|
__device__
|
|
|
|
|
void testKernelIntegral(T* g_odata) {
|
|
|
|
|
// access thread id
|
|
|
|
|
const T tid = blockDim.x * blockIdx.x + threadIdx.x;
|
2016-01-26 20:14:33 -06:00
|
|
|
|
|
|
|
|
// Atomic maximum
|
|
|
|
|
atomicMax(&g_odata[3], tid);
|
|
|
|
|
|
|
|
|
|
// Atomic minimum
|
|
|
|
|
atomicMin(&g_odata[4], tid);
|
|
|
|
|
|
|
|
|
|
// Atomic increment (modulo 17+1)
|
2018-03-12 11:29:03 +05:30
|
|
|
atomicInc((unsigned int*)&g_odata[5], 17);
|
2017-11-19 01:54:12 +00:00
|
|
|
|
2016-01-26 20:14:33 -06:00
|
|
|
// Atomic decrement
|
2018-03-12 11:29:03 +05:30
|
|
|
atomicDec((unsigned int*)&g_odata[6], 137);
|
2016-01-26 20:14:33 -06:00
|
|
|
|
|
|
|
|
// Atomic compare-and-swap
|
2018-03-12 11:29:03 +05:30
|
|
|
atomicCAS(&g_odata[7], tid - 1, tid);
|
2016-01-26 20:14:33 -06:00
|
|
|
|
|
|
|
|
// Bitwise atomic instructions
|
|
|
|
|
|
|
|
|
|
// Atomic AND
|
2018-03-12 11:29:03 +05:30
|
|
|
atomicAnd(&g_odata[8], 2 * tid + 7);
|
2016-01-26 20:14:33 -06:00
|
|
|
|
|
|
|
|
// Atomic OR
|
|
|
|
|
atomicOr(&g_odata[9], 1 << tid);
|
|
|
|
|
|
|
|
|
|
// Atomic XOR
|
|
|
|
|
atomicXor(&g_odata[10], tid);
|
2016-02-02 14:50:55 +05:30
|
|
|
|
2018-06-02 12:27:17 +01:00
|
|
|
testKernelSub(g_odata);
|
|
|
|
|
}
|
2016-01-26 20:14:33 -06:00
|
|
|
|
2018-06-02 12:27:17 +01:00
|
|
|
template<typename T>
|
|
|
|
|
__global__ void testKernel(T* g_odata) {
|
|
|
|
|
// Atomic addition
|
|
|
|
|
atomicAdd(&g_odata[0], 10);
|
2016-01-26 20:14:33 -06:00
|
|
|
|
2018-06-02 12:27:17 +01:00
|
|
|
testKernelIntegral(g_odata);
|
|
|
|
|
testKernelExch(g_odata);
|
2016-01-26 20:14:33 -06:00
|
|
|
}
|
|
|
|
|
|
2018-06-02 12:27:17 +01:00
|
|
|
template<typename T>
|
|
|
|
|
void runTest() {
|
2016-01-26 20:14:33 -06:00
|
|
|
hipDeviceProp_t deviceProp;
|
|
|
|
|
deviceProp.major = 0;
|
|
|
|
|
deviceProp.minor = 0;
|
|
|
|
|
int dev = 0;
|
|
|
|
|
|
2016-03-06 08:31:04 -06:00
|
|
|
hipGetDeviceProperties(&deviceProp, dev);
|
2016-01-26 20:14:33 -06:00
|
|
|
|
|
|
|
|
// Statistics about the GPU device
|
2018-03-12 11:29:03 +05:30
|
|
|
printf(
|
|
|
|
|
"> GPU device has %d Multi-Processors, "
|
|
|
|
|
"SM %d.%d compute capabilities\n\n",
|
|
|
|
|
deviceProp.multiProcessorCount, deviceProp.major, deviceProp.minor);
|
2017-12-13 12:32:16 +05:30
|
|
|
|
2016-01-26 20:14:33 -06:00
|
|
|
|
|
|
|
|
unsigned int numThreads = 256;
|
|
|
|
|
unsigned int numBlocks = 64;
|
|
|
|
|
unsigned int numData = 11;
|
2018-06-02 12:27:17 +01:00
|
|
|
unsigned int memSize = sizeof(T) * numData;
|
2016-01-26 20:14:33 -06:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
// allocate mem for the result on host side
|
2018-06-02 12:27:17 +01:00
|
|
|
T* hOData = (T*)malloc(memSize);
|
2016-01-26 20:14:33 -06:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
// initialize the memory
|
|
|
|
|
for (unsigned int i = 0; i < numData; i++) hOData[i] = 0;
|
2016-01-26 20:14:33 -06:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
// To make the AND and XOR tests generate something other than 0...
|
2016-01-26 20:14:33 -06:00
|
|
|
hOData[8] = hOData[10] = 0xff;
|
|
|
|
|
|
|
|
|
|
// allocate device memory for result
|
2018-06-02 12:27:17 +01:00
|
|
|
T* dOData;
|
2018-03-12 11:29:03 +05:30
|
|
|
hipMalloc((void**)&dOData, memSize);
|
2016-01-26 20:14:33 -06:00
|
|
|
// copy host memory to device to initialize to zero
|
2018-03-12 11:29:03 +05:30
|
|
|
hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice);
|
2016-01-26 20:14:33 -06:00
|
|
|
|
|
|
|
|
// execute the kernel
|
2018-06-02 12:27:17 +01:00
|
|
|
hipLaunchKernelGGL(
|
|
|
|
|
testKernel, dim3(numBlocks), dim3(numThreads), 0, 0, dOData);
|
2016-01-26 20:14:33 -06:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
// Copy result from device to host
|
|
|
|
|
hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost);
|
2016-01-26 20:14:33 -06:00
|
|
|
|
|
|
|
|
// Compute reference solution
|
|
|
|
|
testResult = computeGold(hOData, numThreads * numBlocks);
|
|
|
|
|
|
|
|
|
|
// Cleanup memory
|
|
|
|
|
free(hOData);
|
|
|
|
|
hipFree(dOData);
|
2016-06-17 14:56:53 -05:00
|
|
|
|
|
|
|
|
passed();
|
2016-01-26 20:14:33 -06:00
|
|
|
}
|
2018-06-02 12:27:17 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
|
|
|
|
printf("%s starting...\n", sampleName);
|
|
|
|
|
|
|
|
|
|
runTest<int>();
|
|
|
|
|
runTest<unsigned int>();
|
|
|
|
|
runTest<unsigned long long>();
|
|
|
|
|
runTest<float>();
|
|
|
|
|
runTest<double>();
|
|
|
|
|
|
|
|
|
|
hipDeviceReset();
|
|
|
|
|
printf("%s completed, returned %s\n", sampleName, testResult ? "OK" : "ERROR!");
|
|
|
|
|
exit(testResult ? EXIT_SUCCESS : EXIT_FAILURE);
|
2018-09-17 15:26:45 +05:30
|
|
|
}
|