bf4ba9e09e
commit 9548493fa754b3bf5c31cbdc2211db1e73e8c07c Author: Jack Chung <whchung@gmail.com> Date: Mon May 23 11:57:23 2016 +0800 Rename hipExternShared test to hipDynamicShared Change-Id: I180d9d539420fb69cfc121eceaa7db9da03483b2 commit 827081f8244a38f010789d556db0c4ff7b6422d8 Author: Jack Chung <whchung@gmail.com> Date: Mon May 23 11:56:27 2016 +0800 Rename HIP_DECLARE_EXTERN_SHARED to HIP_DYNAMIC_SHARED Change-Id: I22362d179812ac547e0f11ba4e2bb999050e08ae commit 4c277228ed41af187739610fa17eab1fb144c947 Author: Jack Chung <whchung@gmail.com> Date: Thu May 19 17:49:52 2016 +0800 Adopt new interface to get dynamic LDS in hc.hpp Change-Id: I47b433b714633a4c97df87c40a0b1d3386429a00 commit 5a36117d777064113a528dc47b42e8c8413baa97 Author: Jack Chung <whchung@gmail.com> Date: Thu May 19 11:29:24 2016 +0800 Add test patterns for regular expression to match "extern __shared__" These test patterns should better be saved as an individual test case, but I'm not familiar with HIP test structures so I leave them as comments in hipify as of now. Change-Id: I7fee89c89b9e73de2133357a226ec0c769733531 commit 1b26284168c7f5339f63338fd0149bed5d994656 Author: Jack Chung <whchung@gmail.com> Date: Thu May 19 11:25:23 2016 +0800 Add one HIP unit test to use HIP_DECLARE_EXTERN_SHARED Change-Id: I4d9907815920693a74ea9d575fe26e7c67636109 commit 77b816ee5972b13d829d5bbcf06fbfd07acea2af Author: Jack Chung <whchung@gmail.com> Date: Wed May 18 19:18:59 2016 +0800 Adopt HIP_ prefix for DECLARE_EXTERN_SHARED macro Change-Id: I555ded16b449b67d2e20904013d86fe1ded6a2be commit ef0997939c3578a9ae11621bf21c0416f04d2622 Author: Jack Chung <whchung@gmail.com> Date: Wed May 18 17:42:04 2016 +0800 Modify hipify to support converting extern __shared__ to DECLARE_EXTERN_SHARED macro Added regular expression to search & replace extern __shared__ declarations to DECLARE_EXTERN_SHARED macro. Limitation: - Won't work if "extern __shared__" is declared at global scope Sample Usages: extern __shared__ double foo[]; extern __shared__ unsigned int foo[]; extern volatile __shared__ double foo[]; extern volatile __shared__ unsigned int sdata[]; extern __shared__ volatile unsigned int sdata[]; extern __shared__ T s[]; extern __shared__ T::type s[]; extern __shared__ blah<T>::type s[]; extern __shared__ typename mapper<Float>::type s_data[]; extern __attribute__((used)) __shared__ typename mapper<Float>::type s_data[]; Change-Id: I2be0b7039adeddb789f5a2b067d403a43fdc3e26 commit 93ff268724493aedfacdcd5a5aa9a100f4ebaed0 Author: Jack Chung <whchung@gmail.com> Date: Wed May 18 15:13:09 2016 +0800 Introduce DECLARE_EXTERN_SHARED macro to encapsulate "extern __shared__" decls Change-Id: I93b2d37c763195b0ca9fd0afee78605a1e3272db commit cff9c95412de343cc6405158b5acc4f1029267ff Author: Jack Chung <whchung@gmail.com> Date: Wed May 18 12:53:54 2016 +0800 Add __get_dynamic_groupbaseptr() to point to dynamic LDS Change-Id: I97b548d8a691488057617c551a8f331cad7afc77 Change-Id: I84e7875b76fa1f59e860e19c93bd4209cdd1fd2c
139 строки
5.2 KiB
C++
139 строки
5.2 KiB
C++
/*
|
|
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
in the Software without restriction, including without limitation the rights
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
THE SOFTWARE.
|
|
*/
|
|
|
|
#include <hip_runtime.h>
|
|
#include "test_common.h"
|
|
|
|
template<typename T>
|
|
__global__ void testExternSharedKernel(hipLaunchParm lp, const T* A_d, const T* B_d, T* C_d, size_t numElements, size_t groupElements) {
|
|
|
|
// declare dynamic shared memory
|
|
HIP_DYNAMIC_SHARED(T, sdata)
|
|
|
|
size_t gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
|
|
size_t tid = hipThreadIdx_x;
|
|
|
|
// initialize dynamic shared memory
|
|
if (tid < groupElements) {
|
|
sdata[tid] = static_cast<T>(tid);
|
|
}
|
|
|
|
// prefix sum inside dynamic shared memory
|
|
if (groupElements >= 512) {
|
|
if (tid >= 256) { sdata[tid] += sdata[tid - 256]; } __syncthreads();
|
|
}
|
|
if (groupElements >= 256) {
|
|
if (tid >= 128) { sdata[tid] += sdata[tid - 128]; } __syncthreads();
|
|
}
|
|
if (groupElements >= 128) {
|
|
if (tid >= 64) { sdata[tid] += sdata[tid - 64]; } __syncthreads();
|
|
}
|
|
if (groupElements >= 64) { sdata[tid] += sdata[tid - 32]; } __syncthreads();
|
|
if (groupElements >= 32) { sdata[tid] += sdata[tid - 16]; } __syncthreads();
|
|
if (groupElements >= 16) { sdata[tid] += sdata[tid - 8]; } __syncthreads();
|
|
if (groupElements >= 8) { sdata[tid] += sdata[tid - 4]; } __syncthreads();
|
|
if (groupElements >= 4) { sdata[tid] += sdata[tid - 2]; } __syncthreads();
|
|
if (groupElements >= 2) { sdata[tid] += sdata[tid - 1]; } __syncthreads();
|
|
|
|
C_d[gid] = A_d[gid] + B_d[gid] + sdata[tid % groupElements];
|
|
}
|
|
|
|
template<typename T>
|
|
void testExternShared(size_t N, size_t groupElements) {
|
|
size_t Nbytes = N * sizeof(T);
|
|
|
|
T *A_d, *B_d, *C_d;
|
|
T *A_h, *B_h, *C_h;
|
|
|
|
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
|
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
|
|
|
//printf("blocksPerCU: %d\nthreadsPerBlock: %d\nN: %zu\n", blocksPerCU, threadsPerBlock, N);
|
|
|
|
HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
|
HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
|
|
|
|
// calculate the amount of dynamic shared memory required
|
|
size_t groupMemBytes = groupElements * sizeof(T);
|
|
|
|
// launch kernel with dynamic shared memory
|
|
hipLaunchKernel(HIP_KERNEL_NAME(testExternSharedKernel<T>), dim3(blocks), dim3(threadsPerBlock), groupMemBytes, 0, A_d, B_d, C_d, N, groupElements);
|
|
|
|
HIPCHECK(hipDeviceSynchronize());
|
|
|
|
HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
|
|
|
// verify
|
|
for (size_t i = 0; i < N; ++i) {
|
|
size_t tid = (i % groupElements);
|
|
T sumFromSharedMemory = static_cast<T>(tid * (tid + 1) / 2);
|
|
T expected = A_h[i] + B_h[i] + sumFromSharedMemory;
|
|
if (C_h[i] != expected) {
|
|
std::cout << std::fixed << std::setprecision(32);
|
|
std::cout << "At " << i << std::endl;
|
|
std::cout << " Computed:" << C_h[i] << std::endl;
|
|
std::cout << " Expected:" << expected << std::endl;
|
|
std::cout << sumFromSharedMemory << std::endl;
|
|
std::cout << A_h[i] << std::endl;
|
|
std::cout << B_h[i] << std::endl;
|
|
|
|
failed("Failed at index:%zu\n", i);
|
|
}
|
|
}
|
|
|
|
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
|
}
|
|
|
|
int main(int argc, char *argv[]) {
|
|
HipTest::parseStandardArguments(argc, argv, true);
|
|
|
|
//printf("info: set device to %d\n", p_gpuDevice);
|
|
HIPCHECK(hipSetDevice(p_gpuDevice));
|
|
|
|
testExternShared<float>(1024, 4);
|
|
testExternShared<float>(1024, 8);
|
|
testExternShared<float>(1024, 16);
|
|
testExternShared<float>(1024, 32);
|
|
testExternShared<float>(1024, 64);
|
|
|
|
testExternShared<float>(65536, 4);
|
|
testExternShared<float>(65536, 8);
|
|
testExternShared<float>(65536, 16);
|
|
testExternShared<float>(65536, 32);
|
|
testExternShared<float>(65536, 64);
|
|
|
|
testExternShared<double>(1024, 4);
|
|
testExternShared<double>(1024, 8);
|
|
testExternShared<double>(1024, 16);
|
|
testExternShared<double>(1024, 32);
|
|
testExternShared<double>(1024, 64);
|
|
|
|
testExternShared<double>(65536, 4);
|
|
testExternShared<double>(65536, 8);
|
|
testExternShared<double>(65536, 16);
|
|
testExternShared<double>(65536, 32);
|
|
testExternShared<double>(65536, 64);
|
|
|
|
passed();
|
|
}
|
|
|