2
0
Ficheiros
rocm-systems/tests/src/hipArray.cpp
T
Maneesh Gupta 7022986ab2 Merge branch 'hiparray' into amd-develop
Change-Id: I63ca7b1db7b593ac5cfb3fd7cd5d08d6e4075a4c
2016-07-21 12:29:56 +05:30

240 linhas
8.3 KiB
C++

#include "hip_runtime.h"
#include "test_common.h"
void printSep()
{
printf ("======================================================================================\n");
}
//---
// Test copies of a matrix numW by numH
// The subroutine allocates memory , copies to device, runs a vector add kernel, copies back, and checks the result.
//
// IN: numW: number of elements in the 1st dimension used for allocation
// IN: numH: number of elements in the 2nd dimension used for allocation
// IN: usePinnedHost : If true, allocate host with hipHostMalloc and is pinned ; else allocate host memory with malloc.
//
template <typename T>
void memcpy2Dtest(size_t numW, size_t numH, bool usePinnedHost)
{
size_t width = numW * sizeof(T);
size_t sizeElements = width * numH;
printf("memcpy2Dtest: %s<%s> size=%lu (%6.2fMB) W: %d, H:%d, usePinnedHost: %d\n",
__func__,
TYPENAME(T),
sizeElements, sizeElements/1024.0/1024.0,
(int)numW, (int)numH, usePinnedHost);
T *A_d, *B_d, *C_d;
T *A_h, *B_h, *C_h;
size_t pitch_A, pitch_B, pitch_C;
hipChannelFormatDesc desc = hipCreateChannelDesc<T>();
HipTest::initArrays2DPitch(&A_d, &B_d, &C_d, &pitch_A, &pitch_B, &pitch_C, numW, numH);
HipTest::initArraysForHost(&A_h, &B_h, &C_h, numW*numH, usePinnedHost);
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numW*numH);
HIPCHECK (hipMemcpy2D (A_d, pitch_A, A_h, width, width, numH, hipMemcpyHostToDevice) );
HIPCHECK (hipMemcpy2D (B_d, pitch_B, B_h, width, width, numH, hipMemcpyHostToDevice) );
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, (pitch_C/sizeof(T))*numH);
HIPCHECK (hipMemcpy2D (C_h, width, C_d, pitch_C, width, numH, hipMemcpyDeviceToHost) );
HIPCHECK ( hipDeviceSynchronize() );
HipTest::checkVectorADD(A_h, B_h, C_h, numW*numH);
HipTest::freeArrays (A_d, B_d, C_d, A_h, B_h, C_h, usePinnedHost);
printf (" %s success\n", __func__);
}
//---
// Test copies of a matrix numW by numH into a hipArray data structure
// The subroutine allocates memory , copies to device, runs a vector add kernel, copies back, and checks the result.
//
// IN: numW: number of elements in the 1st dimension used for allocation
// IN: numH: number of elements in the 2nd dimension used for allocation. If this is 1, then the 1-dimensional copy API
// would be used
// IN: usePinnedHost : If true, allocate host with hipHostMalloc and is pinned ; else allocate host memory with malloc.
// IN: usePitch: If true, pads additional memory. This is only valid in the 2-dimensional case
//
template <typename T>
void memcpyArraytest(size_t numW, size_t numH, bool usePinnedHost, bool usePitch=false)
{
size_t width = numW * sizeof(T);
size_t sizeElements = width * numH;
printf("memcpyArraytest: %s<%s> size=%lu (%6.2fMB) W: %d, H: %d, usePinnedHost: %d, usePitch: %d\n",
__func__,
TYPENAME(T),
sizeElements, sizeElements/1024.0/1024.0,
(int)numW, (int)numH, usePinnedHost, usePitch);
hipArray *A_d, *B_d, *C_d;
T *A_h, *B_h, *C_h;
// 1D
if ((numW >= 1) && (numH == 1)) {
hipChannelFormatDesc desc = hipCreateChannelDesc<T>();
HipTest::initHIPArrays(&A_d, &B_d, &C_d, &desc, numW, 1, 0);
HipTest::initArraysForHost(&A_h, &B_h, &C_h, numW*numH, usePinnedHost);
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numW*numH);
HIPCHECK (hipMemcpyToArray (A_d, 0, 0, (void *)A_h, width, hipMemcpyHostToDevice) );
HIPCHECK (hipMemcpyToArray (B_d, 0, 0, (void *)B_h, width, hipMemcpyHostToDevice) );
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, (T*)A_d->data, (T*)B_d->data, (T*)C_d->data, numW);
HIPCHECK (hipMemcpy (C_h, C_d->data, width, hipMemcpyDeviceToHost) );
HIPCHECK ( hipDeviceSynchronize() );
HipTest::checkVectorADD(A_h, B_h, C_h, numW);
}
// 2D
else if ((numW >= 1) && (numH >= 1)) {
hipChannelFormatDesc desc = hipCreateChannelDesc<T>();
HipTest::initHIPArrays(&A_d, &B_d, &C_d, &desc, numW, numH, 0);
HipTest::initArraysForHost(&A_h, &B_h, &C_h, numW*numH, usePinnedHost);
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numW*numH);
if (usePitch) {
T *A_p, *B_p, *C_p;
size_t pitch_A, pitch_B, pitch_C;
HipTest::initArrays2DPitch(&A_p, &B_p, &C_p, &pitch_A, &pitch_B, &pitch_C, numW, numH);
HIPCHECK (hipMemcpy2D (A_p, pitch_A, A_h, width, width, numH, hipMemcpyHostToDevice) );
HIPCHECK (hipMemcpy2D (B_p, pitch_B, B_h, width, width, numH, hipMemcpyHostToDevice) );
HIPCHECK (hipMemcpy2DToArray (A_d, 0, 0, (void *)A_p, pitch_A, width, numH, hipMemcpyDeviceToDevice) );
HIPCHECK (hipMemcpy2DToArray (B_d, 0, 0, (void *)B_p, pitch_B, width, numH, hipMemcpyDeviceToDevice) );
hipFree(A_p);
hipFree(B_p);
hipFree(C_p);
}
else {
HIPCHECK (hipMemcpy2DToArray (A_d, 0, 0, (void *)A_h, width, width, numH, hipMemcpyHostToDevice) );
HIPCHECK (hipMemcpy2DToArray (B_d, 0, 0, (void *)B_h, width, width, numH, hipMemcpyHostToDevice) );
}
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, (T*)A_d->data, (T*)B_d->data, (T*)C_d->data, numW*numH);
HIPCHECK (hipMemcpy2D ((void*)C_h, width, (void*)C_d->data, width, width, numH, hipMemcpyDeviceToHost) );
HIPCHECK ( hipDeviceSynchronize() );
HipTest::checkVectorADD(A_h, B_h, C_h, numW*numH);
}
// Unknown
else {
HIPASSERT("Incompatible dimensions" && 0);
}
hipFreeArray(A_d);
hipFreeArray(B_d);
hipFreeArray(C_d);
HipTest::freeArraysForHost(A_h, B_h, C_h, usePinnedHost);
printf (" %s success\n", __func__);
}
//---
//Try many different sizes to memory copy.
template <typename T>
void memcpyArraytest_size(size_t maxElem=0, size_t offset=0)
{
printf ("test: %s<%s>\n", __func__, TYPENAME(T));
int deviceId;
HIPCHECK(hipGetDevice(&deviceId));
size_t free, total;
HIPCHECK(hipMemGetInfo(&free, &total));
if (maxElem == 0) {
maxElem = free/sizeof(T)/5;
}
printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB offset=%lu\n",
deviceId, free, (float)(free/1024.0/1024.0), total, (float)(total/1024.0/1024.0), maxElem*sizeof(T)/1024.0/1024.0, offset);
// Test 1D
for (size_t elem=64; elem+offset<=maxElem; elem*=2) {
HIPCHECK ( hipDeviceReset() );
memcpyArraytest<T>(elem+offset, 1, 0); // unpinned host
HIPCHECK ( hipDeviceReset() );
memcpyArraytest<T>(elem+offset, 1, 1); // pinned host
}
// Test 2D
size_t maxElem2D = sqrt(maxElem);
for (size_t elem=64; elem+offset<=maxElem2D; elem*=2) {
HIPCHECK ( hipDeviceReset() );
memcpyArraytest<T>(elem+offset, elem+offset, 0, 1); // use pitch
}
}
int main(int argc, char *argv[])
{
HipTest::parseStandardArguments(argc, argv, true);
printf ("info: set device to %d\n", p_gpuDevice);
HIPCHECK(hipSetDevice(p_gpuDevice));
if (p_tests & 0x1) {
printf ("\n\n=== tests&1 (types)\n");
printSep();
HIPCHECK ( hipDeviceReset() );
size_t width = N/6;
size_t height = N/6;
memcpy2Dtest<float>(321, 211, 0);
memcpy2Dtest<double>(322, 211, 0);
memcpy2Dtest<char>(320, 211, 0);
memcpy2Dtest<int>(323, 211, 0);
printf ("===\n\n\n");
printf ("\n\n=== tests&1 (types)\n");
printSep();
// 2D
memcpyArraytest<float>(320, 211, 0, 0);
memcpyArraytest<unsigned int>(322, 211, 0, 0);
memcpyArraytest<int>(320, 211, 0, 0);
memcpyArraytest<float>(320, 211, 0, 1);
memcpyArraytest<float>(322, 211, 0, 1);
memcpyArraytest<int>(320, 211, 0, 1);
printSep();
// 1D
memcpyArraytest<float>(320, 1, 0);
memcpyArraytest<unsigned int>(322, 1, 0);
memcpyArraytest<int>(320, 1, 0);
printf ("===\n\n\n");
}
if (p_tests & 0x4) {
printf ("\n\n=== tests&4 (test sizes and offsets)\n");
printSep();
HIPCHECK ( hipDeviceReset() );
printSep();
memcpyArraytest_size<float>(0,0);
printSep();
memcpyArraytest_size<float>(0,64);
printSep();
memcpyArraytest_size<float>(1024*1024,13);
printSep();
memcpyArraytest_size<float>(1024*1024,50);
}
passed();
}