From 7022986ab2cf748d690fc64eb9f168420a8f5614 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Thu, 21 Jul 2016 12:29:56 +0530 Subject: [PATCH] Merge branch 'hiparray' into amd-develop Change-Id: I63ca7b1db7b593ac5cfb3fd7cd5d08d6e4075a4c --- bin/hipify | 7 + include/hcc_detail/hip_runtime_api.h | 14 ++ include/hcc_detail/hip_texture.h | 81 ++++++++- src/hip_memory.cpp | 235 ++++++++++++++++++++++++++ tests/src/CMakeLists.txt | 5 + tests/src/hipArray.cpp | 239 +++++++++++++++++++++++++++ tests/src/test_common.h | 96 ++++++++--- 7 files changed, 646 insertions(+), 31 deletions(-) create mode 100644 tests/src/hipArray.cpp diff --git a/bin/hipify b/bin/hipify index ce934dff15..1d1d071a78 100755 --- a/bin/hipify +++ b/bin/hipify @@ -279,6 +279,8 @@ while (@ARGV) { $ft{'mem'} += s/\bcudaPointerAttributes\b/hipPointerAttribute_t/g; + $ft{'mem'} += s/\bcudaMemcpy2D\b/hipMemcpy2D/g; + $ft{'mem'} += s/\bcudaMemcpy2DToArray\b/hipMemcpy2DToArray/g; #-------- # Memory management: @@ -293,6 +295,9 @@ while (@ARGV) { $ft{'mem'} += s/\bcudaHostAllocMapped\b/hipHostMallocMapped/g; $ft{'mem'} += s/\bcudaHostAllocWriteCombined\b/hipHostMallocWriteCombined/g; + $ft{'mem'} += s/\bcudaMallocArray\b/hipMallocArray/g; + $ft{'mem'} += s/\bcudaMallocPitch\b/hipMallocPitch/g; + #-------- # Coordinate Indexing and Dimensions: @@ -493,8 +498,10 @@ while (@ARGV) { $ft{'tex'} += s/\bcudaFilterModePoint\b/hipFilterModePoint/g; $ft{'tex'} += s/\bcudaReadModeElementType\b/hipReadModeElementType/g; + $ft{'tex'} += s/\bcudaArray\b/hipArrary/g; $ft{'tex'} += s/\bcudaCreateChannelDesc\b/hipCreateChannelDesc/g; $ft{'tex'} += s/\bcudaBindTexture\b/hipBindTexture/g; + $ft{'tex'} += s/\bcudaBindTextureToArray\b/hipBindTextureToArray/g; $ft{'tex'} += s/\bcudaUnbindTexture\b/hipUnbindTexture/g; } diff --git a/include/hcc_detail/hip_runtime_api.h b/include/hcc_detail/hip_runtime_api.h index 3fd06aae38..daf737563e 100644 --- a/include/hcc_detail/hip_runtime_api.h +++ b/include/hcc_detail/hip_runtime_api.h @@ -760,6 +760,20 @@ hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) */ hipError_t hipHostUnregister(void* hostPtr) ; +/** + * Allocates at least width (in bytes) * height bytes of linear memory + * Padding may occur to ensure alighnment requirements are met for the given row + * The change in width size due to padding will be returned in *pitch. + * Currently the alignment is set to 128 bytes + * + * @param[out] ptr Pointer to the allocated device memory + * @param[out] pitch Pitch for allocation (in bytes) + * @param[in] width Requested pitched allocation width (in bytes) + * @param[in] height Requested pitched allocation height + * @return Error code + */ + +hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height); /** * @brief Free memory allocated by the hcc hip memory allocation API. diff --git a/include/hcc_detail/hip_texture.h b/include/hcc_detail/hip_texture.h index d4c3403ccf..5712e5c333 100644 --- a/include/hcc_detail/hip_texture.h +++ b/include/hcc_detail/hip_texture.h @@ -38,9 +38,20 @@ THE SOFTWARE. //Texture - TODO - likely need to move this to a separate file only included with kernel compilation. #define hipTextureType1D 1 +typedef enum { + hipChannelFormatKindSigned = 0, + hipChannelFormatKindUnsigned, + hipChannelFormatKindFloat, + hipChannelFormatKindNone + +} hipChannelFormatKind; + typedef struct hipChannelFormatDesc { - // TODO - this has 4-5 well-defined fields, we could just copy... - int _dummy; + int x; + int y; + int z; + int w; + hipChannelFormatKind f; } hipChannelFormatDesc; typedef enum hipTextureReadMode @@ -67,14 +78,39 @@ struct texture : public textureReference { const T * _dataPtr; // pointer to underlying data. //texture() : filterMode(hipFilterModePoint), normalized(false), _dataPtr(NULL) {}; + unsigned int width; + unsigned int height; + }; #endif +typedef struct hipArray { + unsigned int width; + unsigned int height; + hipChannelFormatKind f; + void* data; //FIXME: generalize this +} hipArray; #define tex1Dfetch(_tex, _addr) (_tex._dataPtr[_addr]) +#define tex2D(_tex, _dx, _dy) \ + _tex._dataPtr[(unsigned int)_dx + (unsigned int)_dy*(_tex.width)] +hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, + size_t width, size_t height = 0, unsigned int flags = 0); + +hipError_t hipFreeArray(hipArray* array); + // +// dpitch, spitch, and width in bytes +hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind); + +// wOffset, width, and spitch in bytes +hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, + size_t spitch, size_t width, size_t height, hipMemcpyKind kind); + +hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, + const void* src, size_t count, hipMemcpyKind kind); /** @@ -125,11 +161,31 @@ hipChannelFormatDesc hipBindTexture(size_t *offset, struct textureReference *te * * **/ -template -hipChannelFormatDesc hipCreateChannelDesc() -{ - hipChannelFormatDesc desc; - return desc; +hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f); + +// descriptors +template inline hipChannelFormatDesc hipCreateChannelDesc() { + return hipCreateChannelDesc(0, 0, 0, 0, hipChannelFormatKindNone); +} +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(int) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned int) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(long) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned long) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} +template <> inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(float) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat); } /* @@ -178,6 +234,13 @@ hipError_t hipBindTexture(size_t *offset, return hipBindTexture(offset, tex, devPtr, &tex.channelDesc, size); } +template +hipError_t hipBindTextureToArray(struct texture &tex, hipArray* array) { + tex.width = array->width; + tex.height = array->height; + tex._dataPtr = static_cast(array->data); + return hipSuccess; +} /* * @brief Unbinds the textuer bound to @p tex @@ -187,9 +250,9 @@ hipError_t hipBindTexture(size_t *offset, * @return #hipSuccess **/ template -hipError_t hipUnbindTexture(struct texture *tex) +hipError_t hipUnbindTexture(struct texture &tex) { - tex->_dataPtr = NULL; + tex._dataPtr = NULL; return hipSuccess; } diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index cabea6f759..95429b26e6 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -198,6 +198,116 @@ hipError_t hipMallocHost(void** ptr, size_t sizeBytes) return hipHostMalloc(ptr, sizeBytes, 0); } +// width in bytes +hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) { + + HIP_INIT_API(ptr, pitch, width, height); + + hipError_t hip_status = hipSuccess; + + if(width == 0 || height == 0) + return ihipLogStatus(hipErrorUnknown); + + // hardcoded 128 bytes + *pitch = ((((int)width-1)/128) + 1)*128; + const size_t sizeBytes = (*pitch)*height; + + auto device = ihipGetTlsDefaultDevice(); + + //err = hipMalloc(ptr, (*pitch)*height); + if (device) { + const unsigned am_flags = 0; + *ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags); + + if (sizeBytes && (*ptr == NULL)) { + hip_status = hipErrorMemoryAllocation; + } else { + hc::am_memtracker_update(*ptr, device->_device_index, 0); + { + LockedAccessor_DeviceCrit_t crit(device->criticalData()); + if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved: + hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); + if (hsa_status != HSA_STATUS_SUCCESS) { + hip_status = hipErrorMemoryAllocation; + } + } + } + } + } else { + hip_status = hipErrorMemoryAllocation; + } + + return ihipLogStatus(hip_status); + +} + +hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f) { + hipChannelFormatDesc cd; + cd.x = x; cd.y = y; cd.z = z; cd.w = w; + cd.f = f; + return cd; +} + +hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, + size_t width, size_t height, unsigned int flags) { + + HIP_INIT_API(array, desc, width, height, flags); + + hipError_t hip_status = hipSuccess; + + auto device = ihipGetTlsDefaultDevice(); + + *array = (hipArray*)malloc(sizeof(hipArray)); + array[0]->width = width; + array[0]->height = height; + + array[0]->f = desc->f; + + void ** ptr = &array[0]->data; + + if (device) { + const unsigned am_flags = 0; + const size_t size = width*height; + + switch(desc->f) { + case hipChannelFormatKindSigned: + *ptr = hc::am_alloc(size*sizeof(int), device->_acc, am_flags); + break; + case hipChannelFormatKindUnsigned: + *ptr = hc::am_alloc(size*sizeof(unsigned int), device->_acc, am_flags); + break; + case hipChannelFormatKindFloat: + *ptr = hc::am_alloc(size*sizeof(float), device->_acc, am_flags); + break; + case hipChannelFormatKindNone: + *ptr = hc::am_alloc(size*sizeof(size_t), device->_acc, am_flags); + break; + default: + hip_status = hipErrorUnknown; + break; + } + if (size && (*ptr == NULL)) { + hip_status = hipErrorMemoryAllocation; + } else { + hc::am_memtracker_update(*ptr, device->_device_index, 0); + { + LockedAccessor_DeviceCrit_t crit(device->criticalData()); + if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved: + hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); + if (hsa_status != HSA_STATUS_SUCCESS) { + hip_status = hipErrorMemoryAllocation; + } + } + } + } + + } else { + hip_status = hipErrorMemoryAllocation; + } + + return ihipLogStatus(hip_status); +} + //--- hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) @@ -364,6 +474,108 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp return ihipLogStatus(e); } +// dpitch, spitch, and width in bytes +hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, + size_t width, size_t height, hipMemcpyKind kind) { + + HIP_INIT_API(dst, dpitch, src, spitch, width, height, kind); + + if(width > dpitch || width > spitch) + return ihipLogStatus(hipErrorUnknown); + + hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull); + + hc::completion_future marker; + + hipError_t e = hipSuccess; + + try { + for(int i = 0; i < height; ++i) { + stream->locked_copySync((unsigned char*)dst + i*dpitch, (unsigned char*)src + i*spitch, width, kind); + } + } + catch (ihipException ex) { + e = ex._code; + } + + return ihipLogStatus(e); +} + +// wOffset, width, and spitch in bytes +hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, + size_t spitch, size_t width, size_t height, hipMemcpyKind kind) { + + HIP_INIT_API(dst, wOffset, hOffset, src, spitch, width, height, kind); + + hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull); + + hc::completion_future marker; + + hipError_t e = hipSuccess; + + size_t byteSize; + if(dst) { + switch(dst[0].f) { + case hipChannelFormatKindSigned: + byteSize = sizeof(int); + break; + case hipChannelFormatKindUnsigned: + byteSize = sizeof(unsigned int); + break; + case hipChannelFormatKindFloat: + byteSize = sizeof(float); + break; + case hipChannelFormatKindNone: + byteSize = sizeof(size_t); + break; + default: + byteSize = 0; + break; + } + } else { + return ihipLogStatus(hipErrorUnknown); + } + + if((wOffset + width > (dst->width * byteSize)) || width > spitch) { + return ihipLogStatus(hipErrorUnknown); + } + + size_t src_w = spitch; + size_t dst_w = (dst->width)*byteSize; + + try { + for(int i = 0; i < height; ++i) { + stream->locked_copySync((unsigned char*)dst->data + i*dst_w, (unsigned char*)src + i*src_w, width, kind); + } + } + catch (ihipException ex) { + e = ex._code; + } + + return ihipLogStatus(e); +} + +hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, + const void* src, size_t count, hipMemcpyKind kind) { + + HIP_INIT_API(dst, wOffset, hOffset, src, count, kind); + + hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull); + + hc::completion_future marker; + + hipError_t e = hipSuccess; + + try { + stream->locked_copySync((char *)dst->data + wOffset, src, count, kind); + } + catch (ihipException ex) { + e = ex._code; + } + + return ihipLogStatus(e); +} + // TODO-sync: function is async unless target is pinned host memory - then these are fully sync. /** @return #hipErrorInvalidValue @@ -566,5 +778,28 @@ hipError_t hipFreeHost(void* ptr) return hipHostFree(ptr); } +hipError_t hipFreeArray(hipArray* array) +{ + HIP_INIT_API(array); + + hipError_t hipStatus = hipErrorInvalidDevicePointer; + + // Synchronize to ensure all work has finished. + ihipGetTlsDefaultDevice()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish. + + if(array->data) { + hc::accelerator acc; + hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0); + am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, array->data); + if(status == AM_SUCCESS){ + if(amPointerInfo._hostPointer == NULL){ + hc::am_free(array->data); + hipStatus = hipSuccess; + } + } + } + + return ihipLogStatus(hipStatus); +} diff --git a/tests/src/CMakeLists.txt b/tests/src/CMakeLists.txt index 723b99a243..127ebb2507 100644 --- a/tests/src/CMakeLists.txt +++ b/tests/src/CMakeLists.txt @@ -178,6 +178,10 @@ build_hip_executable (hipTestMemcpyPin hipTestMemcpyPin.cpp) #build_hip_executable (hipDynamicShared hipDynamicShared.cpp) build_hip_executable (hipLaunchParm hipLaunchParm.cpp) +if (${HIP_PLATFORM} STREQUAL "hcc") + build_hip_executable (hipArray hipArray.cpp) +endif() + make_test(hipEventRecord --iterations 10) make_test(hipEnvVarDriver " " ) make_test(hipLaunchParm " ") @@ -204,6 +208,7 @@ if (${HIP_MULTI_GPU}) endif() if (${HIP_PLATFORM} STREQUAL "hcc") + make_test(hipArray " ") make_test(hipFuncSetDevice " ") endif() diff --git a/tests/src/hipArray.cpp b/tests/src/hipArray.cpp new file mode 100644 index 0000000000..49add786d8 --- /dev/null +++ b/tests/src/hipArray.cpp @@ -0,0 +1,239 @@ +#include "hip_runtime.h" +#include "test_common.h" + +void printSep() +{ + printf ("======================================================================================\n"); +} + + +//--- +// Test copies of a matrix numW by numH +// The subroutine allocates memory , copies to device, runs a vector add kernel, copies back, and checks the result. +// +// IN: numW: number of elements in the 1st dimension used for allocation +// IN: numH: number of elements in the 2nd dimension used for allocation +// IN: usePinnedHost : If true, allocate host with hipHostMalloc and is pinned ; else allocate host memory with malloc. +// +template +void memcpy2Dtest(size_t numW, size_t numH, bool usePinnedHost) +{ + + size_t width = numW * sizeof(T); + size_t sizeElements = width * numH; + + printf("memcpy2Dtest: %s<%s> size=%lu (%6.2fMB) W: %d, H:%d, usePinnedHost: %d\n", + __func__, + TYPENAME(T), + sizeElements, sizeElements/1024.0/1024.0, + (int)numW, (int)numH, usePinnedHost); + + T *A_d, *B_d, *C_d; + T *A_h, *B_h, *C_h; + + size_t pitch_A, pitch_B, pitch_C; + + hipChannelFormatDesc desc = hipCreateChannelDesc(); + HipTest::initArrays2DPitch(&A_d, &B_d, &C_d, &pitch_A, &pitch_B, &pitch_C, numW, numH); + HipTest::initArraysForHost(&A_h, &B_h, &C_h, numW*numH, usePinnedHost); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numW*numH); + + HIPCHECK (hipMemcpy2D (A_d, pitch_A, A_h, width, width, numH, hipMemcpyHostToDevice) ); + HIPCHECK (hipMemcpy2D (B_d, pitch_B, B_h, width, width, numH, hipMemcpyHostToDevice) ); + + hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, (pitch_C/sizeof(T))*numH); + + HIPCHECK (hipMemcpy2D (C_h, width, C_d, pitch_C, width, numH, hipMemcpyDeviceToHost) ); + + HIPCHECK ( hipDeviceSynchronize() ); + HipTest::checkVectorADD(A_h, B_h, C_h, numW*numH); + + HipTest::freeArrays (A_d, B_d, C_d, A_h, B_h, C_h, usePinnedHost); + + printf (" %s success\n", __func__); +} + +//--- +// Test copies of a matrix numW by numH into a hipArray data structure +// The subroutine allocates memory , copies to device, runs a vector add kernel, copies back, and checks the result. +// +// IN: numW: number of elements in the 1st dimension used for allocation +// IN: numH: number of elements in the 2nd dimension used for allocation. If this is 1, then the 1-dimensional copy API +// would be used +// IN: usePinnedHost : If true, allocate host with hipHostMalloc and is pinned ; else allocate host memory with malloc. +// IN: usePitch: If true, pads additional memory. This is only valid in the 2-dimensional case +// +template +void memcpyArraytest(size_t numW, size_t numH, bool usePinnedHost, bool usePitch=false) +{ + + size_t width = numW * sizeof(T); + size_t sizeElements = width * numH; + + printf("memcpyArraytest: %s<%s> size=%lu (%6.2fMB) W: %d, H: %d, usePinnedHost: %d, usePitch: %d\n", + __func__, + TYPENAME(T), + sizeElements, sizeElements/1024.0/1024.0, + (int)numW, (int)numH, usePinnedHost, usePitch); + + hipArray *A_d, *B_d, *C_d; + T *A_h, *B_h, *C_h; + + // 1D + if ((numW >= 1) && (numH == 1)) { + hipChannelFormatDesc desc = hipCreateChannelDesc(); + HipTest::initHIPArrays(&A_d, &B_d, &C_d, &desc, numW, 1, 0); + HipTest::initArraysForHost(&A_h, &B_h, &C_h, numW*numH, usePinnedHost); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numW*numH); + + HIPCHECK (hipMemcpyToArray (A_d, 0, 0, (void *)A_h, width, hipMemcpyHostToDevice) ); + HIPCHECK (hipMemcpyToArray (B_d, 0, 0, (void *)B_h, width, hipMemcpyHostToDevice) ); + + hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, (T*)A_d->data, (T*)B_d->data, (T*)C_d->data, numW); + + HIPCHECK (hipMemcpy (C_h, C_d->data, width, hipMemcpyDeviceToHost) ); + + HIPCHECK ( hipDeviceSynchronize() ); + HipTest::checkVectorADD(A_h, B_h, C_h, numW); + + } + // 2D + else if ((numW >= 1) && (numH >= 1)) { + + + hipChannelFormatDesc desc = hipCreateChannelDesc(); + HipTest::initHIPArrays(&A_d, &B_d, &C_d, &desc, numW, numH, 0); + HipTest::initArraysForHost(&A_h, &B_h, &C_h, numW*numH, usePinnedHost); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numW*numH); + + if (usePitch) { + T *A_p, *B_p, *C_p; + size_t pitch_A, pitch_B, pitch_C; + + HipTest::initArrays2DPitch(&A_p, &B_p, &C_p, &pitch_A, &pitch_B, &pitch_C, numW, numH); + HIPCHECK (hipMemcpy2D (A_p, pitch_A, A_h, width, width, numH, hipMemcpyHostToDevice) ); + HIPCHECK (hipMemcpy2D (B_p, pitch_B, B_h, width, width, numH, hipMemcpyHostToDevice) ); + + HIPCHECK (hipMemcpy2DToArray (A_d, 0, 0, (void *)A_p, pitch_A, width, numH, hipMemcpyDeviceToDevice) ); + HIPCHECK (hipMemcpy2DToArray (B_d, 0, 0, (void *)B_p, pitch_B, width, numH, hipMemcpyDeviceToDevice) ); + + hipFree(A_p); + hipFree(B_p); + hipFree(C_p); + } + else { + HIPCHECK (hipMemcpy2DToArray (A_d, 0, 0, (void *)A_h, width, width, numH, hipMemcpyHostToDevice) ); + HIPCHECK (hipMemcpy2DToArray (B_d, 0, 0, (void *)B_h, width, width, numH, hipMemcpyHostToDevice) ); + } + + hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, (T*)A_d->data, (T*)B_d->data, (T*)C_d->data, numW*numH); + + HIPCHECK (hipMemcpy2D ((void*)C_h, width, (void*)C_d->data, width, width, numH, hipMemcpyDeviceToHost) ); + + HIPCHECK ( hipDeviceSynchronize() ); + HipTest::checkVectorADD(A_h, B_h, C_h, numW*numH); + } + // Unknown + else { + HIPASSERT("Incompatible dimensions" && 0); + } + + hipFreeArray(A_d); + hipFreeArray(B_d); + hipFreeArray(C_d); + HipTest::freeArraysForHost(A_h, B_h, C_h, usePinnedHost); + + printf (" %s success\n", __func__); + +} + +//--- +//Try many different sizes to memory copy. +template +void memcpyArraytest_size(size_t maxElem=0, size_t offset=0) +{ + printf ("test: %s<%s>\n", __func__, TYPENAME(T)); + + int deviceId; + HIPCHECK(hipGetDevice(&deviceId)); + + size_t free, total; + HIPCHECK(hipMemGetInfo(&free, &total)); + + if (maxElem == 0) { + maxElem = free/sizeof(T)/5; + } + + printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB offset=%lu\n", + deviceId, free, (float)(free/1024.0/1024.0), total, (float)(total/1024.0/1024.0), maxElem*sizeof(T)/1024.0/1024.0, offset); + + // Test 1D + for (size_t elem=64; elem+offset<=maxElem; elem*=2) { + HIPCHECK ( hipDeviceReset() ); + memcpyArraytest(elem+offset, 1, 0); // unpinned host + HIPCHECK ( hipDeviceReset() ); + memcpyArraytest(elem+offset, 1, 1); // pinned host + } + + // Test 2D + size_t maxElem2D = sqrt(maxElem); + + for (size_t elem=64; elem+offset<=maxElem2D; elem*=2) { + HIPCHECK ( hipDeviceReset() ); + memcpyArraytest(elem+offset, elem+offset, 0, 1); // use pitch + } +} + +int main(int argc, char *argv[]) +{ + HipTest::parseStandardArguments(argc, argv, true); + + printf ("info: set device to %d\n", p_gpuDevice); + HIPCHECK(hipSetDevice(p_gpuDevice)); + + if (p_tests & 0x1) { + printf ("\n\n=== tests&1 (types)\n"); + printSep(); + HIPCHECK ( hipDeviceReset() ); + size_t width = N/6; + size_t height = N/6; + memcpy2Dtest(321, 211, 0); + memcpy2Dtest(322, 211, 0); + memcpy2Dtest(320, 211, 0); + memcpy2Dtest(323, 211, 0); + printf ("===\n\n\n"); + + printf ("\n\n=== tests&1 (types)\n"); + printSep(); + // 2D + memcpyArraytest(320, 211, 0, 0); + memcpyArraytest(322, 211, 0, 0); + memcpyArraytest(320, 211, 0, 0); + memcpyArraytest(320, 211, 0, 1); + memcpyArraytest(322, 211, 0, 1); + memcpyArraytest(320, 211, 0, 1); + printSep(); + // 1D + memcpyArraytest(320, 1, 0); + memcpyArraytest(322, 1, 0); + memcpyArraytest(320, 1, 0); + printf ("===\n\n\n"); + } + + if (p_tests & 0x4) { + printf ("\n\n=== tests&4 (test sizes and offsets)\n"); + printSep(); + HIPCHECK ( hipDeviceReset() ); + printSep(); + memcpyArraytest_size(0,0); + printSep(); + memcpyArraytest_size(0,64); + printSep(); + memcpyArraytest_size(1024*1024,13); + printSep(); + memcpyArraytest_size(1024*1024,50); + } + + passed(); + +} diff --git a/tests/src/test_common.h b/tests/src/test_common.h index 47e5f63a5b..cfde590f78 100644 --- a/tests/src/test_common.h +++ b/tests/src/test_common.h @@ -142,22 +142,11 @@ vectorADD(hipLaunchParm lp, template -void initArrays(T **A_d, T **B_d, T **C_d, - T **A_h, T **B_h, T **C_h, - size_t N, bool usePinnedHost=false) +void initArraysForHost(T **A_h, T **B_h, T **C_h, + size_t N, bool usePinnedHost=false) { size_t Nbytes = N*sizeof(T); - if (A_d) { - HIPCHECK ( hipMalloc(A_d, Nbytes) ); - } - if (B_d) { - HIPCHECK ( hipMalloc(B_d, Nbytes) ); - } - if (C_d) { - HIPCHECK ( hipMalloc(C_d, Nbytes) ); - } - if (usePinnedHost) { if (A_h) { HIPCHECK ( hipHostMalloc((void**)A_h, Nbytes) ); @@ -173,7 +162,7 @@ void initArrays(T **A_d, T **B_d, T **C_d, *A_h = (T*)malloc(Nbytes); HIPASSERT(*A_h != NULL); } - + if (B_h) { *B_h = (T*)malloc(Nbytes); HIPASSERT(*B_h != NULL); @@ -185,7 +174,6 @@ void initArrays(T **A_d, T **B_d, T **C_d, } } - // Initialize the host data: for (size_t i=0; i -void freeArrays(T *A_d, T *B_d, T *C_d, - T *A_h, T *B_h, T *C_h, bool usePinnedHost) +void initArrays(T **A_d, T **B_d, T **C_d, + T **A_h, T **B_h, T **C_h, + size_t N, bool usePinnedHost=false) { + size_t Nbytes = N*sizeof(T); + if (A_d) { - HIPCHECK ( hipFree(A_d) ); + HIPCHECK ( hipMalloc(A_d, Nbytes) ); } if (B_d) { - HIPCHECK ( hipFree(B_d) ); + HIPCHECK ( hipMalloc(B_d, Nbytes) ); } if (C_d) { - HIPCHECK ( hipFree(C_d) ); + HIPCHECK ( hipMalloc(C_d, Nbytes) ); } + initArraysForHost(A_h, B_h, C_h, N, usePinnedHost); + +} + + +template +void freeArraysForHost(T *A_h, T *B_h, T *C_h, bool usePinnedHost) +{ if (usePinnedHost) { if (A_h) { HIPCHECK (hipHostFree(A_h)); @@ -231,9 +229,63 @@ void freeArrays(T *A_d, T *B_d, T *C_d, free (C_h); } } - + } +template +void freeArrays(T *A_d, T *B_d, T *C_d, + T *A_h, T *B_h, T *C_h, bool usePinnedHost) +{ + if (A_d) { + HIPCHECK ( hipFree(A_d) ); + } + if (B_d) { + HIPCHECK ( hipFree(B_d) ); + } + if (C_d) { + HIPCHECK ( hipFree(C_d) ); + } + + freeArraysForHost(A_h, B_h, C_h, usePinnedHost); +} + +#if defined(__HIP_PLATFORM_HCC__) +template +void initArrays2DPitch(T **A_d, T **B_d, T **C_d, + size_t *pitch_A, size_t *pitch_B, size_t *pitch_C, + size_t numW, size_t numH) +{ + if (A_d) { + HIPCHECK ( hipMallocPitch((void**)A_d, pitch_A, numW*sizeof(T), numH) ); + } + if (B_d) { + HIPCHECK ( hipMallocPitch((void**)B_d, pitch_B, numW*sizeof(T), numH) ); + } + if (C_d) { + HIPCHECK ( hipMallocPitch((void**)C_d, pitch_C, numW*sizeof(T), numH) ); + } + + HIPASSERT(*pitch_A == *pitch_B); + HIPASSERT(*pitch_A == *pitch_C) + +} + +inline void initHIPArrays(hipArray **A_d, hipArray **B_d, hipArray **C_d, + const hipChannelFormatDesc *desc, const size_t numW, const size_t numH, const unsigned int flags) +{ + + if (A_d) { + HIPCHECK( hipMallocArray(A_d, desc, numW, numH, flags)); + } + if (B_d) { + HIPCHECK( hipMallocArray(B_d, desc, numW, numH, flags)); + } + if (C_d) { + HIPCHECK( hipMallocArray(C_d, desc, numW, numH, flags)); + } + +} +#endif // Assumes C_h contains vector add of A_h + B_h // Calls the test "failed" macro if a mismatch is detected.