Merge branch 'hiparray' into amd-develop

Change-Id: I63ca7b1db7b593ac5cfb3fd7cd5d08d6e4075a4c


[ROCm/hip commit: 7022986ab2]
Этот коммит содержится в:
Maneesh Gupta
2016-07-21 12:29:56 +05:30
родитель 7e95a3a38e
Коммит b160ff52fc
7 изменённых файлов: 646 добавлений и 31 удалений
+7
Просмотреть файл
@@ -279,6 +279,8 @@ while (@ARGV) {
$ft{'mem'} += s/\bcudaPointerAttributes\b/hipPointerAttribute_t/g;
$ft{'mem'} += s/\bcudaMemcpy2D\b/hipMemcpy2D/g;
$ft{'mem'} += s/\bcudaMemcpy2DToArray\b/hipMemcpy2DToArray/g;
#--------
# Memory management:
@@ -293,6 +295,9 @@ while (@ARGV) {
$ft{'mem'} += s/\bcudaHostAllocMapped\b/hipHostMallocMapped/g;
$ft{'mem'} += s/\bcudaHostAllocWriteCombined\b/hipHostMallocWriteCombined/g;
$ft{'mem'} += s/\bcudaMallocArray\b/hipMallocArray/g;
$ft{'mem'} += s/\bcudaMallocPitch\b/hipMallocPitch/g;
#--------
# Coordinate Indexing and Dimensions:
@@ -493,8 +498,10 @@ while (@ARGV) {
$ft{'tex'} += s/\bcudaFilterModePoint\b/hipFilterModePoint/g;
$ft{'tex'} += s/\bcudaReadModeElementType\b/hipReadModeElementType/g;
$ft{'tex'} += s/\bcudaArray\b/hipArrary/g;
$ft{'tex'} += s/\bcudaCreateChannelDesc\b/hipCreateChannelDesc/g;
$ft{'tex'} += s/\bcudaBindTexture\b/hipBindTexture/g;
$ft{'tex'} += s/\bcudaBindTextureToArray\b/hipBindTextureToArray/g;
$ft{'tex'} += s/\bcudaUnbindTexture\b/hipUnbindTexture/g;
}
+14
Просмотреть файл
@@ -760,6 +760,20 @@ hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags)
*/
hipError_t hipHostUnregister(void* hostPtr) ;
/**
* Allocates at least width (in bytes) * height bytes of linear memory
* Padding may occur to ensure alighnment requirements are met for the given row
* The change in width size due to padding will be returned in *pitch.
* Currently the alignment is set to 128 bytes
*
* @param[out] ptr Pointer to the allocated device memory
* @param[out] pitch Pitch for allocation (in bytes)
* @param[in] width Requested pitched allocation width (in bytes)
* @param[in] height Requested pitched allocation height
* @return Error code
*/
hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height);
/**
* @brief Free memory allocated by the hcc hip memory allocation API.
+72 -9
Просмотреть файл
@@ -38,9 +38,20 @@ THE SOFTWARE.
//Texture - TODO - likely need to move this to a separate file only included with kernel compilation.
#define hipTextureType1D 1
typedef enum {
hipChannelFormatKindSigned = 0,
hipChannelFormatKindUnsigned,
hipChannelFormatKindFloat,
hipChannelFormatKindNone
} hipChannelFormatKind;
typedef struct hipChannelFormatDesc {
// TODO - this has 4-5 well-defined fields, we could just copy...
int _dummy;
int x;
int y;
int z;
int w;
hipChannelFormatKind f;
} hipChannelFormatDesc;
typedef enum hipTextureReadMode
@@ -67,14 +78,39 @@ struct texture : public textureReference {
const T * _dataPtr; // pointer to underlying data.
//texture() : filterMode(hipFilterModePoint), normalized(false), _dataPtr(NULL) {};
unsigned int width;
unsigned int height;
};
#endif
typedef struct hipArray {
unsigned int width;
unsigned int height;
hipChannelFormatKind f;
void* data; //FIXME: generalize this
} hipArray;
#define tex1Dfetch(_tex, _addr) (_tex._dataPtr[_addr])
#define tex2D(_tex, _dx, _dy) \
_tex._dataPtr[(unsigned int)_dx + (unsigned int)_dy*(_tex.width)]
hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
size_t width, size_t height = 0, unsigned int flags = 0);
hipError_t hipFreeArray(hipArray* array);
//
// dpitch, spitch, and width in bytes
hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind);
// wOffset, width, and spitch in bytes
hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src,
size_t spitch, size_t width, size_t height, hipMemcpyKind kind);
hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset,
const void* src, size_t count, hipMemcpyKind kind);
/**
@@ -125,11 +161,31 @@ hipChannelFormatDesc hipBindTexture(size_t *offset, struct textureReference *te
*
*
**/
template <class T>
hipChannelFormatDesc hipCreateChannelDesc()
{
hipChannelFormatDesc desc;
return desc;
hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f);
// descriptors
template <typename T> inline hipChannelFormatDesc hipCreateChannelDesc() {
return hipCreateChannelDesc(0, 0, 0, 0, hipChannelFormatKindNone);
}
template <> inline hipChannelFormatDesc hipCreateChannelDesc<int>() {
int e = (int)sizeof(int) * 8;
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
}
template <> inline hipChannelFormatDesc hipCreateChannelDesc<unsigned int>() {
int e = (int)sizeof(unsigned int) * 8;
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
}
template <> inline hipChannelFormatDesc hipCreateChannelDesc<long>() {
int e = (int)sizeof(long) * 8;
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
}
template <> inline hipChannelFormatDesc hipCreateChannelDesc<unsigned long>() {
int e = (int)sizeof(unsigned long) * 8;
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
}
template <> inline hipChannelFormatDesc hipCreateChannelDesc<float>() {
int e = (int)sizeof(float) * 8;
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat);
}
/*
@@ -178,6 +234,13 @@ hipError_t hipBindTexture(size_t *offset,
return hipBindTexture(offset, tex, devPtr, &tex.channelDesc, size);
}
template <class T, int dim, enum hipTextureReadMode readMode>
hipError_t hipBindTextureToArray(struct texture<T, dim, readMode> &tex, hipArray* array) {
tex.width = array->width;
tex.height = array->height;
tex._dataPtr = static_cast<const T*>(array->data);
return hipSuccess;
}
/*
* @brief Unbinds the textuer bound to @p tex
@@ -187,9 +250,9 @@ hipError_t hipBindTexture(size_t *offset,
* @return #hipSuccess
**/
template <class T, int dim, enum hipTextureReadMode readMode>
hipError_t hipUnbindTexture(struct texture<T, dim, readMode> *tex)
hipError_t hipUnbindTexture(struct texture<T, dim, readMode> &tex)
{
tex->_dataPtr = NULL;
tex._dataPtr = NULL;
return hipSuccess;
}
+235
Просмотреть файл
@@ -198,6 +198,116 @@ hipError_t hipMallocHost(void** ptr, size_t sizeBytes)
return hipHostMalloc(ptr, sizeBytes, 0);
}
// width in bytes
hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) {
HIP_INIT_API(ptr, pitch, width, height);
hipError_t hip_status = hipSuccess;
if(width == 0 || height == 0)
return ihipLogStatus(hipErrorUnknown);
// hardcoded 128 bytes
*pitch = ((((int)width-1)/128) + 1)*128;
const size_t sizeBytes = (*pitch)*height;
auto device = ihipGetTlsDefaultDevice();
//err = hipMalloc(ptr, (*pitch)*height);
if (device) {
const unsigned am_flags = 0;
*ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags);
if (sizeBytes && (*ptr == NULL)) {
hip_status = hipErrorMemoryAllocation;
} else {
hc::am_memtracker_update(*ptr, device->_device_index, 0);
{
LockedAccessor_DeviceCrit_t crit(device->criticalData());
if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved:
hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
if (hsa_status != HSA_STATUS_SUCCESS) {
hip_status = hipErrorMemoryAllocation;
}
}
}
}
} else {
hip_status = hipErrorMemoryAllocation;
}
return ihipLogStatus(hip_status);
}
hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f) {
hipChannelFormatDesc cd;
cd.x = x; cd.y = y; cd.z = z; cd.w = w;
cd.f = f;
return cd;
}
hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
size_t width, size_t height, unsigned int flags) {
HIP_INIT_API(array, desc, width, height, flags);
hipError_t hip_status = hipSuccess;
auto device = ihipGetTlsDefaultDevice();
*array = (hipArray*)malloc(sizeof(hipArray));
array[0]->width = width;
array[0]->height = height;
array[0]->f = desc->f;
void ** ptr = &array[0]->data;
if (device) {
const unsigned am_flags = 0;
const size_t size = width*height;
switch(desc->f) {
case hipChannelFormatKindSigned:
*ptr = hc::am_alloc(size*sizeof(int), device->_acc, am_flags);
break;
case hipChannelFormatKindUnsigned:
*ptr = hc::am_alloc(size*sizeof(unsigned int), device->_acc, am_flags);
break;
case hipChannelFormatKindFloat:
*ptr = hc::am_alloc(size*sizeof(float), device->_acc, am_flags);
break;
case hipChannelFormatKindNone:
*ptr = hc::am_alloc(size*sizeof(size_t), device->_acc, am_flags);
break;
default:
hip_status = hipErrorUnknown;
break;
}
if (size && (*ptr == NULL)) {
hip_status = hipErrorMemoryAllocation;
} else {
hc::am_memtracker_update(*ptr, device->_device_index, 0);
{
LockedAccessor_DeviceCrit_t crit(device->criticalData());
if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved:
hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
if (hsa_status != HSA_STATUS_SUCCESS) {
hip_status = hipErrorMemoryAllocation;
}
}
}
}
} else {
hip_status = hipErrorMemoryAllocation;
}
return ihipLogStatus(hip_status);
}
//---
hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr)
@@ -364,6 +474,108 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp
return ihipLogStatus(e);
}
// dpitch, spitch, and width in bytes
hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch,
size_t width, size_t height, hipMemcpyKind kind) {
HIP_INIT_API(dst, dpitch, src, spitch, width, height, kind);
if(width > dpitch || width > spitch)
return ihipLogStatus(hipErrorUnknown);
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
hc::completion_future marker;
hipError_t e = hipSuccess;
try {
for(int i = 0; i < height; ++i) {
stream->locked_copySync((unsigned char*)dst + i*dpitch, (unsigned char*)src + i*spitch, width, kind);
}
}
catch (ihipException ex) {
e = ex._code;
}
return ihipLogStatus(e);
}
// wOffset, width, and spitch in bytes
hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src,
size_t spitch, size_t width, size_t height, hipMemcpyKind kind) {
HIP_INIT_API(dst, wOffset, hOffset, src, spitch, width, height, kind);
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
hc::completion_future marker;
hipError_t e = hipSuccess;
size_t byteSize;
if(dst) {
switch(dst[0].f) {
case hipChannelFormatKindSigned:
byteSize = sizeof(int);
break;
case hipChannelFormatKindUnsigned:
byteSize = sizeof(unsigned int);
break;
case hipChannelFormatKindFloat:
byteSize = sizeof(float);
break;
case hipChannelFormatKindNone:
byteSize = sizeof(size_t);
break;
default:
byteSize = 0;
break;
}
} else {
return ihipLogStatus(hipErrorUnknown);
}
if((wOffset + width > (dst->width * byteSize)) || width > spitch) {
return ihipLogStatus(hipErrorUnknown);
}
size_t src_w = spitch;
size_t dst_w = (dst->width)*byteSize;
try {
for(int i = 0; i < height; ++i) {
stream->locked_copySync((unsigned char*)dst->data + i*dst_w, (unsigned char*)src + i*src_w, width, kind);
}
}
catch (ihipException ex) {
e = ex._code;
}
return ihipLogStatus(e);
}
hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset,
const void* src, size_t count, hipMemcpyKind kind) {
HIP_INIT_API(dst, wOffset, hOffset, src, count, kind);
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
hc::completion_future marker;
hipError_t e = hipSuccess;
try {
stream->locked_copySync((char *)dst->data + wOffset, src, count, kind);
}
catch (ihipException ex) {
e = ex._code;
}
return ihipLogStatus(e);
}
// TODO-sync: function is async unless target is pinned host memory - then these are fully sync.
/** @return #hipErrorInvalidValue
@@ -566,5 +778,28 @@ hipError_t hipFreeHost(void* ptr)
return hipHostFree(ptr);
}
hipError_t hipFreeArray(hipArray* array)
{
HIP_INIT_API(array);
hipError_t hipStatus = hipErrorInvalidDevicePointer;
// Synchronize to ensure all work has finished.
ihipGetTlsDefaultDevice()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish.
if(array->data) {
hc::accelerator acc;
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, array->data);
if(status == AM_SUCCESS){
if(amPointerInfo._hostPointer == NULL){
hc::am_free(array->data);
hipStatus = hipSuccess;
}
}
}
return ihipLogStatus(hipStatus);
}
+5
Просмотреть файл
@@ -178,6 +178,10 @@ build_hip_executable (hipTestMemcpyPin hipTestMemcpyPin.cpp)
#build_hip_executable (hipDynamicShared hipDynamicShared.cpp)
build_hip_executable (hipLaunchParm hipLaunchParm.cpp)
if (${HIP_PLATFORM} STREQUAL "hcc")
build_hip_executable (hipArray hipArray.cpp)
endif()
make_test(hipEventRecord --iterations 10)
make_test(hipEnvVarDriver " " )
make_test(hipLaunchParm " ")
@@ -204,6 +208,7 @@ if (${HIP_MULTI_GPU})
endif()
if (${HIP_PLATFORM} STREQUAL "hcc")
make_test(hipArray " ")
make_test(hipFuncSetDevice " ")
endif()
+239
Просмотреть файл
@@ -0,0 +1,239 @@
#include "hip_runtime.h"
#include "test_common.h"
void printSep()
{
printf ("======================================================================================\n");
}
//---
// Test copies of a matrix numW by numH
// The subroutine allocates memory , copies to device, runs a vector add kernel, copies back, and checks the result.
//
// IN: numW: number of elements in the 1st dimension used for allocation
// IN: numH: number of elements in the 2nd dimension used for allocation
// IN: usePinnedHost : If true, allocate host with hipHostMalloc and is pinned ; else allocate host memory with malloc.
//
template <typename T>
void memcpy2Dtest(size_t numW, size_t numH, bool usePinnedHost)
{
size_t width = numW * sizeof(T);
size_t sizeElements = width * numH;
printf("memcpy2Dtest: %s<%s> size=%lu (%6.2fMB) W: %d, H:%d, usePinnedHost: %d\n",
__func__,
TYPENAME(T),
sizeElements, sizeElements/1024.0/1024.0,
(int)numW, (int)numH, usePinnedHost);
T *A_d, *B_d, *C_d;
T *A_h, *B_h, *C_h;
size_t pitch_A, pitch_B, pitch_C;
hipChannelFormatDesc desc = hipCreateChannelDesc<T>();
HipTest::initArrays2DPitch(&A_d, &B_d, &C_d, &pitch_A, &pitch_B, &pitch_C, numW, numH);
HipTest::initArraysForHost(&A_h, &B_h, &C_h, numW*numH, usePinnedHost);
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numW*numH);
HIPCHECK (hipMemcpy2D (A_d, pitch_A, A_h, width, width, numH, hipMemcpyHostToDevice) );
HIPCHECK (hipMemcpy2D (B_d, pitch_B, B_h, width, width, numH, hipMemcpyHostToDevice) );
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, (pitch_C/sizeof(T))*numH);
HIPCHECK (hipMemcpy2D (C_h, width, C_d, pitch_C, width, numH, hipMemcpyDeviceToHost) );
HIPCHECK ( hipDeviceSynchronize() );
HipTest::checkVectorADD(A_h, B_h, C_h, numW*numH);
HipTest::freeArrays (A_d, B_d, C_d, A_h, B_h, C_h, usePinnedHost);
printf (" %s success\n", __func__);
}
//---
// Test copies of a matrix numW by numH into a hipArray data structure
// The subroutine allocates memory , copies to device, runs a vector add kernel, copies back, and checks the result.
//
// IN: numW: number of elements in the 1st dimension used for allocation
// IN: numH: number of elements in the 2nd dimension used for allocation. If this is 1, then the 1-dimensional copy API
// would be used
// IN: usePinnedHost : If true, allocate host with hipHostMalloc and is pinned ; else allocate host memory with malloc.
// IN: usePitch: If true, pads additional memory. This is only valid in the 2-dimensional case
//
template <typename T>
void memcpyArraytest(size_t numW, size_t numH, bool usePinnedHost, bool usePitch=false)
{
size_t width = numW * sizeof(T);
size_t sizeElements = width * numH;
printf("memcpyArraytest: %s<%s> size=%lu (%6.2fMB) W: %d, H: %d, usePinnedHost: %d, usePitch: %d\n",
__func__,
TYPENAME(T),
sizeElements, sizeElements/1024.0/1024.0,
(int)numW, (int)numH, usePinnedHost, usePitch);
hipArray *A_d, *B_d, *C_d;
T *A_h, *B_h, *C_h;
// 1D
if ((numW >= 1) && (numH == 1)) {
hipChannelFormatDesc desc = hipCreateChannelDesc<T>();
HipTest::initHIPArrays(&A_d, &B_d, &C_d, &desc, numW, 1, 0);
HipTest::initArraysForHost(&A_h, &B_h, &C_h, numW*numH, usePinnedHost);
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numW*numH);
HIPCHECK (hipMemcpyToArray (A_d, 0, 0, (void *)A_h, width, hipMemcpyHostToDevice) );
HIPCHECK (hipMemcpyToArray (B_d, 0, 0, (void *)B_h, width, hipMemcpyHostToDevice) );
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, (T*)A_d->data, (T*)B_d->data, (T*)C_d->data, numW);
HIPCHECK (hipMemcpy (C_h, C_d->data, width, hipMemcpyDeviceToHost) );
HIPCHECK ( hipDeviceSynchronize() );
HipTest::checkVectorADD(A_h, B_h, C_h, numW);
}
// 2D
else if ((numW >= 1) && (numH >= 1)) {
hipChannelFormatDesc desc = hipCreateChannelDesc<T>();
HipTest::initHIPArrays(&A_d, &B_d, &C_d, &desc, numW, numH, 0);
HipTest::initArraysForHost(&A_h, &B_h, &C_h, numW*numH, usePinnedHost);
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numW*numH);
if (usePitch) {
T *A_p, *B_p, *C_p;
size_t pitch_A, pitch_B, pitch_C;
HipTest::initArrays2DPitch(&A_p, &B_p, &C_p, &pitch_A, &pitch_B, &pitch_C, numW, numH);
HIPCHECK (hipMemcpy2D (A_p, pitch_A, A_h, width, width, numH, hipMemcpyHostToDevice) );
HIPCHECK (hipMemcpy2D (B_p, pitch_B, B_h, width, width, numH, hipMemcpyHostToDevice) );
HIPCHECK (hipMemcpy2DToArray (A_d, 0, 0, (void *)A_p, pitch_A, width, numH, hipMemcpyDeviceToDevice) );
HIPCHECK (hipMemcpy2DToArray (B_d, 0, 0, (void *)B_p, pitch_B, width, numH, hipMemcpyDeviceToDevice) );
hipFree(A_p);
hipFree(B_p);
hipFree(C_p);
}
else {
HIPCHECK (hipMemcpy2DToArray (A_d, 0, 0, (void *)A_h, width, width, numH, hipMemcpyHostToDevice) );
HIPCHECK (hipMemcpy2DToArray (B_d, 0, 0, (void *)B_h, width, width, numH, hipMemcpyHostToDevice) );
}
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, (T*)A_d->data, (T*)B_d->data, (T*)C_d->data, numW*numH);
HIPCHECK (hipMemcpy2D ((void*)C_h, width, (void*)C_d->data, width, width, numH, hipMemcpyDeviceToHost) );
HIPCHECK ( hipDeviceSynchronize() );
HipTest::checkVectorADD(A_h, B_h, C_h, numW*numH);
}
// Unknown
else {
HIPASSERT("Incompatible dimensions" && 0);
}
hipFreeArray(A_d);
hipFreeArray(B_d);
hipFreeArray(C_d);
HipTest::freeArraysForHost(A_h, B_h, C_h, usePinnedHost);
printf (" %s success\n", __func__);
}
//---
//Try many different sizes to memory copy.
template <typename T>
void memcpyArraytest_size(size_t maxElem=0, size_t offset=0)
{
printf ("test: %s<%s>\n", __func__, TYPENAME(T));
int deviceId;
HIPCHECK(hipGetDevice(&deviceId));
size_t free, total;
HIPCHECK(hipMemGetInfo(&free, &total));
if (maxElem == 0) {
maxElem = free/sizeof(T)/5;
}
printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB offset=%lu\n",
deviceId, free, (float)(free/1024.0/1024.0), total, (float)(total/1024.0/1024.0), maxElem*sizeof(T)/1024.0/1024.0, offset);
// Test 1D
for (size_t elem=64; elem+offset<=maxElem; elem*=2) {
HIPCHECK ( hipDeviceReset() );
memcpyArraytest<T>(elem+offset, 1, 0); // unpinned host
HIPCHECK ( hipDeviceReset() );
memcpyArraytest<T>(elem+offset, 1, 1); // pinned host
}
// Test 2D
size_t maxElem2D = sqrt(maxElem);
for (size_t elem=64; elem+offset<=maxElem2D; elem*=2) {
HIPCHECK ( hipDeviceReset() );
memcpyArraytest<T>(elem+offset, elem+offset, 0, 1); // use pitch
}
}
int main(int argc, char *argv[])
{
HipTest::parseStandardArguments(argc, argv, true);
printf ("info: set device to %d\n", p_gpuDevice);
HIPCHECK(hipSetDevice(p_gpuDevice));
if (p_tests & 0x1) {
printf ("\n\n=== tests&1 (types)\n");
printSep();
HIPCHECK ( hipDeviceReset() );
size_t width = N/6;
size_t height = N/6;
memcpy2Dtest<float>(321, 211, 0);
memcpy2Dtest<double>(322, 211, 0);
memcpy2Dtest<char>(320, 211, 0);
memcpy2Dtest<int>(323, 211, 0);
printf ("===\n\n\n");
printf ("\n\n=== tests&1 (types)\n");
printSep();
// 2D
memcpyArraytest<float>(320, 211, 0, 0);
memcpyArraytest<unsigned int>(322, 211, 0, 0);
memcpyArraytest<int>(320, 211, 0, 0);
memcpyArraytest<float>(320, 211, 0, 1);
memcpyArraytest<float>(322, 211, 0, 1);
memcpyArraytest<int>(320, 211, 0, 1);
printSep();
// 1D
memcpyArraytest<float>(320, 1, 0);
memcpyArraytest<unsigned int>(322, 1, 0);
memcpyArraytest<int>(320, 1, 0);
printf ("===\n\n\n");
}
if (p_tests & 0x4) {
printf ("\n\n=== tests&4 (test sizes and offsets)\n");
printSep();
HIPCHECK ( hipDeviceReset() );
printSep();
memcpyArraytest_size<float>(0,0);
printSep();
memcpyArraytest_size<float>(0,64);
printSep();
memcpyArraytest_size<float>(1024*1024,13);
printSep();
memcpyArraytest_size<float>(1024*1024,50);
}
passed();
}
+74 -22
Просмотреть файл
@@ -142,22 +142,11 @@ vectorADD(hipLaunchParm lp,
template <typename T>
void initArrays(T **A_d, T **B_d, T **C_d,
T **A_h, T **B_h, T **C_h,
size_t N, bool usePinnedHost=false)
void initArraysForHost(T **A_h, T **B_h, T **C_h,
size_t N, bool usePinnedHost=false)
{
size_t Nbytes = N*sizeof(T);
if (A_d) {
HIPCHECK ( hipMalloc(A_d, Nbytes) );
}
if (B_d) {
HIPCHECK ( hipMalloc(B_d, Nbytes) );
}
if (C_d) {
HIPCHECK ( hipMalloc(C_d, Nbytes) );
}
if (usePinnedHost) {
if (A_h) {
HIPCHECK ( hipHostMalloc((void**)A_h, Nbytes) );
@@ -173,7 +162,7 @@ void initArrays(T **A_d, T **B_d, T **C_d,
*A_h = (T*)malloc(Nbytes);
HIPASSERT(*A_h != NULL);
}
if (B_h) {
*B_h = (T*)malloc(Nbytes);
HIPASSERT(*B_h != NULL);
@@ -185,7 +174,6 @@ void initArrays(T **A_d, T **B_d, T **C_d,
}
}
// Initialize the host data:
for (size_t i=0; i<N; i++) {
if (A_h)
@@ -195,21 +183,31 @@ void initArrays(T **A_d, T **B_d, T **C_d,
}
}
template <typename T>
void freeArrays(T *A_d, T *B_d, T *C_d,
T *A_h, T *B_h, T *C_h, bool usePinnedHost)
void initArrays(T **A_d, T **B_d, T **C_d,
T **A_h, T **B_h, T **C_h,
size_t N, bool usePinnedHost=false)
{
size_t Nbytes = N*sizeof(T);
if (A_d) {
HIPCHECK ( hipFree(A_d) );
HIPCHECK ( hipMalloc(A_d, Nbytes) );
}
if (B_d) {
HIPCHECK ( hipFree(B_d) );
HIPCHECK ( hipMalloc(B_d, Nbytes) );
}
if (C_d) {
HIPCHECK ( hipFree(C_d) );
HIPCHECK ( hipMalloc(C_d, Nbytes) );
}
initArraysForHost(A_h, B_h, C_h, N, usePinnedHost);
}
template <typename T>
void freeArraysForHost(T *A_h, T *B_h, T *C_h, bool usePinnedHost)
{
if (usePinnedHost) {
if (A_h) {
HIPCHECK (hipHostFree(A_h));
@@ -231,9 +229,63 @@ void freeArrays(T *A_d, T *B_d, T *C_d,
free (C_h);
}
}
}
template <typename T>
void freeArrays(T *A_d, T *B_d, T *C_d,
T *A_h, T *B_h, T *C_h, bool usePinnedHost)
{
if (A_d) {
HIPCHECK ( hipFree(A_d) );
}
if (B_d) {
HIPCHECK ( hipFree(B_d) );
}
if (C_d) {
HIPCHECK ( hipFree(C_d) );
}
freeArraysForHost(A_h, B_h, C_h, usePinnedHost);
}
#if defined(__HIP_PLATFORM_HCC__)
template <typename T>
void initArrays2DPitch(T **A_d, T **B_d, T **C_d,
size_t *pitch_A, size_t *pitch_B, size_t *pitch_C,
size_t numW, size_t numH)
{
if (A_d) {
HIPCHECK ( hipMallocPitch((void**)A_d, pitch_A, numW*sizeof(T), numH) );
}
if (B_d) {
HIPCHECK ( hipMallocPitch((void**)B_d, pitch_B, numW*sizeof(T), numH) );
}
if (C_d) {
HIPCHECK ( hipMallocPitch((void**)C_d, pitch_C, numW*sizeof(T), numH) );
}
HIPASSERT(*pitch_A == *pitch_B);
HIPASSERT(*pitch_A == *pitch_C)
}
inline void initHIPArrays(hipArray **A_d, hipArray **B_d, hipArray **C_d,
const hipChannelFormatDesc *desc, const size_t numW, const size_t numH, const unsigned int flags)
{
if (A_d) {
HIPCHECK( hipMallocArray(A_d, desc, numW, numH, flags));
}
if (B_d) {
HIPCHECK( hipMallocArray(B_d, desc, numW, numH, flags));
}
if (C_d) {
HIPCHECK( hipMallocArray(C_d, desc, numW, numH, flags));
}
}
#endif
// Assumes C_h contains vector add of A_h + B_h
// Calls the test "failed" macro if a mismatch is detected.