Merge branch 'hiparray' into amd-develop
Change-Id: I63ca7b1db7b593ac5cfb3fd7cd5d08d6e4075a4c
[ROCm/hip commit: 7022986ab2]
Этот коммит содержится в:
@@ -279,6 +279,8 @@ while (@ARGV) {
|
||||
|
||||
$ft{'mem'} += s/\bcudaPointerAttributes\b/hipPointerAttribute_t/g;
|
||||
|
||||
$ft{'mem'} += s/\bcudaMemcpy2D\b/hipMemcpy2D/g;
|
||||
$ft{'mem'} += s/\bcudaMemcpy2DToArray\b/hipMemcpy2DToArray/g;
|
||||
|
||||
#--------
|
||||
# Memory management:
|
||||
@@ -293,6 +295,9 @@ while (@ARGV) {
|
||||
$ft{'mem'} += s/\bcudaHostAllocMapped\b/hipHostMallocMapped/g;
|
||||
$ft{'mem'} += s/\bcudaHostAllocWriteCombined\b/hipHostMallocWriteCombined/g;
|
||||
|
||||
$ft{'mem'} += s/\bcudaMallocArray\b/hipMallocArray/g;
|
||||
$ft{'mem'} += s/\bcudaMallocPitch\b/hipMallocPitch/g;
|
||||
|
||||
|
||||
#--------
|
||||
# Coordinate Indexing and Dimensions:
|
||||
@@ -493,8 +498,10 @@ while (@ARGV) {
|
||||
$ft{'tex'} += s/\bcudaFilterModePoint\b/hipFilterModePoint/g;
|
||||
$ft{'tex'} += s/\bcudaReadModeElementType\b/hipReadModeElementType/g;
|
||||
|
||||
$ft{'tex'} += s/\bcudaArray\b/hipArrary/g;
|
||||
$ft{'tex'} += s/\bcudaCreateChannelDesc\b/hipCreateChannelDesc/g;
|
||||
$ft{'tex'} += s/\bcudaBindTexture\b/hipBindTexture/g;
|
||||
$ft{'tex'} += s/\bcudaBindTextureToArray\b/hipBindTextureToArray/g;
|
||||
$ft{'tex'} += s/\bcudaUnbindTexture\b/hipUnbindTexture/g;
|
||||
}
|
||||
|
||||
|
||||
@@ -760,6 +760,20 @@ hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags)
|
||||
*/
|
||||
hipError_t hipHostUnregister(void* hostPtr) ;
|
||||
|
||||
/**
|
||||
* Allocates at least width (in bytes) * height bytes of linear memory
|
||||
* Padding may occur to ensure alighnment requirements are met for the given row
|
||||
* The change in width size due to padding will be returned in *pitch.
|
||||
* Currently the alignment is set to 128 bytes
|
||||
*
|
||||
* @param[out] ptr Pointer to the allocated device memory
|
||||
* @param[out] pitch Pitch for allocation (in bytes)
|
||||
* @param[in] width Requested pitched allocation width (in bytes)
|
||||
* @param[in] height Requested pitched allocation height
|
||||
* @return Error code
|
||||
*/
|
||||
|
||||
hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height);
|
||||
|
||||
/**
|
||||
* @brief Free memory allocated by the hcc hip memory allocation API.
|
||||
|
||||
@@ -38,9 +38,20 @@ THE SOFTWARE.
|
||||
//Texture - TODO - likely need to move this to a separate file only included with kernel compilation.
|
||||
#define hipTextureType1D 1
|
||||
|
||||
typedef enum {
|
||||
hipChannelFormatKindSigned = 0,
|
||||
hipChannelFormatKindUnsigned,
|
||||
hipChannelFormatKindFloat,
|
||||
hipChannelFormatKindNone
|
||||
|
||||
} hipChannelFormatKind;
|
||||
|
||||
typedef struct hipChannelFormatDesc {
|
||||
// TODO - this has 4-5 well-defined fields, we could just copy...
|
||||
int _dummy;
|
||||
int x;
|
||||
int y;
|
||||
int z;
|
||||
int w;
|
||||
hipChannelFormatKind f;
|
||||
} hipChannelFormatDesc;
|
||||
|
||||
typedef enum hipTextureReadMode
|
||||
@@ -67,14 +78,39 @@ struct texture : public textureReference {
|
||||
const T * _dataPtr; // pointer to underlying data.
|
||||
|
||||
//texture() : filterMode(hipFilterModePoint), normalized(false), _dataPtr(NULL) {};
|
||||
unsigned int width;
|
||||
unsigned int height;
|
||||
|
||||
};
|
||||
#endif
|
||||
|
||||
typedef struct hipArray {
|
||||
unsigned int width;
|
||||
unsigned int height;
|
||||
hipChannelFormatKind f;
|
||||
void* data; //FIXME: generalize this
|
||||
} hipArray;
|
||||
|
||||
|
||||
#define tex1Dfetch(_tex, _addr) (_tex._dataPtr[_addr])
|
||||
|
||||
#define tex2D(_tex, _dx, _dy) \
|
||||
_tex._dataPtr[(unsigned int)_dx + (unsigned int)_dy*(_tex.width)]
|
||||
|
||||
hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
|
||||
size_t width, size_t height = 0, unsigned int flags = 0);
|
||||
|
||||
hipError_t hipFreeArray(hipArray* array);
|
||||
//
|
||||
// dpitch, spitch, and width in bytes
|
||||
hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind);
|
||||
|
||||
// wOffset, width, and spitch in bytes
|
||||
hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src,
|
||||
size_t spitch, size_t width, size_t height, hipMemcpyKind kind);
|
||||
|
||||
hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset,
|
||||
const void* src, size_t count, hipMemcpyKind kind);
|
||||
|
||||
|
||||
/**
|
||||
@@ -125,11 +161,31 @@ hipChannelFormatDesc hipBindTexture(size_t *offset, struct textureReference *te
|
||||
*
|
||||
*
|
||||
**/
|
||||
template <class T>
|
||||
hipChannelFormatDesc hipCreateChannelDesc()
|
||||
{
|
||||
hipChannelFormatDesc desc;
|
||||
return desc;
|
||||
hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f);
|
||||
|
||||
// descriptors
|
||||
template <typename T> inline hipChannelFormatDesc hipCreateChannelDesc() {
|
||||
return hipCreateChannelDesc(0, 0, 0, 0, hipChannelFormatKindNone);
|
||||
}
|
||||
template <> inline hipChannelFormatDesc hipCreateChannelDesc<int>() {
|
||||
int e = (int)sizeof(int) * 8;
|
||||
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
|
||||
}
|
||||
template <> inline hipChannelFormatDesc hipCreateChannelDesc<unsigned int>() {
|
||||
int e = (int)sizeof(unsigned int) * 8;
|
||||
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
|
||||
}
|
||||
template <> inline hipChannelFormatDesc hipCreateChannelDesc<long>() {
|
||||
int e = (int)sizeof(long) * 8;
|
||||
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
|
||||
}
|
||||
template <> inline hipChannelFormatDesc hipCreateChannelDesc<unsigned long>() {
|
||||
int e = (int)sizeof(unsigned long) * 8;
|
||||
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
|
||||
}
|
||||
template <> inline hipChannelFormatDesc hipCreateChannelDesc<float>() {
|
||||
int e = (int)sizeof(float) * 8;
|
||||
return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -178,6 +234,13 @@ hipError_t hipBindTexture(size_t *offset,
|
||||
return hipBindTexture(offset, tex, devPtr, &tex.channelDesc, size);
|
||||
}
|
||||
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipBindTextureToArray(struct texture<T, dim, readMode> &tex, hipArray* array) {
|
||||
tex.width = array->width;
|
||||
tex.height = array->height;
|
||||
tex._dataPtr = static_cast<const T*>(array->data);
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
/*
|
||||
* @brief Unbinds the textuer bound to @p tex
|
||||
@@ -187,9 +250,9 @@ hipError_t hipBindTexture(size_t *offset,
|
||||
* @return #hipSuccess
|
||||
**/
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipUnbindTexture(struct texture<T, dim, readMode> *tex)
|
||||
hipError_t hipUnbindTexture(struct texture<T, dim, readMode> &tex)
|
||||
{
|
||||
tex->_dataPtr = NULL;
|
||||
tex._dataPtr = NULL;
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
@@ -198,6 +198,116 @@ hipError_t hipMallocHost(void** ptr, size_t sizeBytes)
|
||||
return hipHostMalloc(ptr, sizeBytes, 0);
|
||||
}
|
||||
|
||||
// width in bytes
|
||||
hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) {
|
||||
|
||||
HIP_INIT_API(ptr, pitch, width, height);
|
||||
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
if(width == 0 || height == 0)
|
||||
return ihipLogStatus(hipErrorUnknown);
|
||||
|
||||
// hardcoded 128 bytes
|
||||
*pitch = ((((int)width-1)/128) + 1)*128;
|
||||
const size_t sizeBytes = (*pitch)*height;
|
||||
|
||||
auto device = ihipGetTlsDefaultDevice();
|
||||
|
||||
//err = hipMalloc(ptr, (*pitch)*height);
|
||||
if (device) {
|
||||
const unsigned am_flags = 0;
|
||||
*ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags);
|
||||
|
||||
if (sizeBytes && (*ptr == NULL)) {
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
} else {
|
||||
hc::am_memtracker_update(*ptr, device->_device_index, 0);
|
||||
{
|
||||
LockedAccessor_DeviceCrit_t crit(device->criticalData());
|
||||
if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved:
|
||||
hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
|
||||
if (hsa_status != HSA_STATUS_SUCCESS) {
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
}
|
||||
|
||||
return ihipLogStatus(hip_status);
|
||||
|
||||
}
|
||||
|
||||
hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f) {
|
||||
hipChannelFormatDesc cd;
|
||||
cd.x = x; cd.y = y; cd.z = z; cd.w = w;
|
||||
cd.f = f;
|
||||
return cd;
|
||||
}
|
||||
|
||||
hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
|
||||
size_t width, size_t height, unsigned int flags) {
|
||||
|
||||
HIP_INIT_API(array, desc, width, height, flags);
|
||||
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
auto device = ihipGetTlsDefaultDevice();
|
||||
|
||||
*array = (hipArray*)malloc(sizeof(hipArray));
|
||||
array[0]->width = width;
|
||||
array[0]->height = height;
|
||||
|
||||
array[0]->f = desc->f;
|
||||
|
||||
void ** ptr = &array[0]->data;
|
||||
|
||||
if (device) {
|
||||
const unsigned am_flags = 0;
|
||||
const size_t size = width*height;
|
||||
|
||||
switch(desc->f) {
|
||||
case hipChannelFormatKindSigned:
|
||||
*ptr = hc::am_alloc(size*sizeof(int), device->_acc, am_flags);
|
||||
break;
|
||||
case hipChannelFormatKindUnsigned:
|
||||
*ptr = hc::am_alloc(size*sizeof(unsigned int), device->_acc, am_flags);
|
||||
break;
|
||||
case hipChannelFormatKindFloat:
|
||||
*ptr = hc::am_alloc(size*sizeof(float), device->_acc, am_flags);
|
||||
break;
|
||||
case hipChannelFormatKindNone:
|
||||
*ptr = hc::am_alloc(size*sizeof(size_t), device->_acc, am_flags);
|
||||
break;
|
||||
default:
|
||||
hip_status = hipErrorUnknown;
|
||||
break;
|
||||
}
|
||||
if (size && (*ptr == NULL)) {
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
} else {
|
||||
hc::am_memtracker_update(*ptr, device->_device_index, 0);
|
||||
{
|
||||
LockedAccessor_DeviceCrit_t crit(device->criticalData());
|
||||
if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved:
|
||||
hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
|
||||
if (hsa_status != HSA_STATUS_SUCCESS) {
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
}
|
||||
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
|
||||
//---
|
||||
hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr)
|
||||
@@ -364,6 +474,108 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
// dpitch, spitch, and width in bytes
|
||||
hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch,
|
||||
size_t width, size_t height, hipMemcpyKind kind) {
|
||||
|
||||
HIP_INIT_API(dst, dpitch, src, spitch, width, height, kind);
|
||||
|
||||
if(width > dpitch || width > spitch)
|
||||
return ihipLogStatus(hipErrorUnknown);
|
||||
|
||||
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
|
||||
|
||||
hc::completion_future marker;
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
try {
|
||||
for(int i = 0; i < height; ++i) {
|
||||
stream->locked_copySync((unsigned char*)dst + i*dpitch, (unsigned char*)src + i*spitch, width, kind);
|
||||
}
|
||||
}
|
||||
catch (ihipException ex) {
|
||||
e = ex._code;
|
||||
}
|
||||
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
// wOffset, width, and spitch in bytes
|
||||
hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src,
|
||||
size_t spitch, size_t width, size_t height, hipMemcpyKind kind) {
|
||||
|
||||
HIP_INIT_API(dst, wOffset, hOffset, src, spitch, width, height, kind);
|
||||
|
||||
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
|
||||
|
||||
hc::completion_future marker;
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
size_t byteSize;
|
||||
if(dst) {
|
||||
switch(dst[0].f) {
|
||||
case hipChannelFormatKindSigned:
|
||||
byteSize = sizeof(int);
|
||||
break;
|
||||
case hipChannelFormatKindUnsigned:
|
||||
byteSize = sizeof(unsigned int);
|
||||
break;
|
||||
case hipChannelFormatKindFloat:
|
||||
byteSize = sizeof(float);
|
||||
break;
|
||||
case hipChannelFormatKindNone:
|
||||
byteSize = sizeof(size_t);
|
||||
break;
|
||||
default:
|
||||
byteSize = 0;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
return ihipLogStatus(hipErrorUnknown);
|
||||
}
|
||||
|
||||
if((wOffset + width > (dst->width * byteSize)) || width > spitch) {
|
||||
return ihipLogStatus(hipErrorUnknown);
|
||||
}
|
||||
|
||||
size_t src_w = spitch;
|
||||
size_t dst_w = (dst->width)*byteSize;
|
||||
|
||||
try {
|
||||
for(int i = 0; i < height; ++i) {
|
||||
stream->locked_copySync((unsigned char*)dst->data + i*dst_w, (unsigned char*)src + i*src_w, width, kind);
|
||||
}
|
||||
}
|
||||
catch (ihipException ex) {
|
||||
e = ex._code;
|
||||
}
|
||||
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset,
|
||||
const void* src, size_t count, hipMemcpyKind kind) {
|
||||
|
||||
HIP_INIT_API(dst, wOffset, hOffset, src, count, kind);
|
||||
|
||||
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
|
||||
|
||||
hc::completion_future marker;
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
try {
|
||||
stream->locked_copySync((char *)dst->data + wOffset, src, count, kind);
|
||||
}
|
||||
catch (ihipException ex) {
|
||||
e = ex._code;
|
||||
}
|
||||
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
|
||||
// TODO-sync: function is async unless target is pinned host memory - then these are fully sync.
|
||||
/** @return #hipErrorInvalidValue
|
||||
@@ -566,5 +778,28 @@ hipError_t hipFreeHost(void* ptr)
|
||||
return hipHostFree(ptr);
|
||||
}
|
||||
|
||||
hipError_t hipFreeArray(hipArray* array)
|
||||
{
|
||||
HIP_INIT_API(array);
|
||||
|
||||
hipError_t hipStatus = hipErrorInvalidDevicePointer;
|
||||
|
||||
// Synchronize to ensure all work has finished.
|
||||
ihipGetTlsDefaultDevice()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish.
|
||||
|
||||
if(array->data) {
|
||||
hc::accelerator acc;
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, array->data);
|
||||
if(status == AM_SUCCESS){
|
||||
if(amPointerInfo._hostPointer == NULL){
|
||||
hc::am_free(array->data);
|
||||
hipStatus = hipSuccess;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ihipLogStatus(hipStatus);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -178,6 +178,10 @@ build_hip_executable (hipTestMemcpyPin hipTestMemcpyPin.cpp)
|
||||
#build_hip_executable (hipDynamicShared hipDynamicShared.cpp)
|
||||
build_hip_executable (hipLaunchParm hipLaunchParm.cpp)
|
||||
|
||||
if (${HIP_PLATFORM} STREQUAL "hcc")
|
||||
build_hip_executable (hipArray hipArray.cpp)
|
||||
endif()
|
||||
|
||||
make_test(hipEventRecord --iterations 10)
|
||||
make_test(hipEnvVarDriver " " )
|
||||
make_test(hipLaunchParm " ")
|
||||
@@ -204,6 +208,7 @@ if (${HIP_MULTI_GPU})
|
||||
endif()
|
||||
|
||||
if (${HIP_PLATFORM} STREQUAL "hcc")
|
||||
make_test(hipArray " ")
|
||||
make_test(hipFuncSetDevice " ")
|
||||
endif()
|
||||
|
||||
|
||||
@@ -0,0 +1,239 @@
|
||||
#include "hip_runtime.h"
|
||||
#include "test_common.h"
|
||||
|
||||
void printSep()
|
||||
{
|
||||
printf ("======================================================================================\n");
|
||||
}
|
||||
|
||||
|
||||
//---
|
||||
// Test copies of a matrix numW by numH
|
||||
// The subroutine allocates memory , copies to device, runs a vector add kernel, copies back, and checks the result.
|
||||
//
|
||||
// IN: numW: number of elements in the 1st dimension used for allocation
|
||||
// IN: numH: number of elements in the 2nd dimension used for allocation
|
||||
// IN: usePinnedHost : If true, allocate host with hipHostMalloc and is pinned ; else allocate host memory with malloc.
|
||||
//
|
||||
template <typename T>
|
||||
void memcpy2Dtest(size_t numW, size_t numH, bool usePinnedHost)
|
||||
{
|
||||
|
||||
size_t width = numW * sizeof(T);
|
||||
size_t sizeElements = width * numH;
|
||||
|
||||
printf("memcpy2Dtest: %s<%s> size=%lu (%6.2fMB) W: %d, H:%d, usePinnedHost: %d\n",
|
||||
__func__,
|
||||
TYPENAME(T),
|
||||
sizeElements, sizeElements/1024.0/1024.0,
|
||||
(int)numW, (int)numH, usePinnedHost);
|
||||
|
||||
T *A_d, *B_d, *C_d;
|
||||
T *A_h, *B_h, *C_h;
|
||||
|
||||
size_t pitch_A, pitch_B, pitch_C;
|
||||
|
||||
hipChannelFormatDesc desc = hipCreateChannelDesc<T>();
|
||||
HipTest::initArrays2DPitch(&A_d, &B_d, &C_d, &pitch_A, &pitch_B, &pitch_C, numW, numH);
|
||||
HipTest::initArraysForHost(&A_h, &B_h, &C_h, numW*numH, usePinnedHost);
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numW*numH);
|
||||
|
||||
HIPCHECK (hipMemcpy2D (A_d, pitch_A, A_h, width, width, numH, hipMemcpyHostToDevice) );
|
||||
HIPCHECK (hipMemcpy2D (B_d, pitch_B, B_h, width, width, numH, hipMemcpyHostToDevice) );
|
||||
|
||||
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, (pitch_C/sizeof(T))*numH);
|
||||
|
||||
HIPCHECK (hipMemcpy2D (C_h, width, C_d, pitch_C, width, numH, hipMemcpyDeviceToHost) );
|
||||
|
||||
HIPCHECK ( hipDeviceSynchronize() );
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, numW*numH);
|
||||
|
||||
HipTest::freeArrays (A_d, B_d, C_d, A_h, B_h, C_h, usePinnedHost);
|
||||
|
||||
printf (" %s success\n", __func__);
|
||||
}
|
||||
|
||||
//---
|
||||
// Test copies of a matrix numW by numH into a hipArray data structure
|
||||
// The subroutine allocates memory , copies to device, runs a vector add kernel, copies back, and checks the result.
|
||||
//
|
||||
// IN: numW: number of elements in the 1st dimension used for allocation
|
||||
// IN: numH: number of elements in the 2nd dimension used for allocation. If this is 1, then the 1-dimensional copy API
|
||||
// would be used
|
||||
// IN: usePinnedHost : If true, allocate host with hipHostMalloc and is pinned ; else allocate host memory with malloc.
|
||||
// IN: usePitch: If true, pads additional memory. This is only valid in the 2-dimensional case
|
||||
//
|
||||
template <typename T>
|
||||
void memcpyArraytest(size_t numW, size_t numH, bool usePinnedHost, bool usePitch=false)
|
||||
{
|
||||
|
||||
size_t width = numW * sizeof(T);
|
||||
size_t sizeElements = width * numH;
|
||||
|
||||
printf("memcpyArraytest: %s<%s> size=%lu (%6.2fMB) W: %d, H: %d, usePinnedHost: %d, usePitch: %d\n",
|
||||
__func__,
|
||||
TYPENAME(T),
|
||||
sizeElements, sizeElements/1024.0/1024.0,
|
||||
(int)numW, (int)numH, usePinnedHost, usePitch);
|
||||
|
||||
hipArray *A_d, *B_d, *C_d;
|
||||
T *A_h, *B_h, *C_h;
|
||||
|
||||
// 1D
|
||||
if ((numW >= 1) && (numH == 1)) {
|
||||
hipChannelFormatDesc desc = hipCreateChannelDesc<T>();
|
||||
HipTest::initHIPArrays(&A_d, &B_d, &C_d, &desc, numW, 1, 0);
|
||||
HipTest::initArraysForHost(&A_h, &B_h, &C_h, numW*numH, usePinnedHost);
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numW*numH);
|
||||
|
||||
HIPCHECK (hipMemcpyToArray (A_d, 0, 0, (void *)A_h, width, hipMemcpyHostToDevice) );
|
||||
HIPCHECK (hipMemcpyToArray (B_d, 0, 0, (void *)B_h, width, hipMemcpyHostToDevice) );
|
||||
|
||||
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, (T*)A_d->data, (T*)B_d->data, (T*)C_d->data, numW);
|
||||
|
||||
HIPCHECK (hipMemcpy (C_h, C_d->data, width, hipMemcpyDeviceToHost) );
|
||||
|
||||
HIPCHECK ( hipDeviceSynchronize() );
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, numW);
|
||||
|
||||
}
|
||||
// 2D
|
||||
else if ((numW >= 1) && (numH >= 1)) {
|
||||
|
||||
|
||||
hipChannelFormatDesc desc = hipCreateChannelDesc<T>();
|
||||
HipTest::initHIPArrays(&A_d, &B_d, &C_d, &desc, numW, numH, 0);
|
||||
HipTest::initArraysForHost(&A_h, &B_h, &C_h, numW*numH, usePinnedHost);
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numW*numH);
|
||||
|
||||
if (usePitch) {
|
||||
T *A_p, *B_p, *C_p;
|
||||
size_t pitch_A, pitch_B, pitch_C;
|
||||
|
||||
HipTest::initArrays2DPitch(&A_p, &B_p, &C_p, &pitch_A, &pitch_B, &pitch_C, numW, numH);
|
||||
HIPCHECK (hipMemcpy2D (A_p, pitch_A, A_h, width, width, numH, hipMemcpyHostToDevice) );
|
||||
HIPCHECK (hipMemcpy2D (B_p, pitch_B, B_h, width, width, numH, hipMemcpyHostToDevice) );
|
||||
|
||||
HIPCHECK (hipMemcpy2DToArray (A_d, 0, 0, (void *)A_p, pitch_A, width, numH, hipMemcpyDeviceToDevice) );
|
||||
HIPCHECK (hipMemcpy2DToArray (B_d, 0, 0, (void *)B_p, pitch_B, width, numH, hipMemcpyDeviceToDevice) );
|
||||
|
||||
hipFree(A_p);
|
||||
hipFree(B_p);
|
||||
hipFree(C_p);
|
||||
}
|
||||
else {
|
||||
HIPCHECK (hipMemcpy2DToArray (A_d, 0, 0, (void *)A_h, width, width, numH, hipMemcpyHostToDevice) );
|
||||
HIPCHECK (hipMemcpy2DToArray (B_d, 0, 0, (void *)B_h, width, width, numH, hipMemcpyHostToDevice) );
|
||||
}
|
||||
|
||||
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, (T*)A_d->data, (T*)B_d->data, (T*)C_d->data, numW*numH);
|
||||
|
||||
HIPCHECK (hipMemcpy2D ((void*)C_h, width, (void*)C_d->data, width, width, numH, hipMemcpyDeviceToHost) );
|
||||
|
||||
HIPCHECK ( hipDeviceSynchronize() );
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, numW*numH);
|
||||
}
|
||||
// Unknown
|
||||
else {
|
||||
HIPASSERT("Incompatible dimensions" && 0);
|
||||
}
|
||||
|
||||
hipFreeArray(A_d);
|
||||
hipFreeArray(B_d);
|
||||
hipFreeArray(C_d);
|
||||
HipTest::freeArraysForHost(A_h, B_h, C_h, usePinnedHost);
|
||||
|
||||
printf (" %s success\n", __func__);
|
||||
|
||||
}
|
||||
|
||||
//---
|
||||
//Try many different sizes to memory copy.
|
||||
template <typename T>
|
||||
void memcpyArraytest_size(size_t maxElem=0, size_t offset=0)
|
||||
{
|
||||
printf ("test: %s<%s>\n", __func__, TYPENAME(T));
|
||||
|
||||
int deviceId;
|
||||
HIPCHECK(hipGetDevice(&deviceId));
|
||||
|
||||
size_t free, total;
|
||||
HIPCHECK(hipMemGetInfo(&free, &total));
|
||||
|
||||
if (maxElem == 0) {
|
||||
maxElem = free/sizeof(T)/5;
|
||||
}
|
||||
|
||||
printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB offset=%lu\n",
|
||||
deviceId, free, (float)(free/1024.0/1024.0), total, (float)(total/1024.0/1024.0), maxElem*sizeof(T)/1024.0/1024.0, offset);
|
||||
|
||||
// Test 1D
|
||||
for (size_t elem=64; elem+offset<=maxElem; elem*=2) {
|
||||
HIPCHECK ( hipDeviceReset() );
|
||||
memcpyArraytest<T>(elem+offset, 1, 0); // unpinned host
|
||||
HIPCHECK ( hipDeviceReset() );
|
||||
memcpyArraytest<T>(elem+offset, 1, 1); // pinned host
|
||||
}
|
||||
|
||||
// Test 2D
|
||||
size_t maxElem2D = sqrt(maxElem);
|
||||
|
||||
for (size_t elem=64; elem+offset<=maxElem2D; elem*=2) {
|
||||
HIPCHECK ( hipDeviceReset() );
|
||||
memcpyArraytest<T>(elem+offset, elem+offset, 0, 1); // use pitch
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
HipTest::parseStandardArguments(argc, argv, true);
|
||||
|
||||
printf ("info: set device to %d\n", p_gpuDevice);
|
||||
HIPCHECK(hipSetDevice(p_gpuDevice));
|
||||
|
||||
if (p_tests & 0x1) {
|
||||
printf ("\n\n=== tests&1 (types)\n");
|
||||
printSep();
|
||||
HIPCHECK ( hipDeviceReset() );
|
||||
size_t width = N/6;
|
||||
size_t height = N/6;
|
||||
memcpy2Dtest<float>(321, 211, 0);
|
||||
memcpy2Dtest<double>(322, 211, 0);
|
||||
memcpy2Dtest<char>(320, 211, 0);
|
||||
memcpy2Dtest<int>(323, 211, 0);
|
||||
printf ("===\n\n\n");
|
||||
|
||||
printf ("\n\n=== tests&1 (types)\n");
|
||||
printSep();
|
||||
// 2D
|
||||
memcpyArraytest<float>(320, 211, 0, 0);
|
||||
memcpyArraytest<unsigned int>(322, 211, 0, 0);
|
||||
memcpyArraytest<int>(320, 211, 0, 0);
|
||||
memcpyArraytest<float>(320, 211, 0, 1);
|
||||
memcpyArraytest<float>(322, 211, 0, 1);
|
||||
memcpyArraytest<int>(320, 211, 0, 1);
|
||||
printSep();
|
||||
// 1D
|
||||
memcpyArraytest<float>(320, 1, 0);
|
||||
memcpyArraytest<unsigned int>(322, 1, 0);
|
||||
memcpyArraytest<int>(320, 1, 0);
|
||||
printf ("===\n\n\n");
|
||||
}
|
||||
|
||||
if (p_tests & 0x4) {
|
||||
printf ("\n\n=== tests&4 (test sizes and offsets)\n");
|
||||
printSep();
|
||||
HIPCHECK ( hipDeviceReset() );
|
||||
printSep();
|
||||
memcpyArraytest_size<float>(0,0);
|
||||
printSep();
|
||||
memcpyArraytest_size<float>(0,64);
|
||||
printSep();
|
||||
memcpyArraytest_size<float>(1024*1024,13);
|
||||
printSep();
|
||||
memcpyArraytest_size<float>(1024*1024,50);
|
||||
}
|
||||
|
||||
passed();
|
||||
|
||||
}
|
||||
@@ -142,22 +142,11 @@ vectorADD(hipLaunchParm lp,
|
||||
|
||||
|
||||
template <typename T>
|
||||
void initArrays(T **A_d, T **B_d, T **C_d,
|
||||
T **A_h, T **B_h, T **C_h,
|
||||
size_t N, bool usePinnedHost=false)
|
||||
void initArraysForHost(T **A_h, T **B_h, T **C_h,
|
||||
size_t N, bool usePinnedHost=false)
|
||||
{
|
||||
size_t Nbytes = N*sizeof(T);
|
||||
|
||||
if (A_d) {
|
||||
HIPCHECK ( hipMalloc(A_d, Nbytes) );
|
||||
}
|
||||
if (B_d) {
|
||||
HIPCHECK ( hipMalloc(B_d, Nbytes) );
|
||||
}
|
||||
if (C_d) {
|
||||
HIPCHECK ( hipMalloc(C_d, Nbytes) );
|
||||
}
|
||||
|
||||
if (usePinnedHost) {
|
||||
if (A_h) {
|
||||
HIPCHECK ( hipHostMalloc((void**)A_h, Nbytes) );
|
||||
@@ -173,7 +162,7 @@ void initArrays(T **A_d, T **B_d, T **C_d,
|
||||
*A_h = (T*)malloc(Nbytes);
|
||||
HIPASSERT(*A_h != NULL);
|
||||
}
|
||||
|
||||
|
||||
if (B_h) {
|
||||
*B_h = (T*)malloc(Nbytes);
|
||||
HIPASSERT(*B_h != NULL);
|
||||
@@ -185,7 +174,6 @@ void initArrays(T **A_d, T **B_d, T **C_d,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Initialize the host data:
|
||||
for (size_t i=0; i<N; i++) {
|
||||
if (A_h)
|
||||
@@ -195,21 +183,31 @@ void initArrays(T **A_d, T **B_d, T **C_d,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void freeArrays(T *A_d, T *B_d, T *C_d,
|
||||
T *A_h, T *B_h, T *C_h, bool usePinnedHost)
|
||||
void initArrays(T **A_d, T **B_d, T **C_d,
|
||||
T **A_h, T **B_h, T **C_h,
|
||||
size_t N, bool usePinnedHost=false)
|
||||
{
|
||||
size_t Nbytes = N*sizeof(T);
|
||||
|
||||
if (A_d) {
|
||||
HIPCHECK ( hipFree(A_d) );
|
||||
HIPCHECK ( hipMalloc(A_d, Nbytes) );
|
||||
}
|
||||
if (B_d) {
|
||||
HIPCHECK ( hipFree(B_d) );
|
||||
HIPCHECK ( hipMalloc(B_d, Nbytes) );
|
||||
}
|
||||
if (C_d) {
|
||||
HIPCHECK ( hipFree(C_d) );
|
||||
HIPCHECK ( hipMalloc(C_d, Nbytes) );
|
||||
}
|
||||
|
||||
initArraysForHost(A_h, B_h, C_h, N, usePinnedHost);
|
||||
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void freeArraysForHost(T *A_h, T *B_h, T *C_h, bool usePinnedHost)
|
||||
{
|
||||
if (usePinnedHost) {
|
||||
if (A_h) {
|
||||
HIPCHECK (hipHostFree(A_h));
|
||||
@@ -231,9 +229,63 @@ void freeArrays(T *A_d, T *B_d, T *C_d,
|
||||
free (C_h);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void freeArrays(T *A_d, T *B_d, T *C_d,
|
||||
T *A_h, T *B_h, T *C_h, bool usePinnedHost)
|
||||
{
|
||||
if (A_d) {
|
||||
HIPCHECK ( hipFree(A_d) );
|
||||
}
|
||||
if (B_d) {
|
||||
HIPCHECK ( hipFree(B_d) );
|
||||
}
|
||||
if (C_d) {
|
||||
HIPCHECK ( hipFree(C_d) );
|
||||
}
|
||||
|
||||
freeArraysForHost(A_h, B_h, C_h, usePinnedHost);
|
||||
}
|
||||
|
||||
#if defined(__HIP_PLATFORM_HCC__)
|
||||
template <typename T>
|
||||
void initArrays2DPitch(T **A_d, T **B_d, T **C_d,
|
||||
size_t *pitch_A, size_t *pitch_B, size_t *pitch_C,
|
||||
size_t numW, size_t numH)
|
||||
{
|
||||
if (A_d) {
|
||||
HIPCHECK ( hipMallocPitch((void**)A_d, pitch_A, numW*sizeof(T), numH) );
|
||||
}
|
||||
if (B_d) {
|
||||
HIPCHECK ( hipMallocPitch((void**)B_d, pitch_B, numW*sizeof(T), numH) );
|
||||
}
|
||||
if (C_d) {
|
||||
HIPCHECK ( hipMallocPitch((void**)C_d, pitch_C, numW*sizeof(T), numH) );
|
||||
}
|
||||
|
||||
HIPASSERT(*pitch_A == *pitch_B);
|
||||
HIPASSERT(*pitch_A == *pitch_C)
|
||||
|
||||
}
|
||||
|
||||
inline void initHIPArrays(hipArray **A_d, hipArray **B_d, hipArray **C_d,
|
||||
const hipChannelFormatDesc *desc, const size_t numW, const size_t numH, const unsigned int flags)
|
||||
{
|
||||
|
||||
if (A_d) {
|
||||
HIPCHECK( hipMallocArray(A_d, desc, numW, numH, flags));
|
||||
}
|
||||
if (B_d) {
|
||||
HIPCHECK( hipMallocArray(B_d, desc, numW, numH, flags));
|
||||
}
|
||||
if (C_d) {
|
||||
HIPCHECK( hipMallocArray(C_d, desc, numW, numH, flags));
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
// Assumes C_h contains vector add of A_h + B_h
|
||||
// Calls the test "failed" macro if a mismatch is detected.
|
||||
|
||||
Ссылка в новой задаче
Block a user