Improve copy testing implementation.
- add tests for (unpinned/pinned) x H2H x D2D.
- Free memory at end of test.
[ROCm/clr commit: 1128610801]
Этот коммит содержится в:
@@ -132,6 +132,7 @@ struct StagingBuffer {
|
||||
|
||||
|
||||
StagingBuffer(ihipDevice_t *device, size_t bufferSize) ;
|
||||
~StagingBuffer();
|
||||
|
||||
void CopyHostToDevice(void* dst, const void* src, size_t sizeBytes);
|
||||
|
||||
@@ -163,6 +164,7 @@ struct ihipDevice_t
|
||||
StagingBuffer *_staging_device2host;
|
||||
|
||||
public:
|
||||
void reset();
|
||||
void init(unsigned device_index, hc::accelerator acc);
|
||||
hipError_t getProperties(hipDeviceProp_t* prop);
|
||||
|
||||
@@ -172,6 +174,17 @@ public:
|
||||
|
||||
|
||||
//=================================================================================================
|
||||
//
|
||||
//Reset the device - this is called from hipDeviceReset.
|
||||
//Device may be reset multiple times, and may be reset after init.
|
||||
void ihipDevice_t::reset()
|
||||
{
|
||||
_staging_host2device = new StagingBuffer(this, HIP_STAGING_SIZE*1024);
|
||||
_staging_device2host = NULL;
|
||||
};
|
||||
|
||||
|
||||
//---
|
||||
void ihipDevice_t::init(unsigned device_index, hc::accelerator acc)
|
||||
{
|
||||
_device_index = device_index;
|
||||
@@ -194,8 +207,7 @@ void ihipDevice_t::init(unsigned device_index, hc::accelerator acc)
|
||||
this->_streams.push_back(_null_stream);
|
||||
tprintf(TRACE_SYNC, "created device with null_stream=%p\n", _null_stream);
|
||||
|
||||
_staging_host2device = new StagingBuffer(this, HIP_STAGING_SIZE*1024);
|
||||
_staging_device2host = NULL;
|
||||
this->reset();
|
||||
};
|
||||
|
||||
#if 0
|
||||
@@ -205,6 +217,13 @@ ihipDevice_t::~ihipDevice_t()
|
||||
delete _null_stream;
|
||||
_null_stream = NULL;
|
||||
}
|
||||
|
||||
if (_staging_device2host) {
|
||||
delete _staging_device2host;
|
||||
}
|
||||
if (_staging_host2device){
|
||||
delete _staging_host2device;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -848,6 +867,7 @@ hipError_t hipDeviceReset(void)
|
||||
ihipDevice_t *device = ihipGetTlsDefaultDevice();
|
||||
if (device) {
|
||||
am_memtracker_reset(device->_acc);
|
||||
device->reset(); // re-allocate required resources.
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1562,6 +1582,18 @@ StagingBuffer::StagingBuffer(ihipDevice_t *device, size_t bufferSize) :
|
||||
}
|
||||
};
|
||||
|
||||
//---
|
||||
StagingBuffer::~StagingBuffer()
|
||||
{
|
||||
for (int i=0; i<numBuffers; i++) {
|
||||
if (_pinnedStagingBuffer[i]) {
|
||||
hc::AM_free(_pinnedStagingBuffer[i]);
|
||||
_pinnedStagingBuffer[i] = NULL;
|
||||
}
|
||||
hsa_signal_destroy(_completion_signal[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//---
|
||||
void StagingBuffer::CopyHostToDevice(void* dst, const void* src, size_t sizeBytes) {
|
||||
@@ -1622,21 +1654,25 @@ void ihipAsyncCopy(ihipDevice_t *device, void* dst, const void* src, size_t size
|
||||
}
|
||||
}
|
||||
|
||||
switch (kind) {
|
||||
case hipMemcpyHostToDevice:
|
||||
if (srcNotTracked) {
|
||||
device->_staging_host2device->CopyHostToDevice(dst, src, sizeBytes);
|
||||
} else {
|
||||
assert(0); // TODO
|
||||
//hsa_signal_wait_relaxed(completion_signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE);
|
||||
}
|
||||
break;
|
||||
case hipMemcpyDeviceToHost:
|
||||
// TODO - optimize the copy here.
|
||||
hc::AM_copy(dst, src, sizeBytes);
|
||||
break;
|
||||
default:
|
||||
assert(0); // TODO
|
||||
if ((kind == hipMemcpyHostToDevice) && (srcNotTracked)) {
|
||||
if (useStagingBuffer) {
|
||||
device->_staging_host2device->CopyHostToDevice(dst, src, sizeBytes);
|
||||
}
|
||||
} else if ((kind == hipMemcpyDeviceToHost) && (dstNotTracked)) {
|
||||
// TODO - optimize the copy here.
|
||||
hc::AM_copy(dst, src, sizeBytes);
|
||||
} else {
|
||||
// Let HSA runtime handle it:
|
||||
// TODO - need buffer pool for the signals:
|
||||
hsa_signal_t completion_signal;
|
||||
hsa_signal_create(1, 0, NULL, &completion_signal);
|
||||
hsa_status_t hsa_status = hsa_amd_memory_async_copy(dst, src, sizeBytes, device->_hsa_agent, 0, NULL, completion_signal);
|
||||
|
||||
if (hsa_status == HSA_STATUS_SUCCESS) {
|
||||
hsa_signal_wait_relaxed(completion_signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE);
|
||||
}
|
||||
|
||||
hsa_signal_destroy(completion_signal);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -1815,6 +1851,7 @@ hipError_t hipMemGetInfo (size_t *free, size_t *total)
|
||||
//---
|
||||
hipError_t hipFree(void* ptr)
|
||||
{
|
||||
// TODO - ensure this pointer was created by hipMalloc and not hipMallocHost
|
||||
std::call_once(hip_initialized, ihipInit);
|
||||
|
||||
|
||||
@@ -1831,6 +1868,7 @@ hipError_t hipFree(void* ptr)
|
||||
|
||||
hipError_t hipFreeHost(void* ptr)
|
||||
{
|
||||
// TODO - ensure this pointer was created by hipMallocHost and not hipMalloc
|
||||
std::call_once(hip_initialized, ihipInit);
|
||||
|
||||
if (ptr) {
|
||||
|
||||
@@ -23,24 +23,21 @@ THE SOFTWARE.
|
||||
#include "test_common.h"
|
||||
|
||||
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
// Test simple H2D copies and back.
|
||||
void simpleTest1()
|
||||
{
|
||||
HipTest::parseStandardArguments(argc, argv, true);
|
||||
|
||||
printf ("test: %s\n", __func__);
|
||||
size_t Nbytes = N*sizeof(int);
|
||||
|
||||
printf ("N=%zu Nbytes=%6.2fMB\n", N, Nbytes/1024.0/1024.0);
|
||||
|
||||
int *A_d, *B_d, *C_d;
|
||||
int *A_h, *B_h, *C_h;
|
||||
|
||||
HipTest::initArrays (&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);
|
||||
|
||||
HipTest::initArrays (&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
||||
|
||||
printf ("A_d=%p B_d=%p C_d=%p A_h=%p B_h=%p C_h=%p\n", A_d, B_d, C_d, A_h, B_d, C_h);
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
||||
|
||||
|
||||
HIPCHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIPCHECK ( hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
@@ -50,8 +47,98 @@ int main(int argc, char *argv[])
|
||||
|
||||
HIPCHECK (hipDeviceSynchronize());
|
||||
|
||||
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, N);
|
||||
|
||||
HipTest::freeArrays (A_d, B_d, C_d, A_h, B_h, C_h, false);
|
||||
HIPCHECK (hipDeviceReset());
|
||||
|
||||
printf (" %s success\n", __func__);
|
||||
}
|
||||
|
||||
|
||||
// Test many different kinds of memory copies:
|
||||
|
||||
template <typename T>
|
||||
void memcpyKind(bool usePinnedHost, bool useHostToHost, bool useMemkindDefault)
|
||||
{
|
||||
printf ("test: %s\n", __func__);
|
||||
|
||||
|
||||
T *A_d, *B_d, *C_d;
|
||||
T *A_h, *B_h, *C_h;
|
||||
|
||||
size_t Nbytes = N*sizeof(T);
|
||||
|
||||
HipTest::initArrays (&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, usePinnedHost);
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
||||
|
||||
T *A_hh = NULL;
|
||||
T *B_hh = NULL;
|
||||
T *C_dd = NULL;
|
||||
|
||||
// Allocate some extra arrays:
|
||||
|
||||
HIPCHECK ( hipMalloc(&C_dd, Nbytes) );
|
||||
|
||||
|
||||
if (useHostToHost) {
|
||||
if (usePinnedHost) {
|
||||
HIPCHECK ( hipMallocHost(&A_hh, Nbytes) );
|
||||
HIPCHECK ( hipMallocHost(&B_hh, Nbytes) );
|
||||
} else {
|
||||
A_hh = (T*)malloc(Nbytes);
|
||||
B_hh = (T*)malloc(Nbytes);
|
||||
}
|
||||
|
||||
|
||||
// Do some extra host copies here to mix things up:
|
||||
HIPCHECK ( hipMemcpy(A_hh, A_h, Nbytes, useMemkindDefault? hipMemcpyDefault : hipMemcpyHostToHost));
|
||||
HIPCHECK ( hipMemcpy(B_hh, B_h, Nbytes, useMemkindDefault? hipMemcpyDefault : hipMemcpyHostToHost));
|
||||
|
||||
|
||||
HIPCHECK ( hipMemcpy(A_d, A_hh, Nbytes, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice));
|
||||
HIPCHECK ( hipMemcpy(B_d, B_hh, Nbytes, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice));
|
||||
} else {
|
||||
HIPCHECK ( hipMemcpy(A_d, A_h, Nbytes, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice));
|
||||
HIPCHECK ( hipMemcpy(B_d, B_h, Nbytes, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, N);
|
||||
|
||||
#if 0
|
||||
// Do some extra host copies here to mix things up:
|
||||
HIPCHECK ( hipMemcpy(C_dd, C_d, Nbytes, useMemkindDefault? hipMemcpyDefault : hipMemcpyHostToHost));
|
||||
|
||||
//Destroy the original C_d:
|
||||
HIPCHECK ( hipMemset(C_d, 0x5A, Nbytes));
|
||||
|
||||
HIPCHECK ( hipMemcpy(C_h, C_dd, Nbytes, useMemkindDefault? hipMemcpyDefault:hipMemcpyDeviceToHost));
|
||||
#else
|
||||
HIPCHECK ( hipMemcpy(C_h, C_d, Nbytes, useMemkindDefault? hipMemcpyDefault:hipMemcpyDeviceToHost));
|
||||
#endif
|
||||
|
||||
HIPCHECK ( hipDeviceSynchronize() );
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, N);
|
||||
|
||||
HipTest::freeArrays (A_d, B_d, C_d, A_h, B_h, C_h, usePinnedHost);
|
||||
HIPCHECK ( hipDeviceReset() );
|
||||
|
||||
printf (" %s success\n", __func__);
|
||||
}
|
||||
|
||||
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
HipTest::parseStandardArguments(argc, argv, true);
|
||||
|
||||
|
||||
simpleTest1();
|
||||
|
||||
memcpyKind<float>(false, false, false);
|
||||
memcpyKind<float>(true, false, false);
|
||||
//memcpyKind<float>(true);
|
||||
|
||||
passed();
|
||||
|
||||
}
|
||||
|
||||
@@ -96,7 +96,7 @@ vectorADD(hipLaunchParm lp,
|
||||
template <typename T>
|
||||
void initArrays(T **A_d, T **B_d, T **C_d,
|
||||
T **A_h, T **B_h, T **C_h,
|
||||
size_t N)
|
||||
size_t N, bool usePinnedHost=false)
|
||||
{
|
||||
size_t Nbytes = N*sizeof(T);
|
||||
|
||||
@@ -110,14 +110,32 @@ void initArrays(T **A_d, T **B_d, T **C_d,
|
||||
HIPCHECK ( hipMalloc(C_d, Nbytes) );
|
||||
}
|
||||
|
||||
if (A_h)
|
||||
*A_h = (T*)malloc(Nbytes);
|
||||
|
||||
if (B_h)
|
||||
*B_h = (T*)malloc(Nbytes);
|
||||
if (usePinnedHost) {
|
||||
if (A_h) {
|
||||
HIPCHECK ( hipMallocHost(A_h, Nbytes) );
|
||||
}
|
||||
if (B_h) {
|
||||
HIPCHECK ( hipMallocHost(B_h, Nbytes) );
|
||||
}
|
||||
if (C_h) {
|
||||
HIPCHECK ( hipMallocHost(C_h, Nbytes) );
|
||||
}
|
||||
} else {
|
||||
if (A_h) {
|
||||
*A_h = (T*)malloc(Nbytes);
|
||||
HIPASSERT(*A_h != NULL);
|
||||
}
|
||||
|
||||
if (B_h) {
|
||||
*B_h = (T*)malloc(Nbytes);
|
||||
HIPASSERT(*B_h != NULL);
|
||||
}
|
||||
|
||||
if (C_h)
|
||||
*C_h = (T*)malloc(Nbytes);
|
||||
if (C_h) {
|
||||
*C_h = (T*)malloc(Nbytes);
|
||||
HIPASSERT(*C_h != NULL);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Initialize the host data:
|
||||
@@ -130,7 +148,43 @@ void initArrays(T **A_d, T **B_d, T **C_d,
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void freeArrays(T *A_d, T *B_d, T *C_d,
|
||||
T *A_h, T *B_h, T *C_h, bool usePinnedHost)
|
||||
{
|
||||
if (A_d) {
|
||||
HIPCHECK ( hipFree(A_d) );
|
||||
}
|
||||
if (B_d) {
|
||||
HIPCHECK ( hipFree(B_d) );
|
||||
}
|
||||
if (C_d) {
|
||||
HIPCHECK ( hipFree(C_d) );
|
||||
}
|
||||
|
||||
if (usePinnedHost) {
|
||||
if (A_h) {
|
||||
HIPCHECK (hipFreeHost(A_h));
|
||||
}
|
||||
if (B_h) {
|
||||
HIPCHECK (hipFreeHost(B_h));
|
||||
}
|
||||
if (C_h) {
|
||||
HIPCHECK (hipFreeHost(C_h));
|
||||
}
|
||||
} else {
|
||||
if (A_h) {
|
||||
free (A_h);
|
||||
}
|
||||
if (B_h) {
|
||||
free (B_h);
|
||||
}
|
||||
if (C_h) {
|
||||
free (C_h);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Assumes C_h contains vector add of A_h + B_h
|
||||
|
||||
@@ -91,6 +91,7 @@ syn keyword hipFunctionName hipD3D9UnmapResources
|
||||
syn keyword hipFunctionName hipD3D9UnregisterResource
|
||||
syn keyword hipFunctionName hipDeviceGetProperties
|
||||
syn keyword hipFunctionName hipDeviceSynchronize
|
||||
syn keyword hipFunctionName hipDeviceReset
|
||||
syn keyword hipFunctionName hipEventCreate
|
||||
syn keyword hipFunctionName hipEventDestroy
|
||||
syn keyword hipFunctionName hipEventElapsedTime
|
||||
|
||||
Ссылка в новой задаче
Block a user