Improve copy testing implementation.

- add tests for (unpinned/pinned) x H2H x D2D.
- Free memory at end of test.


[ROCm/hip commit: 134d7975ce]
This commit is contained in:
Ben Sander
2016-02-12 17:39:44 -06:00
rodzic c441d5ec29
commit 83eb66eb8e
4 zmienionych plików z 214 dodań i 34 usunięć
+55 -17
Wyświetl plik
@@ -132,6 +132,7 @@ struct StagingBuffer {
StagingBuffer(ihipDevice_t *device, size_t bufferSize) ;
~StagingBuffer();
void CopyHostToDevice(void* dst, const void* src, size_t sizeBytes);
@@ -163,6 +164,7 @@ struct ihipDevice_t
StagingBuffer *_staging_device2host;
public:
void reset();
void init(unsigned device_index, hc::accelerator acc);
hipError_t getProperties(hipDeviceProp_t* prop);
@@ -172,6 +174,17 @@ public:
//=================================================================================================
//
//Reset the device - this is called from hipDeviceReset.
//Device may be reset multiple times, and may be reset after init.
void ihipDevice_t::reset()
{
_staging_host2device = new StagingBuffer(this, HIP_STAGING_SIZE*1024);
_staging_device2host = NULL;
};
//---
void ihipDevice_t::init(unsigned device_index, hc::accelerator acc)
{
_device_index = device_index;
@@ -194,8 +207,7 @@ void ihipDevice_t::init(unsigned device_index, hc::accelerator acc)
this->_streams.push_back(_null_stream);
tprintf(TRACE_SYNC, "created device with null_stream=%p\n", _null_stream);
_staging_host2device = new StagingBuffer(this, HIP_STAGING_SIZE*1024);
_staging_device2host = NULL;
this->reset();
};
#if 0
@@ -205,6 +217,13 @@ ihipDevice_t::~ihipDevice_t()
delete _null_stream;
_null_stream = NULL;
}
if (_staging_device2host) {
delete _staging_device2host;
}
if (_staging_host2device){
delete _staging_host2device;
}
}
#endif
@@ -848,6 +867,7 @@ hipError_t hipDeviceReset(void)
ihipDevice_t *device = ihipGetTlsDefaultDevice();
if (device) {
am_memtracker_reset(device->_acc);
device->reset(); // re-allocate required resources.
}
#endif
@@ -1562,6 +1582,18 @@ StagingBuffer::StagingBuffer(ihipDevice_t *device, size_t bufferSize) :
}
};
//---
StagingBuffer::~StagingBuffer()
{
for (int i=0; i<numBuffers; i++) {
if (_pinnedStagingBuffer[i]) {
hc::AM_free(_pinnedStagingBuffer[i]);
_pinnedStagingBuffer[i] = NULL;
}
hsa_signal_destroy(_completion_signal[i]);
}
}
//---
void StagingBuffer::CopyHostToDevice(void* dst, const void* src, size_t sizeBytes) {
@@ -1622,21 +1654,25 @@ void ihipAsyncCopy(ihipDevice_t *device, void* dst, const void* src, size_t size
}
}
switch (kind) {
case hipMemcpyHostToDevice:
if (srcNotTracked) {
device->_staging_host2device->CopyHostToDevice(dst, src, sizeBytes);
} else {
assert(0); // TODO
//hsa_signal_wait_relaxed(completion_signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE);
}
break;
case hipMemcpyDeviceToHost:
// TODO - optimize the copy here.
hc::AM_copy(dst, src, sizeBytes);
break;
default:
assert(0); // TODO
if ((kind == hipMemcpyHostToDevice) && (srcNotTracked)) {
if (useStagingBuffer) {
device->_staging_host2device->CopyHostToDevice(dst, src, sizeBytes);
}
} else if ((kind == hipMemcpyDeviceToHost) && (dstNotTracked)) {
// TODO - optimize the copy here.
hc::AM_copy(dst, src, sizeBytes);
} else {
// Let HSA runtime handle it:
// TODO - need buffer pool for the signals:
hsa_signal_t completion_signal;
hsa_signal_create(1, 0, NULL, &completion_signal);
hsa_status_t hsa_status = hsa_amd_memory_async_copy(dst, src, sizeBytes, device->_hsa_agent, 0, NULL, completion_signal);
if (hsa_status == HSA_STATUS_SUCCESS) {
hsa_signal_wait_relaxed(completion_signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE);
}
hsa_signal_destroy(completion_signal);
}
}
#endif
@@ -1815,6 +1851,7 @@ hipError_t hipMemGetInfo (size_t *free, size_t *total)
//---
hipError_t hipFree(void* ptr)
{
// TODO - ensure this pointer was created by hipMalloc and not hipMallocHost
std::call_once(hip_initialized, ihipInit);
@@ -1831,6 +1868,7 @@ hipError_t hipFree(void* ptr)
hipError_t hipFreeHost(void* ptr)
{
// TODO - ensure this pointer was created by hipMallocHost and not hipMalloc
std::call_once(hip_initialized, ihipInit);
if (ptr) {
+96 -9
Wyświetl plik
@@ -23,24 +23,21 @@ THE SOFTWARE.
#include "test_common.h"
int main(int argc, char *argv[])
// Test simple H2D copies and back.
void simpleTest1()
{
HipTest::parseStandardArguments(argc, argv, true);
printf ("test: %s\n", __func__);
size_t Nbytes = N*sizeof(int);
printf ("N=%zu Nbytes=%6.2fMB\n", N, Nbytes/1024.0/1024.0);
int *A_d, *B_d, *C_d;
int *A_h, *B_h, *C_h;
HipTest::initArrays (&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);
HipTest::initArrays (&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
printf ("A_d=%p B_d=%p C_d=%p A_h=%p B_h=%p C_h=%p\n", A_d, B_d, C_d, A_h, B_d, C_h);
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
HIPCHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
HIPCHECK ( hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
@@ -50,8 +47,98 @@ int main(int argc, char *argv[])
HIPCHECK (hipDeviceSynchronize());
HipTest::checkVectorADD(A_h, B_h, C_h, N);
HipTest::freeArrays (A_d, B_d, C_d, A_h, B_h, C_h, false);
HIPCHECK (hipDeviceReset());
printf (" %s success\n", __func__);
}
// Test many different kinds of memory copies:
template <typename T>
void memcpyKind(bool usePinnedHost, bool useHostToHost, bool useMemkindDefault)
{
printf ("test: %s\n", __func__);
T *A_d, *B_d, *C_d;
T *A_h, *B_h, *C_h;
size_t Nbytes = N*sizeof(T);
HipTest::initArrays (&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, usePinnedHost);
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
T *A_hh = NULL;
T *B_hh = NULL;
T *C_dd = NULL;
// Allocate some extra arrays:
HIPCHECK ( hipMalloc(&C_dd, Nbytes) );
if (useHostToHost) {
if (usePinnedHost) {
HIPCHECK ( hipMallocHost(&A_hh, Nbytes) );
HIPCHECK ( hipMallocHost(&B_hh, Nbytes) );
} else {
A_hh = (T*)malloc(Nbytes);
B_hh = (T*)malloc(Nbytes);
}
// Do some extra host copies here to mix things up:
HIPCHECK ( hipMemcpy(A_hh, A_h, Nbytes, useMemkindDefault? hipMemcpyDefault : hipMemcpyHostToHost));
HIPCHECK ( hipMemcpy(B_hh, B_h, Nbytes, useMemkindDefault? hipMemcpyDefault : hipMemcpyHostToHost));
HIPCHECK ( hipMemcpy(A_d, A_hh, Nbytes, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice));
HIPCHECK ( hipMemcpy(B_d, B_hh, Nbytes, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice));
} else {
HIPCHECK ( hipMemcpy(A_d, A_h, Nbytes, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice));
HIPCHECK ( hipMemcpy(B_d, B_h, Nbytes, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice));
}
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, N);
#if 0
// Do some extra host copies here to mix things up:
HIPCHECK ( hipMemcpy(C_dd, C_d, Nbytes, useMemkindDefault? hipMemcpyDefault : hipMemcpyHostToHost));
//Destroy the original C_d:
HIPCHECK ( hipMemset(C_d, 0x5A, Nbytes));
HIPCHECK ( hipMemcpy(C_h, C_dd, Nbytes, useMemkindDefault? hipMemcpyDefault:hipMemcpyDeviceToHost));
#else
HIPCHECK ( hipMemcpy(C_h, C_d, Nbytes, useMemkindDefault? hipMemcpyDefault:hipMemcpyDeviceToHost));
#endif
HIPCHECK ( hipDeviceSynchronize() );
HipTest::checkVectorADD(A_h, B_h, C_h, N);
HipTest::freeArrays (A_d, B_d, C_d, A_h, B_h, C_h, usePinnedHost);
HIPCHECK ( hipDeviceReset() );
printf (" %s success\n", __func__);
}
int main(int argc, char *argv[])
{
HipTest::parseStandardArguments(argc, argv, true);
simpleTest1();
memcpyKind<float>(false, false, false);
memcpyKind<float>(true, false, false);
//memcpyKind<float>(true);
passed();
}
+62 -8
Wyświetl plik
@@ -96,7 +96,7 @@ vectorADD(hipLaunchParm lp,
template <typename T>
void initArrays(T **A_d, T **B_d, T **C_d,
T **A_h, T **B_h, T **C_h,
size_t N)
size_t N, bool usePinnedHost=false)
{
size_t Nbytes = N*sizeof(T);
@@ -110,14 +110,32 @@ void initArrays(T **A_d, T **B_d, T **C_d,
HIPCHECK ( hipMalloc(C_d, Nbytes) );
}
if (A_h)
*A_h = (T*)malloc(Nbytes);
if (B_h)
*B_h = (T*)malloc(Nbytes);
if (usePinnedHost) {
if (A_h) {
HIPCHECK ( hipMallocHost(A_h, Nbytes) );
}
if (B_h) {
HIPCHECK ( hipMallocHost(B_h, Nbytes) );
}
if (C_h) {
HIPCHECK ( hipMallocHost(C_h, Nbytes) );
}
} else {
if (A_h) {
*A_h = (T*)malloc(Nbytes);
HIPASSERT(*A_h != NULL);
}
if (B_h) {
*B_h = (T*)malloc(Nbytes);
HIPASSERT(*B_h != NULL);
}
if (C_h)
*C_h = (T*)malloc(Nbytes);
if (C_h) {
*C_h = (T*)malloc(Nbytes);
HIPASSERT(*C_h != NULL);
}
}
// Initialize the host data:
@@ -130,7 +148,43 @@ void initArrays(T **A_d, T **B_d, T **C_d,
}
template <typename T>
void freeArrays(T *A_d, T *B_d, T *C_d,
T *A_h, T *B_h, T *C_h, bool usePinnedHost)
{
if (A_d) {
HIPCHECK ( hipFree(A_d) );
}
if (B_d) {
HIPCHECK ( hipFree(B_d) );
}
if (C_d) {
HIPCHECK ( hipFree(C_d) );
}
if (usePinnedHost) {
if (A_h) {
HIPCHECK (hipFreeHost(A_h));
}
if (B_h) {
HIPCHECK (hipFreeHost(B_h));
}
if (C_h) {
HIPCHECK (hipFreeHost(C_h));
}
} else {
if (A_h) {
free (A_h);
}
if (B_h) {
free (B_h);
}
if (C_h) {
free (C_h);
}
}
}
// Assumes C_h contains vector add of A_h + B_h
+1
Wyświetl plik
@@ -91,6 +91,7 @@ syn keyword hipFunctionName hipD3D9UnmapResources
syn keyword hipFunctionName hipD3D9UnregisterResource
syn keyword hipFunctionName hipDeviceGetProperties
syn keyword hipFunctionName hipDeviceSynchronize
syn keyword hipFunctionName hipDeviceReset
syn keyword hipFunctionName hipEventCreate
syn keyword hipFunctionName hipEventDestroy
syn keyword hipFunctionName hipEventElapsedTime