Merge pull request #255 from AlexVlx/remove_some_trailing_whitespace
Clean up trailing whitespace so as to reduce noise in #246.
Tá an tiomantas seo le fáil i:
@@ -53,7 +53,7 @@ THE SOFTWARE.
|
||||
// define HIP_ENABLE_PRINTF to enable printf
|
||||
#ifdef HIP_ENABLE_PRINTF
|
||||
#define HCC_ENABLE_ACCELERATOR_PRINTF 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//---
|
||||
// Remainder of this file only compiles with HCC
|
||||
@@ -481,7 +481,7 @@ do {\
|
||||
type* var = \
|
||||
(type*)__get_dynamicgroupbaseptr(); \
|
||||
|
||||
#define HIP_DYNAMIC_SHARED_ATTRIBUTE
|
||||
#define HIP_DYNAMIC_SHARED_ATTRIBUTE
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -69,7 +69,7 @@ int sharePtr(void *ptr, ihipCtx_t *ctx, bool shareWithAll, unsigned hipFlags)
|
||||
|
||||
if (shareWithAll) {
|
||||
hsa_status_t s = hsa_amd_agents_allow_access(g_deviceCnt+1, g_allAgents, NULL, ptr);
|
||||
tprintf (DB_MEM, " allow access to CPU + all %d GPUs (shareWithAll)\n", g_deviceCnt);
|
||||
tprintf (DB_MEM, " allow access to CPU + all %d GPUs (shareWithAll)\n", g_deviceCnt);
|
||||
if (s != HSA_STATUS_SUCCESS) {
|
||||
ret = -1;
|
||||
}
|
||||
@@ -126,7 +126,7 @@ void * allocAndSharePtr(const char *msg, size_t sizeBytes, ihipCtx_t *ctx, bool
|
||||
if (HIP_INIT_ALLOC != -1) {
|
||||
// TODO: don't call the HIP API directly here:
|
||||
hipMemset(ptr, HIP_INIT_ALLOC, sizeBytes);
|
||||
}
|
||||
}
|
||||
|
||||
if (ptr != nullptr) {
|
||||
int r = sharePtr(ptr, ctx, shareWithAll, hipFlags);
|
||||
@@ -255,7 +255,7 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes)
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return ihipLogStatus(hip_status);
|
||||
@@ -288,10 +288,10 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
|
||||
}
|
||||
|
||||
|
||||
const unsigned supportedFlags = hipHostMallocPortable
|
||||
| hipHostMallocMapped
|
||||
| hipHostMallocWriteCombined
|
||||
| hipHostMallocCoherent
|
||||
const unsigned supportedFlags = hipHostMallocPortable
|
||||
| hipHostMallocMapped
|
||||
| hipHostMallocWriteCombined
|
||||
| hipHostMallocCoherent
|
||||
| hipHostMallocNonCoherent;
|
||||
|
||||
|
||||
@@ -304,7 +304,7 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
|
||||
hip_status = hipErrorInvalidValue;
|
||||
} else {
|
||||
auto device = ctx->getWriteableDevice();
|
||||
|
||||
|
||||
unsigned amFlags = 0;
|
||||
if (flags & hipHostMallocCoherent) {
|
||||
amFlags = amHostCoherent;
|
||||
@@ -585,7 +585,7 @@ hipError_t hipMalloc3DArray(hipArray_t *array,
|
||||
hsa_ext_image_data_info_t imageInfo;
|
||||
hsa_status_t status = hsa_ext_image_data_get_info(*agent, &imageDescriptor, permission, &imageInfo);
|
||||
size_t alignment = imageInfo.alignment <= allocGranularity ? 0 : imageInfo.alignment;
|
||||
|
||||
|
||||
*ptr = hip_internal::allocAndSharePtr("device_array", allocSize, ctx, false, am_flags, 0, alignment);
|
||||
|
||||
if (size && (*ptr == NULL)) {
|
||||
|
||||
@@ -52,7 +52,7 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, hipStream_
|
||||
if (!(testMask & p_tests)) {
|
||||
return;
|
||||
}
|
||||
printf ("\ntest 0x%3x: stream=%p waitStart=%d syncMode=%s\n",
|
||||
printf ("\ntest 0x%3x: stream=%p waitStart=%d syncMode=%s\n",
|
||||
testMask, stream, waitStart, syncModeString(syncMode));
|
||||
|
||||
size_t sizeBytes = numElements * sizeof(int);
|
||||
@@ -85,8 +85,8 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, hipStream_
|
||||
HIPCHECK(hipEventSynchronize(start));
|
||||
}
|
||||
|
||||
|
||||
hipError_t expectedStopError = hipSuccess;
|
||||
|
||||
hipError_t expectedStopError = hipSuccess;
|
||||
|
||||
// How to wait for the events to finish:
|
||||
switch (syncMode) {
|
||||
@@ -97,12 +97,12 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, hipStream_
|
||||
HIPCHECK(hipStreamSynchronize(stream)); // wait for recording to finish...
|
||||
break;
|
||||
case syncStopEvent:
|
||||
HIPCHECK(hipEventSynchronize(stop));
|
||||
HIPCHECK(hipEventSynchronize(stop));
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
};
|
||||
|
||||
|
||||
|
||||
float t;
|
||||
|
||||
@@ -111,25 +111,25 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, hipStream_
|
||||
failed ("start event not in expected state, was %d=%s\n", e, hipGetErrorName(e));
|
||||
}
|
||||
|
||||
if (e == hipSuccess)
|
||||
if (e == hipSuccess)
|
||||
assert (t==0.0f);
|
||||
|
||||
|
||||
|
||||
// stop usually ready unless we skipped the synchronization (syncNone)
|
||||
HIPCHECK_API(hipEventElapsedTime(&t, stop, stop), expectedStopError);
|
||||
if (e == hipSuccess)
|
||||
if (e == hipSuccess)
|
||||
assert (t==0.0f);
|
||||
|
||||
|
||||
e = hipEventElapsedTime(&t, start, stop);
|
||||
HIPCHECK_API(e, expectedStopError);
|
||||
if (expectedStopError == hipSuccess)
|
||||
if (expectedStopError == hipSuccess)
|
||||
assert (t>0.0f);
|
||||
printf ("time=%6.2f error=%s\n", t, hipGetErrorName(e));
|
||||
|
||||
e = hipEventElapsedTime(&t, stop, start);
|
||||
HIPCHECK_API(e, expectedStopError);
|
||||
if (expectedStopError == hipSuccess)
|
||||
if (expectedStopError == hipSuccess)
|
||||
assert (t<0.0f);
|
||||
printf ("negtime=%6.2f error=%s\n", t, hipGetErrorName(e));
|
||||
|
||||
|
||||
@@ -58,7 +58,7 @@ public:
|
||||
|
||||
// Setter: stores the given value in _offset.
void offset(int offset)
{
    _offset = offset;
}
|
||||
// Getter: returns the current value of _offset.
int offset() const
{
    return _offset;
}
|
||||
|
||||
|
||||
private:
|
||||
T * _A_d;
|
||||
T* _B_d;
|
||||
@@ -72,7 +72,7 @@ private:
|
||||
|
||||
template<typename T>
|
||||
DeviceMemory<T>::DeviceMemory(size_t numElements)
|
||||
: _maxNumElements(numElements),
|
||||
: _maxNumElements(numElements),
|
||||
_offset(0)
|
||||
{
|
||||
T ** np = nullptr;
|
||||
@@ -93,7 +93,7 @@ DeviceMemory<T>::~DeviceMemory ()
|
||||
HipTest::freeArrays (_A_d, _B_d, _C_d, np, np, np, 0);
|
||||
|
||||
HIPCHECK (hipFree(_C_dd));
|
||||
|
||||
|
||||
_C_dd = NULL;
|
||||
};
|
||||
|
||||
@@ -125,7 +125,7 @@ public:
|
||||
T * A_hh;
|
||||
T* B_hh;
|
||||
|
||||
bool _usePinnedHost;
|
||||
bool _usePinnedHost;
|
||||
private:
|
||||
size_t _maxNumElements;
|
||||
|
||||
@@ -165,11 +165,11 @@ HostMemory<T>::HostMemory(size_t numElements, bool usePinnedHost)
|
||||
|
||||
template<typename T>
|
||||
void
|
||||
HostMemory<T>::reset(size_t numElements, bool full)
|
||||
HostMemory<T>::reset(size_t numElements, bool full)
|
||||
{
|
||||
// Initialize the host data:
|
||||
for (size_t i=0; i<numElements; i++) {
|
||||
(A_hh)[i] = 1097.0 + i;
|
||||
(A_hh)[i] = 1097.0 + i;
|
||||
(B_hh)[i] = 1492.0 + i; // Phi
|
||||
|
||||
if (full) {
|
||||
@@ -213,8 +213,8 @@ template <typename T>
|
||||
void memcpytest2(DeviceMemory<T> *dmem, HostMemory<T> *hmem, size_t numElements, bool useHostToHost, bool useDeviceToDevice, bool useMemkindDefault)
|
||||
{
|
||||
size_t sizeElements = numElements * sizeof(T);
|
||||
printf ("test: %s<%s> size=%lu (%6.2fMB) usePinnedHost:%d, useHostToHost:%d, useDeviceToDevice:%d, useMemkindDefault:%d, offsets:dev:%+d host:+%d\n",
|
||||
__func__,
|
||||
printf ("test: %s<%s> size=%lu (%6.2fMB) usePinnedHost:%d, useHostToHost:%d, useDeviceToDevice:%d, useMemkindDefault:%d, offsets:dev:%+d host:+%d\n",
|
||||
__func__,
|
||||
TYPENAME(T),
|
||||
sizeElements, sizeElements/1024.0/1024.0,
|
||||
hmem->_usePinnedHost, useHostToHost, useDeviceToDevice, useMemkindDefault,
|
||||
@@ -273,8 +273,8 @@ void memcpytest2_for_type(size_t numElements)
|
||||
{
|
||||
printSep();
|
||||
|
||||
DeviceMemory<T> memD(numElements);
|
||||
HostMemory<T> memU(numElements, 0/*usePinnedHost*/);
|
||||
DeviceMemory<T> memD(numElements);
|
||||
HostMemory<T> memU(numElements, 0/*usePinnedHost*/);
|
||||
HostMemory<T> memP(numElements, 1/*usePinnedHost*/);
|
||||
|
||||
for (int usePinnedHost =0; usePinnedHost<=1; usePinnedHost++) {
|
||||
@@ -307,11 +307,11 @@ void memcpytest2_sizes(size_t maxElem=0)
|
||||
maxElem = free/sizeof(T)/8;
|
||||
}
|
||||
|
||||
printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n",
|
||||
printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n",
|
||||
deviceId, free, (float)(free/1024.0/1024.0), total, (float)(total/1024.0/1024.0), maxElem*sizeof(T)/1024.0/1024.0);
|
||||
HIPCHECK ( hipDeviceReset() );
|
||||
DeviceMemory<T> memD(maxElem);
|
||||
HostMemory<T> memU(maxElem, 0/*usePinnedHost*/);
|
||||
DeviceMemory<T> memD(maxElem);
|
||||
HostMemory<T> memU(maxElem, 0/*usePinnedHost*/);
|
||||
HostMemory<T> memP(maxElem, 1/*usePinnedHost*/);
|
||||
|
||||
for (size_t elem=1; elem<=maxElem; elem*=2) {
|
||||
@@ -336,11 +336,11 @@ void memcpytest2_offsets(size_t maxElem, bool devOffsets, bool hostOffsets)
|
||||
HIPCHECK(hipMemGetInfo(&free, &total));
|
||||
|
||||
|
||||
printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n",
|
||||
printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n",
|
||||
deviceId, free, (float)(free/1024.0/1024.0), total, (float)(total/1024.0/1024.0), maxElem*sizeof(T)/1024.0/1024.0);
|
||||
HIPCHECK ( hipDeviceReset() );
|
||||
DeviceMemory<T> memD(maxElem);
|
||||
HostMemory<T> memU(maxElem, 0/*usePinnedHost*/);
|
||||
DeviceMemory<T> memD(maxElem);
|
||||
HostMemory<T> memU(maxElem, 0/*usePinnedHost*/);
|
||||
HostMemory<T> memP(maxElem, 1/*usePinnedHost*/);
|
||||
|
||||
size_t elem = maxElem / 2;
|
||||
@@ -380,16 +380,16 @@ void multiThread_1(bool serialize, bool usePinnedHost)
|
||||
{
|
||||
printSep();
|
||||
printf ("test: %s<%s> serialize=%d usePinnedHost=%d\n", __func__, TYPENAME(T), serialize, usePinnedHost);
|
||||
DeviceMemory<T> memD(N);
|
||||
HostMemory<T> mem1(N, usePinnedHost);
|
||||
HostMemory<T> mem2(N, usePinnedHost);
|
||||
DeviceMemory<T> memD(N);
|
||||
HostMemory<T> mem1(N, usePinnedHost);
|
||||
HostMemory<T> mem2(N, usePinnedHost);
|
||||
|
||||
std::thread t1 (memcpytest2<T>, &memD, &mem1, N, 0,0,0);
|
||||
if (serialize) {
|
||||
t1.join();
|
||||
}
|
||||
|
||||
|
||||
|
||||
std::thread t2 (memcpytest2<T>,&memD, &mem2, N, 0,0,0);
|
||||
if (serialize) {
|
||||
t2.join();
|
||||
@@ -427,21 +427,21 @@ int main(int argc, char *argv[])
|
||||
// Some tests around the 64KB boundary which have historically shown issues:
|
||||
printf ("\n\n=== tests&0x2 (64KB boundary)\n");
|
||||
size_t maxElem = 32*1024*1024;
|
||||
DeviceMemory<float> memD(maxElem);
|
||||
HostMemory<float> memU(maxElem, 0/*usePinnedHost*/);
|
||||
HostMemory<float> memP(maxElem, 0/*usePinnedHost*/);
|
||||
DeviceMemory<float> memD(maxElem);
|
||||
HostMemory<float> memU(maxElem, 0/*usePinnedHost*/);
|
||||
HostMemory<float> memP(maxElem, 0/*usePinnedHost*/);
|
||||
// These all pass:
|
||||
memcpytest2<float>(&memD, &memP, 15*1024*1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memP, 16*1024*1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memP, 16*1024*1024+16*1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memP, 15*1024*1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memP, 16*1024*1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memP, 16*1024*1024+16*1024, 0, 0, 0);
|
||||
|
||||
// Just over 64MB:
|
||||
memcpytest2<float>(&memD, &memP, 16*1024*1024+512*1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memP, 17*1024*1024+1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memP, 32*1024*1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memU, 32*1024*1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memP, 32*1024*1024, 1, 1, 0);
|
||||
memcpytest2<float>(&memD, &memP, 32*1024*1024, 1, 1, 0);
|
||||
memcpytest2<float>(&memD, &memP, 16*1024*1024+512*1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memP, 17*1024*1024+1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memP, 32*1024*1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memU, 32*1024*1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memP, 32*1024*1024, 1, 1, 0);
|
||||
memcpytest2<float>(&memD, &memP, 32*1024*1024, 1, 1, 0);
|
||||
|
||||
|
||||
}
|
||||
@@ -464,7 +464,7 @@ int main(int argc, char *argv[])
|
||||
|
||||
// Simplest cases: serialize the threads, and also use pinned memory:
|
||||
// This verifies that the sub-calls to memcpytest2 are correct.
|
||||
multiThread_1<float>(true, true);
|
||||
multiThread_1<float>(true, true);
|
||||
|
||||
// Serialize, but use unpinned memory to stress the unpinned memory xfer path.
|
||||
multiThread_1<float>(true, false);
|
||||
|
||||
@@ -41,8 +41,8 @@ void printSep()
|
||||
// Designed to stress a small number of simple smoke tests
|
||||
|
||||
template<
|
||||
typename T=float,
|
||||
class P=HipTest::Unpinned,
|
||||
typename T=float,
|
||||
class P=HipTest::Unpinned,
|
||||
class C=HipTest::Memcpy
|
||||
>
|
||||
void simpleVectorAdd(size_t numElements, int iters, hipStream_t stream)
|
||||
|
||||
@@ -119,7 +119,7 @@ void Streamer<T>::reset()
|
||||
{
|
||||
HipTest::setDefaultData(_numElements, _A_h, _B_h, _C_h);
|
||||
H2D();
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -238,7 +238,7 @@ int main(int argc, char *argv[])
|
||||
nullStreamer->D2H();
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
|
||||
HipTest::checkTest(expected_H, nullStreamer->_C_h, numElements);
|
||||
HipTest::checkTest(expected_H, nullStreamer->_C_h, numElements);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -263,7 +263,7 @@ int main(int argc, char *argv[])
|
||||
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
|
||||
HipTest::checkTest(expected_H, nullStreamer->_C_h, numElements);
|
||||
HipTest::checkTest(expected_H, nullStreamer->_C_h, numElements);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -289,10 +289,10 @@ int main(int argc, char *argv[])
|
||||
// Copy with stream1, this could go async if the streamSync doesn't synchronize ALL the streams.
|
||||
HIPCHECK(hipMemcpyAsync(streamers[0]->_C_h, streamers[0]->_C_d, streamers[0]->_numElements*sizeof(int), hipMemcpyDeviceToHost, streamers[1]->_stream));
|
||||
|
||||
|
||||
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
|
||||
HipTest::checkTest(expected_H, streamers[0]->_C_h, numElements);
|
||||
HipTest::checkTest(expected_H, streamers[0]->_C_h, numElements);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -59,23 +59,23 @@ const char *syncModeString(int syncMode) {
|
||||
void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, SyncMode syncMode, bool expectMismatch)
|
||||
{
|
||||
|
||||
// This test sends a long-running kernel to the null stream, then tests to see if the
|
||||
// This test sends a long-running kernel to the null stream, then tests to see if the
|
||||
// specified synchronization technique is effective.
|
||||
//
|
||||
// Some syncMode are not expected to correctly sync (for example "syncNone"). in these
|
||||
// Some syncMode are not expected to correctly sync (for example "syncNone"). in these
|
||||
// cases the test sets expectMismatch and the check logic below will attempt to ensure that
|
||||
// the undesired synchronization did not occur - ie ensure the kernel is still running and did
|
||||
// not yet update the stop event. This can be tricky since if the kernel runs fast enough it
|
||||
// may complete before the check. To prevent this, the addCountReverse has a count parameter
|
||||
// which causes it to loop repeatedly, and the results are checked in reverse order.
|
||||
// may complete before the check. To prevent this, the addCountReverse has a count parameter
|
||||
// which causes it to loop repeatedly, and the results are checked in reverse order.
|
||||
//
|
||||
// Tests with expectMismatch=true should ensure the kernel finishes correctly. These results
|
||||
// are checked and we test to make sure stop event has completed.
|
||||
|
||||
|
||||
if (!(testMask & p_tests)) {
|
||||
return;
|
||||
}
|
||||
printf ("\ntest 0x%02x: syncMode=%s expectMismatch=%d\n",
|
||||
printf ("\ntest 0x%02x: syncMode=%s expectMismatch=%d\n",
|
||||
testMask, syncModeString(syncMode), expectMismatch);
|
||||
|
||||
size_t sizeBytes = numElements * sizeof(int);
|
||||
@@ -98,7 +98,7 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, SyncMode s
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements);
|
||||
// Launch kernel into null stream, should result in C_h == count.
|
||||
hipLaunchKernelGGL(HipTest::addCountReverse , dim3(blocks), dim3(threadsPerBlock), 0, 0 /*stream*/, C_d, C_h, numElements, count);
|
||||
HIPCHECK(hipEventRecord(stop, 0/*default*/));
|
||||
HIPCHECK(hipEventRecord(stop, 0/*default*/));
|
||||
|
||||
switch (syncMode) {
|
||||
case syncNone:
|
||||
@@ -108,18 +108,18 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, SyncMode s
|
||||
break;
|
||||
case syncOtherStream:
|
||||
// Does this synchronize with the null stream?
|
||||
HIPCHECK(hipStreamSynchronize(otherStream));
|
||||
HIPCHECK(hipStreamSynchronize(otherStream));
|
||||
break;
|
||||
case syncMarkerThenOtherStream:
|
||||
case syncMarkerThenOtherNonBlockingStream:
|
||||
|
||||
// this may wait for NULL stream depending hipStreamNonBlocking flag above
|
||||
HIPCHECK(hipEventRecord(otherStreamEvent, otherStream));
|
||||
|
||||
HIPCHECK(hipStreamSynchronize(otherStream));
|
||||
// this may wait for NULL stream depending hipStreamNonBlocking flag above
|
||||
HIPCHECK(hipEventRecord(otherStreamEvent, otherStream));
|
||||
|
||||
HIPCHECK(hipStreamSynchronize(otherStream));
|
||||
break;
|
||||
case syncDevice:
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
@@ -197,7 +197,7 @@ void runTests(int64_t numElements)
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
// Can't destroy the default stream. TODO - move to another test
|
||||
HIPCHECK_API(hipStreamDestroy(0), hipErrorInvalidResourceHandle);
|
||||
HIPCHECK_API(hipStreamDestroy(0), hipErrorInvalidResourceHandle);
|
||||
|
||||
HipTest::parseStandardArguments(argc, argv, true /*failOnUndefinedArg*/);
|
||||
|
||||
|
||||
@@ -88,7 +88,7 @@ private:
|
||||
|
||||
template <typename T>
|
||||
Streamer<T>::Streamer(int deviceId, T * A_d, size_t numElements, int commandType) :
|
||||
_preA_d(NULL),
|
||||
_preA_d(NULL),
|
||||
_A_d(A_d),
|
||||
_deviceId(deviceId),
|
||||
_numElements(numElements),
|
||||
@@ -239,7 +239,7 @@ size_t Streamer<T>::check(int streamerNum, T initValue, T expectedOffset, bool e
|
||||
return _mismatchCount;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
//---
|
||||
//Parse arguments specific to this test.
|
||||
@@ -300,7 +300,7 @@ void checkAll(int initValue, std::vector<IntStreamer *> &streamers, std::vector<
|
||||
for (int i=0; i<streamers.size(); i++) {
|
||||
|
||||
expected += streamers[i]->expectedAdd();
|
||||
|
||||
|
||||
mismatchCount += streamers[i]->check(i+1, initValue, expected, expectPass);
|
||||
|
||||
}
|
||||
@@ -330,7 +330,7 @@ void checkAll(int initValue, std::vector<IntStreamer *> &streamers, std::vector<
|
||||
|
||||
// Intentionally empty: performs no synchronization and has no side effects.
// (Presumably the "no sync" counterpart to the other sync_* helpers — confirm against callers.)
void sync_none()
{
}
|
||||
|
||||
void sync_allDevices(int numDevices)
|
||||
void sync_allDevices(int numDevices)
|
||||
{
|
||||
for (int d=0; d<numDevices; d++) {
|
||||
HIPCHECK(hipSetDevice(d));
|
||||
@@ -339,7 +339,7 @@ void sync_allDevices(int numDevices)
|
||||
}
|
||||
|
||||
|
||||
void sync_queryAllUntilComplete(std::vector<IntStreamer *> streamers)
|
||||
void sync_queryAllUntilComplete(std::vector<IntStreamer *> streamers)
|
||||
{
|
||||
for (int i=streamers.size()-1; i>=0; i--) {
|
||||
streamers[i]->queryUntilComplete();
|
||||
@@ -347,7 +347,7 @@ void sync_queryAllUntilComplete(std::vector<IntStreamer *> streamers)
|
||||
}
|
||||
|
||||
|
||||
void sync_streamWaitEvent(hipEvent_t lastEvent, int sideDeviceId, hipStream_t sideStream, bool waitHere)
|
||||
void sync_streamWaitEvent(hipEvent_t lastEvent, int sideDeviceId, hipStream_t sideStream, bool waitHere)
|
||||
{
|
||||
HIPCHECK(hipSetDevice(sideDeviceId));
|
||||
|
||||
@@ -389,7 +389,7 @@ int main(int argc, char *argv[])
|
||||
initArray_h[i] = initValue;
|
||||
}
|
||||
HIPCHECK(hipMemcpy(initArray_d, initArray_h, sizeElements, hipMemcpyHostToDevice));
|
||||
|
||||
|
||||
|
||||
int numDevices;
|
||||
HIPCHECK(hipGetDeviceCount(&numDevices));
|
||||
@@ -414,7 +414,7 @@ int main(int argc, char *argv[])
|
||||
|
||||
|
||||
// A sideband stream channel that is independent from above.
|
||||
// Used to check to ensure the WaitEvent or other synchronization is working correctly since by default sideStream is
|
||||
// Used to check to ensure the WaitEvent or other synchronization is working correctly since by default sideStream is
|
||||
// asynchronous wrt the other streams.
|
||||
std::vector<hipStream_t> sideStreams;
|
||||
for (int d=0; d<numDevices; d++) {
|
||||
@@ -446,7 +446,7 @@ int main(int argc, char *argv[])
|
||||
|
||||
|
||||
if (p_tests & 0x1000) {
|
||||
printf ("==> Test 0x1000 simple null stream tests\n");
|
||||
printf ("==> Test 0x1000 simple null stream tests\n");
|
||||
|
||||
// try some null stream:
|
||||
hipStreamQuery(0);
|
||||
@@ -463,7 +463,7 @@ int main(int argc, char *argv[])
|
||||
HIPCHECK(hipEventRecord(e1, s1))
|
||||
|
||||
HIPCHECK(hipStreamWaitEvent(hipStream_t(0), e1, 0/*flags*/));
|
||||
|
||||
|
||||
HIPCHECK(hipStreamDestroy(s1));
|
||||
HIPCHECK(hipEventDestroy(e1));
|
||||
}
|
||||
@@ -476,11 +476,11 @@ int main(int argc, char *argv[])
|
||||
HIPCHECK(hipEventRecord(e1, hipStream_t(0)))
|
||||
|
||||
HIPCHECK(hipStreamWaitEvent(s1, e1, 0/*flags*/));
|
||||
|
||||
|
||||
HIPCHECK(hipStreamDestroy(s1));
|
||||
HIPCHECK(hipEventDestroy(e1));
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
Tagairt in Eagrán Nua
Cuir bac ar úsáideoir