// NOTE(review): this file appears to have been damaged before it reached review.
//  * The original line breaks are collapsed. Because the next physical line starts
//    with `//`, everything on it (the #includes, p_streams, simpleNegTest,
//    HostTraits, addK and the start of test_pingpong) is currently a comment.
//  * Text that presumably sat between `<` and `>` is missing: template parameter
//    lists ("template struct HostTraits;"), loop conditions
//    ("for (size_t i=offset; i void test_pingpong"), and apparently the whole
//    definitions of test_manyInflightCopies and test_chunkedAsyncExample, which
//    main() calls but which are not visible anywhere here.
//    TODO: restore from the pristine source before building or editing logic.
//
// What the next physical line contains, in order:
//  * simpleNegTest(): prints its name, allocates three buffers of
//    N*sizeof(float) bytes (malloc -> A_malloc, hipMallocHost -> A_pinned,
//    hipMalloc -> A_d), then issues two hipMemcpyAsync calls with a NULL stream.
//    The first (A_pinned, A_d, hipMemcpyDefault) is asserted to return hipSuccess;
//    the second (A_malloc, A_d, hipMemcpyHostToDevice) only prints its result
//    because the assert after it is commented out.
//    NOTE(review): the in-code remark "Can't use default with async copy"
//    disagrees with the assert that expects hipSuccess -- confirm which is meant.
//    NOTE(review): the second call's message says "A_malloc to A_d" and uses
//    hipMemcpyHostToDevice, yet A_malloc is passed first and A_d second --
//    confirm the argument order against the hipMemcpyAsync API.
//    NOTE(review): none of the three buffers is released in the visible code.
//  * Forward declarations of the tag classes Pinned and Unpinned, and a
//    HostTraits specialization whose Name() returns "Pinned" and whose
//    Alloc(sizeBytes) returns memory obtained through hipMallocHost (HIPCHECK'd).
//    No Unpinned specialization is visible.
//  * addK kernel: computes offset = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x
//    and stride = hipBlockDim_x * hipGridDim_x; the loop condition and body are
//    lost (presumably adds K to each element of A -- TODO confirm).
//  * Start of test_pingpong(stream, numElements, numInflight, numPongs,
//    doHostSide): asserts numElements is a multiple of numInflight.
// Test under-development. Calls async mem-copy API, experiment with functionality. #include "hip_runtime.h" #include "test_common.h" unsigned p_streams = 2; void simpleNegTest() { printf ("testing: %s\n",__func__); hipError_t e; float *A_malloc, *A_pinned, *A_d; size_t Nbytes = N*sizeof(float); A_malloc = (float*)malloc(Nbytes); HIPCHECK(hipMallocHost(&A_pinned, Nbytes)); HIPCHECK(hipMalloc(&A_d, Nbytes)); // Can't use default with async copy e = hipMemcpyAsync(A_pinned, A_d, Nbytes, hipMemcpyDefault, NULL); HIPASSERT (e == hipSuccess); // Not sure what happens here, the memory must be pinned. e = hipMemcpyAsync(A_malloc, A_d, Nbytes, hipMemcpyHostToDevice, NULL); printf (" async memcpy of A_malloc to A_d. Result=%d\n", e); //HIPASSERT (e==hipErrorInvalidValue); } class Pinned; class Unpinned; template struct HostTraits; template<> struct HostTraits { static const char *Name() { return "Pinned"; } ; static void *Alloc(size_t sizeBytes) { void *p; HIPCHECK(hipMallocHost(&p, sizeBytes)); return p; }; }; template __global__ void addK (hipLaunchParm lp, T *A, T K, size_t numElements) { size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); size_t stride = hipBlockDim_x * hipGridDim_x ; for (size_t i=offset; i void test_pingpong(hipStream_t stream, size_t numElements, int numInflight, int numPongs, bool doHostSide) { HIPASSERT(numElements % numInflight == 0); // Must be evenly divisible. 
// Continuation of test_pingpong: derives Nbytes, the per-copy element and byte
// counts (numElements / numInflight), and a block count from
// HipTest::setNumBlocks; prints a banner; allocates A_h through
// HostTraits::Alloc and A_d through hipMalloc; declares the constants
// initValue = 13, deviceConst = 2, hostConst = 10000. The host-initialisation
// loop, the copy loops and the verification are truncated: only the tail of an
// addK launch (..., stream, A_d, 2, numElements) and loop fragments survive.
//
// After the gap comes the tail of an argument parser (presumably
// parseMyArguments, which main() calls): it reads the streams value into
// p_streams with HipTest::parseUInt and calls failed() on a bad or unknown
// argument.
//
// main() then begins: it parses the standard arguments, then the file-specific
// ones, selects device p_gpuDevice, and runs the groups selected by the p_tests
// bit mask:
//   0x01 -> simpleNegTest()
//   0x02 -> test_manyInflightCopies on a newly created stream (continues on the
//           next physical line)
    size_t Nbytes = numElements*sizeof(T); size_t eachCopyElements = numElements / numInflight; size_t eachCopyBytes = eachCopyElements * sizeof(T); unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); printf ("-----------------------------------------------------------------------------------------------\n"); printf ("testing: %s<%s> Nbytes=%zu (%6.1f MB) numPongs=%d numInflight=%d eachCopyElements=%zu eachCopyBytes=%zu\n", __func__, HostTraits::Name(), Nbytes, (double)(Nbytes)/1024.0/1024.0, numPongs, numInflight, eachCopyElements, eachCopyBytes); T *A_h; T *A_d; A_h = (T*)(HostTraits::Alloc(Nbytes)); HIPCHECK(hipMalloc(&A_d, Nbytes)); // Initialize the host array: const T initValue = 13; const T deviceConst = 2; const T hostConst = 10000; for (size_t i=0; i, dim3(blocks), dim3(threadsPerBlock), 0, stream, A_d, 2, numElements); for (int i=0; i (i); } //stream=0; // fixme TODO for (int i=0; i= argc || !HipTest::parseUInt(argv[i], &p_streams)) { failed("Bad streams argument"); } } else { failed("Bad argument '%s'", arg); } }; }; int main(int argc, char *argv[]) { HipTest::parseStandardArguments(argc, argv, true); parseMyArguments(argc, argv); printf ("info: set device to %d\n", p_gpuDevice); HIPCHECK(hipSetDevice(p_gpuDevice)); if (p_tests & 0x01) { simpleNegTest(); } if (p_tests & 0x02) { hipStream_t stream; HIPCHECK (hipStreamCreate(&stream)); test_manyInflightCopies(stream, 1024, 16, true); test_manyInflightCopies(stream, 1024, 4, true); // verify we re-use the same entries instead of growing pool. 
// Rest of main():
//   0x02 (cont.) -> a third test_manyInflightCopies run (1024*8 elements, 64,
//                   false), then the stream is destroyed.
//   0x04 -> four test_chunkedAsyncExample runs with p_streams streams and
//           different combinations of the three boolean flags.
//           NOTE(review): the first two runs are both labelled "Easy sync
//           version" although their first flag differs -- confirm the labels.
//   0x08 -> creates and destroys a stream; both test_pingpong calls are
//           commented out, so this group currently exercises nothing else.
// Finally passed() is called; no explicit return statement follows it.
    test_manyInflightCopies(stream, 1024*8, 64, false); HIPCHECK(hipStreamDestroy(stream)); } if (p_tests & 0x04) { test_chunkedAsyncExample(p_streams, true, true, true); // Easy sync version test_chunkedAsyncExample(p_streams, false, true, true); // Easy sync version test_chunkedAsyncExample(p_streams, false, false, true); // Some async test_chunkedAsyncExample(p_streams, false, false, false); // All async } if (p_tests & 0x08) { hipStream_t stream; HIPCHECK (hipStreamCreate(&stream)); // test_pingpong(stream, 1024*1024*32, 1, 1, false); // test_pingpong(stream, 1024*1024*32, 1, 10, false); HIPCHECK(hipStreamDestroy(stream)); } passed(); }