diff --git a/projects/hip-tests/catch/include/hip_test_common.hh b/projects/hip-tests/catch/include/hip_test_common.hh index e97c99bd8f..0fb79a49b3 100644 --- a/projects/hip-tests/catch/include/hip_test_common.hh +++ b/projects/hip-tests/catch/include/hip_test_common.hh @@ -166,7 +166,7 @@ static inline unsigned setNumBlocks(unsigned blocksPerCU, unsigned threadsPerBlo HIP_CHECK(hipGetDeviceProperties(&props, device)); unsigned blocks = props.multiProcessorCount * blocksPerCU; - if (blocks * threadsPerBlock > N) { + if (blocks * threadsPerBlock < N) { blocks = (N + threadsPerBlock - 1) / threadsPerBlock; } diff --git a/projects/hip-tests/catch/unit/memory/hipMallocManaged.cc b/projects/hip-tests/catch/unit/memory/hipMallocManaged.cc index d40aa924a1..672db36485 100644 --- a/projects/hip-tests/catch/unit/memory/hipMallocManaged.cc +++ b/projects/hip-tests/catch/unit/memory/hipMallocManaged.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -24,15 +24,14 @@ only on HMM enabled devices */ -#include +#include "hipMallocManagedCommon.hh" #include #include - // Kernel functions -__global__ void KernelMul_MngdMem(int *Hmm, int *Dptr, size_t n) { +__global__ void KernelMul_MngdMem(int* Hmm, int* Dptr, size_t n) { size_t index = blockIdx.x * blockDim.x + threadIdx.x; size_t stride = blockDim.x * gridDim.x; for (size_t i = index; i < n; i += stride) { @@ -40,7 +39,7 @@ __global__ void KernelMul_MngdMem(int *Hmm, int *Dptr, size_t n) { } } -__global__ void KernelMulAdd_MngdMem(int *Hmm, size_t n) { +__global__ void KernelMulAdd_MngdMem(int* Hmm, size_t n) { size_t index = blockIdx.x * blockDim.x + threadIdx.x; size_t stride = blockDim.x * gridDim.x; for (size_t i = index; i < n; i += stride) { @@ -48,130 +47,93 @@ __global__ void KernelMulAdd_MngdMem(int *Hmm, size_t n) { } } -__global__ void KrnlWth2MemTypesC(unsigned char *Hmm, unsigned char *Dptr, - size_t n) { - size_t index = blockIdx.x * blockDim.x + threadIdx.x; - size_t stride = blockDim.x * gridDim.x; - for (size_t i = index; i < n; i += stride) { - Hmm[i] = Dptr[i] + 10; - } -} - -static int HmmAttrPrint() { - int managed = 0; - INFO("The following are the attribute values related to HMM for" - " device 0:\n"); - HIP_CHECK(hipDeviceGetAttribute(&managed, - hipDeviceAttributeDirectManagedMemAccessFromHost, 0)); - INFO("hipDeviceAttributeDirectManagedMemAccessFromHost: " << managed); - HIP_CHECK(hipDeviceGetAttribute(&managed, - hipDeviceAttributeConcurrentManagedAccess, 0)); - INFO("hipDeviceAttributeConcurrentManagedAccess: " << managed); - HIP_CHECK(hipDeviceGetAttribute(&managed, - hipDeviceAttributePageableMemoryAccess, 0)); - INFO("hipDeviceAttributePageableMemoryAccess: " << managed); - HIP_CHECK(hipDeviceGetAttribute(&managed, - hipDeviceAttributePageableMemoryAccessUsesHostPageTables, 0)); - INFO("hipDeviceAttributePageableMemoryAccessUsesHostPageTables:" - << managed); - - HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory, - 0)); - INFO("hipDeviceAttributeManagedMemory: " << managed); - return managed; -} - - - -static size_t N{4 * 1024 * 1024}; +static size_t numElements{64 * 1024 * 1024}; static unsigned blocksPerCU{6}; static unsigned threadsPerBlock{256}; /* This testcase verifies the hipMallocManaged basic scenario - supported on all devices */ - TEST_CASE("Unit_hipMallocManaged_Basic") { - int numElements = (N < (64 * 1024 * 1024)) ? 64 * 1024 * 1024 : N; - float *A, *B, *C; + auto managed = HmmAttrPrint(); + if (managed != 1) { + WARN( + "GPU doesn't support hipDeviceAttributeManagedMemory attribute so defaulting to system " + "memory."); + } - HIP_CHECK(hipMallocManaged(&A, numElements*sizeof(float))); - HIP_CHECK(hipMallocManaged(&B, numElements*sizeof(float))); - HIP_CHECK(hipMallocManaged(&C, numElements*sizeof(float))); + float *A, *B, *C; + + HIP_CHECK(hipMallocManaged(&A, numElements * sizeof(float))); + HIP_CHECK(hipMallocManaged(&B, numElements * sizeof(float))); + HIP_CHECK(hipMallocManaged(&C, numElements * sizeof(float))); } /* - This testcase verifies the hipMallocManaged basic scenario - supported only on HMM enabled devices + This testcase verifies the hipMallocManaged advanced scenario - supported only on HMM enabled + devices */ - TEST_CASE("Unit_hipMallocManaged_Advanced") { - int managed = HmmAttrPrint(); - if (managed == 1) { - int numElements = (N < (64 * 1024 * 1024)) ? 64 * 1024 * 1024 : N; - float *A, *B, *C; - - HIP_CHECK(hipMallocManaged(&A, numElements*sizeof(float))); - HIP_CHECK(hipMallocManaged(&B, numElements*sizeof(float))); - HIP_CHECK(hipMallocManaged(&C, numElements*sizeof(float))); - HipTest::setDefaultData(numElements, A, B, C); - - hipDevice_t device = hipCpuDeviceId; - - HIP_CHECK(hipMemAdvise(A, numElements*sizeof(float), - hipMemAdviseSetReadMostly, device)); - HIP_CHECK(hipMemPrefetchAsync(A, numElements*sizeof(float), 0)); - HIP_CHECK(hipMemPrefetchAsync(B, numElements*sizeof(float), 0)); - HIP_CHECK(hipDeviceSynchronize()); - HIP_CHECK(hipMemRangeGetAttribute(&device, sizeof(device), - hipMemRangeAttributeLastPrefetchLocation, - A, numElements*sizeof(float))); - if (device != 0) { - INFO("hipMemRangeGetAttribute error, device = " << device); - } - uint32_t read_only = 0xf; - HIP_CHECK(hipMemRangeGetAttribute(&read_only, sizeof(read_only), - hipMemRangeAttributeReadMostly, - A, numElements*sizeof(float))); - if (read_only != 1) { - SUCCEED("hipMemRangeGetAttribute error, read_only = " << read_only); - } - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, - numElements); - hipEvent_t event0, event1; - HIP_CHECK(hipEventCreate(&event0)); - HIP_CHECK(hipEventCreate(&event1)); - HIP_CHECK(hipEventRecord(event0, 0)); - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), - 0, 0, static_cast(A), - static_cast(B), C, numElements); - HIP_CHECK(hipEventRecord(event1, 0)); - HIP_CHECK(hipDeviceSynchronize()); - float time = 0.0f; - HIP_CHECK(hipEventElapsedTime(&time, event0, event1)); - printf("Time %.3f ms\n", time); - float maxError = 0.0f; - HIP_CHECK(hipMemPrefetchAsync(B, numElements*sizeof(float), - hipCpuDeviceId)); - HIP_CHECK(hipDeviceSynchronize()); - device = 0; - HIP_CHECK(hipMemRangeGetAttribute(&device, sizeof(device), - hipMemRangeAttributeLastPrefetchLocation, - A, numElements*sizeof(float))); - if (device != hipCpuDeviceId) { - SUCCEED("hipMemRangeGetAttribute error device = " << device); - } - - for (int i = 0; i < numElements; i++) { - maxError = fmax(maxError, fabs(B[i]-3.0f)); - } - HIP_CHECK(hipFree(A)); - HIP_CHECK(hipFree(B)); - REQUIRE(maxError != 0.0f); - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); + auto managed = HmmAttrPrint(); + if (managed != 1) { + HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test."); + return; } -} + float *A, *B, *C; + + HIP_CHECK(hipMallocManaged(&A, numElements * sizeof(float))); + HIP_CHECK(hipMallocManaged(&B, numElements * sizeof(float))); + HIP_CHECK(hipMallocManaged(&C, numElements * sizeof(float))); + HipTest::setDefaultData(numElements, A, B, C); + + hipDevice_t device = hipCpuDeviceId; + + HIP_CHECK(hipMemAdvise(A, numElements * sizeof(float), hipMemAdviseSetReadMostly, device)); + HIP_CHECK(hipMemPrefetchAsync(A, numElements * sizeof(float), 0)); + HIP_CHECK(hipMemPrefetchAsync(B, numElements * sizeof(float), 0)); + HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK(hipMemRangeGetAttribute(&device, sizeof(device), + hipMemRangeAttributeLastPrefetchLocation, A, + numElements * sizeof(float))); + if (device != 0) { + INFO("hipMemRangeGetAttribute error, device = " << device); + } + uint32_t read_only = 0xf; + HIP_CHECK(hipMemRangeGetAttribute(&read_only, sizeof(read_only), hipMemRangeAttributeReadMostly, + A, numElements * sizeof(float))); + if (read_only != 1) { + SUCCEED("hipMemRangeGetAttribute error, read_only = " << read_only); + } + + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); + hipEvent_t event0, event1; + HIP_CHECK(hipEventCreate(&event0)); + HIP_CHECK(hipEventCreate(&event1)); + HIP_CHECK(hipEventRecord(event0, 0)); + hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, + static_cast(A), static_cast(B), C, numElements); + HIP_CHECK(hipEventRecord(event1, 0)); + HIP_CHECK(hipDeviceSynchronize()); + float time = 0.0f; + HIP_CHECK(hipEventElapsedTime(&time, event0, event1)); + printf("Time %.3f ms\n", time); + float maxError = 0.0f; + HIP_CHECK(hipMemPrefetchAsync(B, numElements * sizeof(float), hipCpuDeviceId)); + HIP_CHECK(hipDeviceSynchronize()); + device = 0; + HIP_CHECK(hipMemRangeGetAttribute(&device, sizeof(device), + hipMemRangeAttributeLastPrefetchLocation, A, + numElements * sizeof(float))); + if (device != hipCpuDeviceId) { + SUCCEED("hipMemRangeGetAttribute error device = " << device); + } + + for (size_t i = 0; i < numElements; i++) { + maxError = fmax(maxError, fabs(B[i] - 3.0f)); + } + HIP_CHECK(hipFree(A)); + HIP_CHECK(hipFree(B)); + REQUIRE(maxError != 0.0f); +} diff --git a/projects/hip-tests/catch/unit/memory/hipMallocManagedCommon.hh b/projects/hip-tests/catch/unit/memory/hipMallocManagedCommon.hh new file mode 100644 index 0000000000..cb6c6c7eee --- /dev/null +++ b/projects/hip-tests/catch/unit/memory/hipMallocManagedCommon.hh @@ -0,0 +1,26 @@ +#include + +static int HmmAttrPrint() { + int managed = 0; + INFO( + "The following are the attribute values related to HMM for" + " device 0:\n"); + HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeDirectManagedMemAccessFromHost, 0)); + INFO("hipDeviceAttributeDirectManagedMemAccessFromHost: " << managed); + HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeConcurrentManagedAccess, 0)); + INFO("hipDeviceAttributeConcurrentManagedAccess: " << managed); + HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributePageableMemoryAccess, 0)); + INFO("hipDeviceAttributePageableMemoryAccess: " << managed); + HIP_CHECK( + hipDeviceGetAttribute(&managed, hipDeviceAttributePageableMemoryAccessUsesHostPageTables, 0)); + INFO("hipDeviceAttributePageableMemoryAccessUsesHostPageTables:" << managed); + + HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory, 0)); + INFO("hipDeviceAttributeManagedMemory: " << managed); + if (managed != 1) { + WARN( + "GPU 0 doesn't support hipDeviceAttributeManagedMemory attribute so defaulting to system " + "memory."); + } + return managed; +} \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/memory/hipMallocManagedFlagsTst.cc b/projects/hip-tests/catch/unit/memory/hipMallocManagedFlagsTst.cc index 8ac1c37853..264320c483 100644 --- a/projects/hip-tests/catch/unit/memory/hipMallocManagedFlagsTst.cc +++ b/projects/hip-tests/catch/unit/memory/hipMallocManagedFlagsTst.cc @@ -1,5 +1,5 @@ /* -Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -20,245 +20,210 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include +#include "hipMallocManagedCommon.hh" #include // Kernel function -__global__ void MallcMangdFlgTst(int n, float *x, float *y) { +__global__ void MallcMangdFlgTst(int n, float* x, float* y) { int index = blockIdx.x * blockDim.x + threadIdx.x; int stride = blockDim.x * gridDim.x; - for (int i = index; i < n; i += stride) - y[i] = x[i] * x[i]; -} - - -// The following function prints info on attributes related to HMM -static int HmmAttrPrint() { - int managed = 0; - INFO("The following are the attribute values related to HMM for" - " device 0:\n"); - HIP_CHECK(hipDeviceGetAttribute(&managed, - hipDeviceAttributeDirectManagedMemAccessFromHost, 0)); - INFO("hipDeviceAttributeDirectManagedMemAccessFromHost: " << managed); - HIP_CHECK(hipDeviceGetAttribute(&managed, - hipDeviceAttributeConcurrentManagedAccess, 0)); - INFO("hipDeviceAttributeConcurrentManagedAccess: " << managed); - HIP_CHECK(hipDeviceGetAttribute(&managed, - hipDeviceAttributePageableMemoryAccess, 0)); - INFO("hipDeviceAttributePageableMemoryAccess: " << managed); - HIP_CHECK(hipDeviceGetAttribute(&managed, - hipDeviceAttributePageableMemoryAccessUsesHostPageTables, 0)); - INFO("hipDeviceAttributePageableMemoryAccessUsesHostPageTables:" - << managed); - - HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory, - 0)); - INFO("hipDeviceAttributeManagedMemory: " << managed); - return managed; + for (int i = index; i < n; i += stride) y[i] = x[i] * x[i]; } // The following section tests working of hipMallocManaged with flag parameters TEST_CASE("Unit_hipMallocManaged_FlgParam") { - int managed = HmmAttrPrint(); - if (managed == 1) { - std::atomic DataMismatch{0}; - bool IfTestPassed = true; - float *HmmAG = NULL, *HmmAH1 = NULL, *HmmAH2 = NULL, INIT_VAL = 2.5; - int NumDevs = 0, NUM_ELMS = 4096; - HIP_CHECK(hipGetDeviceCount(&NumDevs)); - float *Ad = NULL, *Ah = NULL; - Ah = new float[NUM_ELMS]; - // Testing hipMemAttachGlobal Flag - HIP_CHECK(hipMallocManaged(&HmmAG, NUM_ELMS * sizeof(float), - hipMemAttachGlobal)); - - // Initializing HmmAG memory - for (int i = 0; i < NUM_ELMS; i++) { - HmmAG[i] = INIT_VAL; - Ah[i] = 0; - } - - int blockSize = 256; - int numBlocks = (NUM_ELMS + blockSize - 1) / blockSize; - dim3 dimGrid(numBlocks, 1, 1); - dim3 dimBlock(blockSize, 1, 1); - hipStream_t strm; - for (int i = 0; i < NumDevs; i++) { - HIP_CHECK(hipSetDevice(i)); - HIP_CHECK(hipStreamCreate(&strm)); - HIP_CHECK(hipMalloc(&Ad, NUM_ELMS * sizeof(float))); - HIP_CHECK(hipMemset(Ad, 0, NUM_ELMS * sizeof(float))); - MallcMangdFlgTst<<>>(NUM_ELMS, HmmAG, Ad); - HIP_CHECK(hipStreamSynchronize(strm)); - HIP_CHECK(hipMemcpy(Ah, Ad, NUM_ELMS * sizeof(float), - hipMemcpyDeviceToHost)); - for (int j = 0; j < NUM_ELMS; ++j) { - if (Ah[j] != (INIT_VAL * INIT_VAL)) { - DataMismatch++; - } - } - if (DataMismatch != 0) { - WARN("Data Mismatch observed when kernel launched on"); - WARN(" device: " << i); - IfTestPassed = false; - } - DataMismatch = 0; - - HIP_CHECK(hipFree(Ad)); - HIP_CHECK(hipStreamDestroy(strm)); - } - delete[] Ah; - HIP_CHECK(hipFree(HmmAG)); - - DataMismatch = 0; - HIP_CHECK(hipMallocManaged(&HmmAH1, NUM_ELMS * sizeof(float), - hipMemAttachHost)); - HIP_CHECK(hipMallocManaged(&HmmAH2, NUM_ELMS * sizeof(float), - hipMemAttachHost)); - - // Initializing HmmAH memory - for (int i = 0; i < NUM_ELMS; i++) { - HmmAH1[i] = INIT_VAL; - HmmAH2[i] = 0; - } - for (int i = 0; i < NumDevs; i++) { - HIP_CHECK(hipSetDevice(i)); - HIP_CHECK(hipStreamCreate(&strm)); - HIP_CHECK(hipMemset(HmmAH2, 0, NUM_ELMS * sizeof(float))); - MallcMangdFlgTst<<>>(NUM_ELMS, - HmmAH1, HmmAH2); - HIP_CHECK(hipStreamSynchronize(strm)); - for (int j = 0; j < NUM_ELMS; ++j) { - if (HmmAH2[j] != (INIT_VAL * INIT_VAL)) { - DataMismatch++; - } - } - if (DataMismatch != 0) { - WARN("Data Mismatch observed when kernel launched on"); - WARN(" device: " << i); - IfTestPassed = false; - } - HIP_CHECK(hipStreamDestroy(strm)); - } - HIP_CHECK(hipFree(HmmAH1)); - HIP_CHECK(hipFree(HmmAH2)); - REQUIRE(IfTestPassed); - } else { - SUCCEED("Gpu doesnt support HMM! Hence skipping the test with PASS result"); + + auto managed = HmmAttrPrint(); + if (managed != 1) { + HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test."); + return; } + + std::atomic DataMismatch{0}; + bool IfTestPassed = true; + float *HmmAG = NULL, *HmmAH1 = NULL, *HmmAH2 = NULL, INIT_VAL = 2.5; + int NumDevs = 0, NUM_ELMS = 4096; + HIP_CHECK(hipGetDeviceCount(&NumDevs)); + float *Ad = NULL, *Ah = NULL; + Ah = new float[NUM_ELMS]; + // Testing hipMemAttachGlobal Flag + HIP_CHECK(hipMallocManaged(&HmmAG, NUM_ELMS * sizeof(float), hipMemAttachGlobal)); + + // Initializing HmmAG memory + for (int i = 0; i < NUM_ELMS; i++) { + HmmAG[i] = INIT_VAL; + Ah[i] = 0; + } + + int blockSize = 256; + int numBlocks = (NUM_ELMS + blockSize - 1) / blockSize; + dim3 dimGrid(numBlocks, 1, 1); + dim3 dimBlock(blockSize, 1, 1); + hipStream_t strm; + for (int i = 0; i < NumDevs; i++) { + HIP_CHECK(hipSetDevice(i)); + HIP_CHECK(hipStreamCreate(&strm)); + HIP_CHECK(hipMalloc(&Ad, NUM_ELMS * sizeof(float))); + HIP_CHECK(hipMemset(Ad, 0, NUM_ELMS * sizeof(float))); + MallcMangdFlgTst<<>>(NUM_ELMS, HmmAG, Ad); + HIP_CHECK(hipStreamSynchronize(strm)); + HIP_CHECK(hipMemcpy(Ah, Ad, NUM_ELMS * sizeof(float), hipMemcpyDeviceToHost)); + for (int j = 0; j < NUM_ELMS; ++j) { + if (Ah[j] != (INIT_VAL * INIT_VAL)) { + DataMismatch++; + } + } + if (DataMismatch != 0) { + WARN("Data Mismatch observed when kernel launched on"); + WARN(" device: " << i); + IfTestPassed = false; + } + DataMismatch = 0; + + HIP_CHECK(hipFree(Ad)); + HIP_CHECK(hipStreamDestroy(strm)); + } + delete[] Ah; + HIP_CHECK(hipFree(HmmAG)); + + DataMismatch = 0; + HIP_CHECK(hipMallocManaged(&HmmAH1, NUM_ELMS * sizeof(float), hipMemAttachHost)); + HIP_CHECK(hipMallocManaged(&HmmAH2, NUM_ELMS * sizeof(float), hipMemAttachHost)); + + // Initializing HmmAH memory + for (int i = 0; i < NUM_ELMS; i++) { + HmmAH1[i] = INIT_VAL; + HmmAH2[i] = 0; + } + for (int i = 0; i < NumDevs; i++) { + HIP_CHECK(hipSetDevice(i)); + HIP_CHECK(hipStreamCreate(&strm)); + HIP_CHECK(hipMemset(HmmAH2, 0, NUM_ELMS * sizeof(float))); + MallcMangdFlgTst<<>>(NUM_ELMS, HmmAH1, HmmAH2); + HIP_CHECK(hipStreamSynchronize(strm)); + for (int j = 0; j < NUM_ELMS; ++j) { + if (HmmAH2[j] != (INIT_VAL * INIT_VAL)) { + DataMismatch++; + } + } + if (DataMismatch != 0) { + WARN("Data Mismatch observed when kernel launched on"); + WARN(" device: " << i); + IfTestPassed = false; + } + HIP_CHECK(hipStreamDestroy(strm)); + } + HIP_CHECK(hipFree(HmmAH1)); + HIP_CHECK(hipFree(HmmAH2)); + REQUIRE(IfTestPassed); } // The following function tests Memory access allocated using hipMallocManaged // in multiple streams TEST_CASE("Unit_hipMallocManaged_AccessMultiStream") { - int managed = HmmAttrPrint(); - if (managed == 1) { - std::atomic DataMismatch{0}; - bool IfTestPassed = true; - float *HmmAG = NULL, *HmmAH1 = NULL, *HmmAH2 = NULL, INIT_VAL = 2.5; - int NumStrms = 0, MultiDevice = 0, NUM_ELMS = 4096; - HIP_CHECK(hipGetDeviceCount(&MultiDevice)); - if (MultiDevice >= 2) { - HIP_CHECK(hipGetDeviceCount(&NumStrms)); - } else { - NumStrms = 4; - } - hipStream_t **Stream = new hipStream_t*[NumStrms]; - for (int i = 0; i < NumStrms; ++i) { - Stream[i] = reinterpret_cast(malloc(sizeof(hipStream_t))); - } - float *Ad = NULL, *Ah = NULL; - Ah = new float[NUM_ELMS]; - for (int i = 0; i < NumStrms; ++i) { - if (MultiDevice >= 2) { - HIP_CHECK(hipSetDevice(i)); - } - HIP_CHECK(hipStreamCreate(Stream[i])); - } - HIP_CHECK(hipSetDevice(0)); - // Testing hipMemAttachGlobal Flag - HIP_CHECK(hipMallocManaged(&HmmAG, NUM_ELMS * sizeof(float), - hipMemAttachGlobal)); - - // Initializing HmmAG memory - for (int i = 0; i < NUM_ELMS; i++) { - HmmAG[i] = INIT_VAL; - Ah[i] = 0; - } - - int blockSize = 256; - int numBlocks = (NUM_ELMS + blockSize - 1) / blockSize; - dim3 dimGrid(numBlocks, 1, 1); - dim3 dimBlock(blockSize, 1, 1); - for (int i = 0; i < NumStrms; i++) { - if (MultiDevice >= 2) { - HIP_CHECK(hipSetDevice(i)); - } - HIP_CHECK(hipMalloc(&Ad, NUM_ELMS * sizeof(float))); - HIP_CHECK(hipMemset(Ad, 0, NUM_ELMS * sizeof(float))); - MallcMangdFlgTst<<>>(NUM_ELMS, - HmmAG, Ad); - HIP_CHECK(hipStreamSynchronize(*(Stream[i]))); - // Validating the results - HIP_CHECK(hipMemcpy(Ah, Ad, NUM_ELMS * sizeof(float), - hipMemcpyDeviceToHost)); - for (int j = 0; j < NUM_ELMS; ++j) { - if (Ah[j] != (INIT_VAL * INIT_VAL)) { - DataMismatch++; - } - } - if (DataMismatch != 0) { - WARN("Data Mismatch observed when kernel launched on"); - WARN(" device: " << i); - IfTestPassed = false; - } - DataMismatch = 0; - - HIP_CHECK(hipFree(Ad)); - } - delete[] Ah; - HIP_CHECK(hipFree(HmmAG)); - - DataMismatch = 0; - HIP_CHECK(hipMallocManaged(&HmmAH1, NUM_ELMS * sizeof(float), - hipMemAttachHost)); - HIP_CHECK(hipMallocManaged(&HmmAH2, NUM_ELMS * sizeof(float), - hipMemAttachHost)); - - // Initializing HmmAH memory - for (int i = 0; i < NUM_ELMS; i++) { - HmmAH1[i] = INIT_VAL; - HmmAH2[i] = 0; - } - for (int i = 0; i < NumStrms; i++) { - if (MultiDevice >= 2) { - HIP_CHECK(hipSetDevice(i)); - } - HIP_CHECK(hipMemset(HmmAH2, 0, NUM_ELMS * sizeof(float))); - MallcMangdFlgTst<<>>(NUM_ELMS, - HmmAH1, HmmAH2); - HIP_CHECK(hipStreamSynchronize(*(Stream[i]))); - for (int j = 0; j < NUM_ELMS; ++j) { - if (HmmAH2[j] != (INIT_VAL * INIT_VAL)) { - DataMismatch++; - break; - } - } - if (DataMismatch != 0) { - WARN("Data Mismatch observed when kernel launched on"); - WARN(" device: " << i); - IfTestPassed = false; - } - } - - HIP_CHECK(hipFree(HmmAH1)); - HIP_CHECK(hipFree(HmmAH2)); - for (int i = 0; i < NumStrms; ++i) { - HIP_CHECK(hipStreamDestroy(*(Stream[i]))); - } - REQUIRE(IfTestPassed); - } else { - SUCCEED("Gpu doesnt support HMM! Hence skipping the test with PASS result"); + + auto managed = HmmAttrPrint(); + if (managed != 1) { + HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test."); + return; } -} + + std::atomic DataMismatch{0}; + bool IfTestPassed = true; + float *HmmAG = NULL, *HmmAH1 = NULL, *HmmAH2 = NULL, INIT_VAL = 2.5; + int NumStrms = 0, MultiDevice = 0, NUM_ELMS = 4096; + HIP_CHECK(hipGetDeviceCount(&MultiDevice)); + if (MultiDevice >= 2) { + HIP_CHECK(hipGetDeviceCount(&NumStrms)); + } else { + NumStrms = 4; + } + hipStream_t** Stream = new hipStream_t*[NumStrms]; + for (int i = 0; i < NumStrms; ++i) { + Stream[i] = reinterpret_cast(malloc(sizeof(hipStream_t))); + } + float *Ad = NULL, *Ah = NULL; + Ah = new float[NUM_ELMS]; + for (int i = 0; i < NumStrms; ++i) { + if (MultiDevice >= 2) { + HIP_CHECK(hipSetDevice(i)); + } + HIP_CHECK(hipStreamCreate(Stream[i])); + } + HIP_CHECK(hipSetDevice(0)); + // Testing hipMemAttachGlobal Flag + HIP_CHECK(hipMallocManaged(&HmmAG, NUM_ELMS * sizeof(float), hipMemAttachGlobal)); + // Initializing HmmAG memory + for (int i = 0; i < NUM_ELMS; i++) { + HmmAG[i] = INIT_VAL; + Ah[i] = 0; + } + + int blockSize = 256; + int numBlocks = (NUM_ELMS + blockSize - 1) / blockSize; + dim3 dimGrid(numBlocks, 1, 1); + dim3 dimBlock(blockSize, 1, 1); + for (int i = 0; i < NumStrms; i++) { + if (MultiDevice >= 2) { + HIP_CHECK(hipSetDevice(i)); + } + HIP_CHECK(hipMalloc(&Ad, NUM_ELMS * sizeof(float))); + HIP_CHECK(hipMemset(Ad, 0, NUM_ELMS * sizeof(float))); + MallcMangdFlgTst<<>>(NUM_ELMS, HmmAG, Ad); + HIP_CHECK(hipStreamSynchronize(*(Stream[i]))); + // Validating the results + HIP_CHECK(hipMemcpy(Ah, Ad, NUM_ELMS * sizeof(float), hipMemcpyDeviceToHost)); + for (int j = 0; j < NUM_ELMS; ++j) { + if (Ah[j] != (INIT_VAL * INIT_VAL)) { + DataMismatch++; + } + } + if (DataMismatch != 0) { + WARN("Data Mismatch observed when kernel launched on"); + WARN(" device: " << i); + IfTestPassed = false; + } + DataMismatch = 0; + + HIP_CHECK(hipFree(Ad)); + } + delete[] Ah; + HIP_CHECK(hipFree(HmmAG)); + + DataMismatch = 0; + HIP_CHECK(hipMallocManaged(&HmmAH1, NUM_ELMS * sizeof(float), hipMemAttachHost)); + HIP_CHECK(hipMallocManaged(&HmmAH2, NUM_ELMS * sizeof(float), hipMemAttachHost)); + + // Initializing HmmAH memory + for (int i = 0; i < NUM_ELMS; i++) { + HmmAH1[i] = INIT_VAL; + HmmAH2[i] = 0; + } + for (int i = 0; i < NumStrms; i++) { + if (MultiDevice >= 2) { + HIP_CHECK(hipSetDevice(i)); + } + HIP_CHECK(hipMemset(HmmAH2, 0, NUM_ELMS * sizeof(float))); + MallcMangdFlgTst<<>>(NUM_ELMS, HmmAH1, HmmAH2); + HIP_CHECK(hipStreamSynchronize(*(Stream[i]))); + for (int j = 0; j < NUM_ELMS; ++j) { + if (HmmAH2[j] != (INIT_VAL * INIT_VAL)) { + DataMismatch++; + break; + } + } + if (DataMismatch != 0) { + WARN("Data Mismatch observed when kernel launched on"); + WARN(" device: " << i); + IfTestPassed = false; + } + } + + HIP_CHECK(hipFree(HmmAH1)); + HIP_CHECK(hipFree(HmmAH2)); + for (int i = 0; i < NumStrms; ++i) { + HIP_CHECK(hipStreamDestroy(*(Stream[i]))); + } + REQUIRE(IfTestPassed); +} diff --git a/projects/hip-tests/catch/unit/memory/hipMallocManaged_MultiScenario.cc b/projects/hip-tests/catch/unit/memory/hipMallocManaged_MultiScenario.cc index bf67d70765..976a4397fc 100644 --- a/projects/hip-tests/catch/unit/memory/hipMallocManaged_MultiScenario.cc +++ b/projects/hip-tests/catch/unit/memory/hipMallocManaged_MultiScenario.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -27,28 +27,81 @@ 6. Multiple Pointers */ -#include +#include "hipMallocManagedCommon.hh" #include #include #include const size_t MAX_GPU{256}; -static size_t N{4*1024*1024}; +static size_t N{4 * 1024 * 1024}; +static unsigned blocksPerCU{6}; +static unsigned threadsPerBlock{256}; #define INIT_VAL 123 /* * Kernel function to perform addition operation. */ -template -__global__ void -vector_sum(T *Ad1, T *Ad2, size_t NUM_ELMTS) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; +template __global__ void vector_sum(T* Ad1, T* Ad2, size_t NUM_ELMTS) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; - for (size_t i = offset; i < NUM_ELMTS; i += stride) { - Ad2[i] = Ad1[i] + Ad1[i]; - } + for (size_t i = offset; i < NUM_ELMTS; i += stride) { + Ad2[i] = Ad1[i] + Ad1[i]; + } +} + +/* + * Kernel function to perform multiplication + */ +__global__ void KernelDouble(float* Hmm, float* dPtr, size_t n) { + size_t index = blockIdx.x * blockDim.x + threadIdx.x; + if (index < n) { + dPtr[index] = 2 * Hmm[index]; + } +} + +/* + * Host function to perform multiplication + */ +void HostKernelDouble(float* Hmm, float* hPtr, size_t n) { + for (size_t i = 0; i < n; i++) { + hPtr[i] = 2 * Hmm[i]; + } +} + +/* + This testcase verifies the concurrent access of hipMallocManaged Memory on host and device. + */ +TEST_CASE("Unit_hipMallocManaged_HostDeviceConcurrent") { + auto managed = HmmAttrPrint(); + if (managed != 1) { + HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test."); + return; + } + + float *Hmm = nullptr, *hPtr = nullptr, *dPtr = nullptr, *resPtr = nullptr; + + hPtr = reinterpret_cast(malloc(N * sizeof(float))); + resPtr = reinterpret_cast(malloc(N * sizeof(float))); + + HIP_CHECK(hipMalloc(&dPtr, N * sizeof(float))); + HIP_CHECK(hipMallocManaged(&Hmm, N * sizeof(float))); + memset(Hmm, 2.0, N * sizeof(float)); + + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + std::thread host_thread(HostKernelDouble, Hmm, hPtr, N); + KernelDouble<<>>(Hmm, dPtr, N); + host_thread.join(); + hipMemcpy(resPtr, dPtr, N * sizeof(float), hipMemcpyDeviceToHost); + + for (size_t i = 0; i < N; i++) { + REQUIRE(hPtr[i] == resPtr[i]); + } + + free(hPtr); + HIP_CHECK(hipFree(dPtr)); + HIP_CHECK(hipFree(Hmm)); } // The following Test case tests the following scenario: @@ -57,7 +110,13 @@ vector_sum(T *Ad1, T *Ad2, size_t NUM_ELMTS) { // kernel is launched on acessed chunk of hmm memory // and checks if there are any inconsistencies or access issues TEST_CASE("Unit_hipMallocManaged_MultiChunkSingleDevice") { -std::atomic DataMismatch{0}; + auto managed = HmmAttrPrint(); + if (managed != 1) { + HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test."); + return; + } + + std::atomic DataMismatch{0}; constexpr int Chunks = 4; int Counter = 0; int NUM_ELMS = (1024 * 1024); @@ -74,16 +133,14 @@ std::atomic DataMismatch{0}; Hmm[Counter] = (INIT_VAL + i); } } - const unsigned threadsPerBlock = 256; - const unsigned blocks = (NUM_ELMS + 255)/256; + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); for (int k = 0; k < Chunks; ++k) { - vector_sum <<>> - (&Hmm[k * NUM_ELMS], Ad[k], NUM_ELMS); + vector_sum + <<>>(&Hmm[k * NUM_ELMS], Ad[k], NUM_ELMS); } HIP_CHECK(hipDeviceSynchronize()); for (int m = 0; m < Chunks; ++m) { - HIP_CHECK(hipMemcpy(Ah, Ad[m], NUM_ELMS * sizeof(float), - hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpy(Ah, Ad[m], NUM_ELMS * sizeof(float), hipMemcpyDeviceToHost)); for (int n = 0; n < NUM_ELMS; ++n) { if (Ah[n] != ((INIT_VAL + m) * 2)) { DataMismatch++; @@ -96,7 +153,7 @@ std::atomic DataMismatch{0}; HIP_CHECK(hipStreamDestroy(stream[i])); } HIP_CHECK(hipFree(Hmm)); - delete [] Ah; + delete[] Ah; } // The following Test case tests the following scenario: @@ -105,10 +162,20 @@ std::atomic DataMismatch{0}; // kernel is launched on acessed chunk of hmm memory // and checks if there are any inconsistencies or access issues TEST_CASE("Unit_hipMallocManaged_MultiChunkMultiDevice") { + auto managed = HmmAttrPrint(); + if (managed != 1) { + HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test."); + return; + } + std::atomic DataMismatch{0}; int Counter = 0; int NumDevices = 0; HIP_CHECK(hipGetDeviceCount(&NumDevices)); + if (NumDevices < 2) { + HipTest::HIP_SKIP_TEST("Skipping test because more than one device was not found."); + return; + } unsigned int NUM_ELMS = (1024 * 1024); float *Ad[MAX_GPU], *Hmm = NULL, *Ah = new float[NUM_ELMS]; hipStream_t stream[MAX_GPU]; @@ -124,17 +191,15 @@ TEST_CASE("Unit_hipMallocManaged_MultiChunkMultiDevice") { Hmm[Counter] = INIT_VAL + i; } } - const unsigned threadsPerBlock = 256; - const unsigned blocks = (NUM_ELMS + 255)/256; + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); for (int Klaunch = 0; Klaunch < NumDevices; ++Klaunch) { HIP_CHECK(hipSetDevice(Klaunch)); - vector_sum <<>> - (&Hmm[Klaunch * NUM_ELMS], Ad[Klaunch], NUM_ELMS); + vector_sum<<>>(&Hmm[Klaunch * NUM_ELMS], + Ad[Klaunch], NUM_ELMS); } HIP_CHECK(hipDeviceSynchronize()); for (int m = 0; m < NumDevices; ++m) { - HIP_CHECK(hipMemcpy(Ah, Ad[m], NUM_ELMS * sizeof(float), - hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpy(Ah, Ad[m], NUM_ELMS * sizeof(float), hipMemcpyDeviceToHost)); for (size_t n = 0; n < NUM_ELMS; ++n) { if (Ah[n] != ((INIT_VAL + m) * 2)) { DataMismatch++; @@ -148,32 +213,38 @@ TEST_CASE("Unit_hipMallocManaged_MultiChunkMultiDevice") { HIP_CHECK(hipStreamDestroy(stream[i])); } HIP_CHECK(hipFree(Hmm)); - delete [] Ah; + delete[] Ah; } // The following tests oversubscription hipMallocManaged() api // Currently disabled. TEST_CASE("Unit_hipMallocManaged_OverSubscription") { - void *A = nullptr; + auto managed = HmmAttrPrint(); + if (managed != 1) { + HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test."); + return; + } + + void* A = nullptr; size_t total = 0, free = 0; HIP_CHECK(hipMemGetInfo(&free, &total)); // ToDo: In case of HMM, memory over-subscription is allowed. Hence, relook // into how out of memory can be tested. // Demanding more mem size than available #if HT_AMD - REQUIRE(hipMallocManaged(&A, (free +1), hipMemAttachGlobal) != hipSuccess); + HIP_CHECK_ERROR(hipMallocManaged(&A, (free + 1), hipMemAttachGlobal), hipErrorOutOfMemory); #endif } // The following test does negative testing of hipMallocManaged() api // by passing invalid values and check if the behavior is as expected TEST_CASE("Unit_hipMallocManaged_Negative") { - void *A; + void* A; size_t total = 0, free = 0; HIP_CHECK(hipMemGetInfo(&free, &total)); SECTION("Nullptr to devPtr") { - REQUIRE(hipMallocManaged(NULL, 1024, hipMemAttachGlobal) != hipSuccess); + HIP_CHECK_ERROR(hipMallocManaged(NULL, 1024, hipMemAttachGlobal), hipErrorInvalidValue); } // cuda api doc says : If size is 0, cudaMallocManaged returns @@ -184,14 +255,14 @@ TEST_CASE("Unit_hipMallocManaged_Negative") { // reset ptr while returning success (to accommodate cuda 11.2 api behavior). SECTION("size 0 with flag hipMemAttachGlobal") { #if HT_AMD - REQUIRE(hipMallocManaged(&A, 0, hipMemAttachGlobal) != hipSuccess); + HIP_CHECK_ERROR(hipMallocManaged(&A, 0, hipMemAttachGlobal), hipErrorInvalidValue); #else - REQUIRE(hipMallocManaged(&A, 0, hipMemAttachHost) == hipSuccess); + HIP_CHECK(hipMallocManaged(&A, 0, hipMemAttachGlobal)); #endif } SECTION("devptr is nullptr with flag hipMemAttachHost") { - REQUIRE(hipMallocManaged(NULL, 1024, hipMemAttachHost) != hipSuccess); + HIP_CHECK_ERROR(hipMallocManaged(NULL, 1024, hipMemAttachHost), hipErrorInvalidValue); } // cuda api doc says : If size is 0, cudaMallocManaged returns @@ -202,32 +273,47 @@ TEST_CASE("Unit_hipMallocManaged_Negative") { // reset ptr while returning success (to accommodate cuda 11.2 api behavior). SECTION("size 0 with flag hipMemAttachHost") { #if HT_AMD - REQUIRE(hipMallocManaged(&A, 0, hipMemAttachHost) != hipSuccess); + HIP_CHECK_ERROR(hipMallocManaged(&A, 0, hipMemAttachHost), hipErrorInvalidValue); #else - REQUIRE(hipMallocManaged(&A, 0, hipMemAttachHost) == hipSuccess); + HIP_CHECK(hipMallocManaged(&A, 0, hipMemAttachHost)); #endif } + SECTION("nullptr to devptr, size 0 and flag 0") { - REQUIRE(hipMallocManaged(NULL, 0, 0) != hipSuccess); + HIP_CHECK_ERROR(hipMallocManaged(NULL, 0, 0), hipErrorInvalidValue); } - SECTION("Numeric value to flag parameter") { - REQUIRE(hipMallocManaged(&A, 1024, 145) != hipSuccess); + SECTION("Invalid flag parameter") { + HIP_CHECK_ERROR(hipMallocManaged(&A, 1024, 145), hipErrorInvalidValue); + } + SECTION("Invalid flag parameter- flag set to 0") { + HIP_CHECK_ERROR(hipMallocManaged(&A, 1024, 0), hipErrorInvalidValue); + } + SECTION("Invalid flag parameter- Both flags set") { + HIP_CHECK_ERROR(hipMallocManaged(&A, 1024, hipMemAttachGlobal | hipMemAttachHost), + hipErrorInvalidValue); } - SECTION("Negative value to size") { - REQUIRE(hipMallocManaged(&A, -10, hipMemAttachGlobal)); + SECTION("Max value to size") { + HIP_CHECK_ERROR(hipMallocManaged(&A, std::numeric_limits::max(), hipMemAttachGlobal), + hipErrorOutOfMemory); } } // Allocate two pointers using hipMallocManaged(), initialize, // then launch kernel using these pointers directly and // later validate the content without using any Memcpy. -TEMPLATE_TEST_CASE("Unit_hipMallocManaged_TwoPointers", "", - int, float, double) { +TEMPLATE_TEST_CASE("Unit_hipMallocManaged_TwoPointers", "", int, float, double) { + auto managed = HmmAttrPrint(); + if (managed != 1) { + HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test."); + return; + } + int NumDevices = 0; HIP_CHECK(hipGetDeviceCount(&NumDevices)); TestType *Hmm1 = nullptr, *Hmm2 = nullptr; + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); for (int i = 0; i < NumDevices; ++i) { HIP_CHECK(hipSetDevice(i)); @@ -238,10 +324,8 @@ TEMPLATE_TEST_CASE("Unit_hipMallocManaged_TwoPointers", "", Hmm1[m] = m; Hmm2[m] = 0; } - const unsigned threadsPerBlock = 256; - const unsigned blocks = (N + 255)/256; // Kernel launch - vector_sum <<>> (Hmm1, Hmm2, N); + vector_sum<<>>(Hmm1, Hmm2, N); HIP_CHECK(hipDeviceSynchronize()); for (size_t v = 0; v < N; ++v) { if (Hmm2[v] != static_cast(v + v)) { @@ -259,18 +343,28 @@ TEMPLATE_TEST_CASE("Unit_hipMallocManaged_TwoPointers", "", // to all other devices. This include verification and Device two Device // transfers and kernel launch o discover if there any access issues. -TEMPLATE_TEST_CASE("Unit_hipMallocManaged_DeviceContextChange", "", - unsigned char, int, float, double) { +TEMPLATE_TEST_CASE("Unit_hipMallocManaged_DeviceContextChange", "", unsigned char, int, float, + double) { + auto managed = HmmAttrPrint(); + if (managed != 1) { + HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test."); + return; + } + std::atomic DataMismatch; - TestType *Ah1 = new TestType[N], *Ah2 = new TestType[N], *Ad = nullptr, - *Hmm = nullptr; + TestType *Ah1 = new TestType[N], *Ah2 = new TestType[N], *Ad = nullptr, *Hmm = nullptr; int NumDevices = 0; HIP_CHECK(hipGetDeviceCount(&NumDevices)); + if (NumDevices < 2) { + HipTest::HIP_SKIP_TEST("Skipping test because more than one device was not found."); + return; + } - for (size_t i =0; i < N; ++i) { + for (size_t i = 0; i < N; ++i) { Ah1[i] = INIT_VAL; Ah2[i] = 0; } + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); for (int Oloop = 0; Oloop < NumDevices; ++Oloop) { DataMismatch = 0; HIP_CHECK(hipSetDevice(Oloop)); @@ -279,8 +373,7 @@ TEMPLATE_TEST_CASE("Unit_hipMallocManaged_DeviceContextChange", "", HIP_CHECK(hipSetDevice(Iloop)); HIP_CHECK(hipMalloc(&Ad, N * sizeof(TestType))); // Copy data from host to hipMallocMananged memory and verify - HIP_CHECK(hipMemcpy(Hmm, Ah1, N * sizeof(TestType), - hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(Hmm, Ah1, N * sizeof(TestType), hipMemcpyHostToDevice)); for (size_t v = 0; v < N; ++v) { if (Hmm[v] != INIT_VAL) { DataMismatch++; @@ -289,10 +382,8 @@ TEMPLATE_TEST_CASE("Unit_hipMallocManaged_DeviceContextChange", "", REQUIRE(DataMismatch.load() == 0); // Executing D2D transfer with hipMallocManaged memory and verify - HIP_CHECK(hipMemcpy(Ad, Hmm, N * sizeof(TestType), - hipMemcpyDeviceToDevice)); - HIP_CHECK(hipMemcpy(Ah2, Ad, N * sizeof(TestType), - hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpy(Ad, Hmm, N * sizeof(TestType), hipMemcpyDeviceToDevice)); + HIP_CHECK(hipMemcpy(Ah2, Ad, N * sizeof(TestType), hipMemcpyDeviceToHost)); for (size_t k = 0; k < N; ++k) { if (Ah2[k] != INIT_VAL) { DataMismatch++; @@ -300,14 +391,11 @@ TEMPLATE_TEST_CASE("Unit_hipMallocManaged_DeviceContextChange", "", } REQUIRE(DataMismatch.load() == 0); HIP_CHECK(hipMemset(Ad, 0, N * sizeof(TestType))); - const unsigned threadsPerBlock = 256; - const unsigned blocks = (N + 255)/256; // Launching the kernel to check if there is any access issue with // hipMallocManaged memory and local device's memory - vector_sum <<>> (Hmm, Ad, N); - hipDeviceSynchronize(); - HIP_CHECK(hipMemcpy(Ah2, Ad, N * sizeof(TestType), - hipMemcpyDeviceToHost)); + vector_sum<<>>(Hmm, Ad, N); + HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK(hipMemcpy(Ah2, Ad, N * sizeof(TestType), hipMemcpyDeviceToHost)); for (size_t m = 0; m < N; ++m) { if (Ah2[m] != 246) { DataMismatch++; diff --git a/projects/hip-tests/catch/unit/memory/hipMallocMngdMultiThread.cc b/projects/hip-tests/catch/unit/memory/hipMallocMngdMultiThread.cc index a2f497058c..a163e9a0ab 100644 --- a/projects/hip-tests/catch/unit/memory/hipMallocMngdMultiThread.cc +++ b/projects/hip-tests/catch/unit/memory/hipMallocMngdMultiThread.cc @@ -20,25 +20,25 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include +#include "hipMallocManagedCommon.hh" #include // Kernel functions -__global__ void HmmMultiThread(int n, float *x, float *y) { +__global__ void HmmMultiThread(int n, float* x, float* y) { int index = blockIdx.x * blockDim.x + threadIdx.x; int stride = blockDim.x * gridDim.x; - for (int i = index; i < n; i += stride) - y[i] = x[i] * x[i]; + for (int i = index; i < n; i += stride) y[i] = x[i] * x[i]; } -__global__ void KrnlWth2MemTypes(int *Hmm, int *Dptr, size_t n) { +__global__ void KrnlWth2MemTypes(int* Hmm, int* Dptr, size_t n) { size_t index = blockIdx.x * blockDim.x + threadIdx.x; for (size_t i = index; i < n; i++) { Hmm[i] = Dptr[i] + 10; } } -__global__ void KernelMul_MngdMem123(int *Hmm, int *Dptr, size_t n) { +__global__ void KernelMul_MngdMem123(int* Hmm, int* Dptr, size_t n) { size_t index = blockIdx.x * blockDim.x + threadIdx.x; size_t stride = blockDim.x * gridDim.x; for (size_t i = index; i < n; i += stride) { @@ -47,32 +47,27 @@ __global__ void KernelMul_MngdMem123(int *Hmm, int *Dptr, size_t n) { } - // The following variable is used to determine the failure of test case -static bool IfTestPassed = true; +static bool IfTestPassed = true; -static void LaunchKrnl(int *Hmm1, size_t NumElms, int InitVal, int GpuOrdnl, - int AdviseFlg) { - int *Hmm2 = NULL; +static void LaunchKrnl(int* Hmm1, size_t NumElms, int InitVal, int GpuOrdnl, int AdviseFlg) { + int* Hmm2 = NULL; hipStream_t strm; HIPCHECK(hipSetDevice(GpuOrdnl)); HIPCHECK(hipStreamCreate(&strm)); if (AdviseFlg == 0) { - HIPCHECK(hipMemAdvise(Hmm1 , NumElms * sizeof(int), - hipMemAdviseSetReadMostly, GpuOrdnl)); + HIPCHECK(hipMemAdvise(Hmm1, NumElms * sizeof(int), hipMemAdviseSetReadMostly, GpuOrdnl)); } else if (AdviseFlg == 1) { - HIPCHECK(hipMemAdvise(Hmm1 , NumElms * sizeof(int), - hipMemAdviseSetPreferredLocation, GpuOrdnl)); + HIPCHECK(hipMemAdvise(Hmm1, NumElms * sizeof(int), hipMemAdviseSetPreferredLocation, GpuOrdnl)); } else if (AdviseFlg == 2) { - HIPCHECK(hipMemAdvise(Hmm1 , NumElms * sizeof(int), - hipMemAdviseSetAccessedBy, GpuOrdnl)); + HIPCHECK(hipMemAdvise(Hmm1, NumElms * sizeof(int), hipMemAdviseSetAccessedBy, GpuOrdnl)); } else if (AdviseFlg == 3) { HIPCHECK(hipMemPrefetchAsync(Hmm1, NumElms * sizeof(int), GpuOrdnl, strm)); HIPCHECK(hipStreamSynchronize(strm)); } HIPCHECK(hipMallocManaged(&Hmm2, (sizeof(int) * NumElms))); for (int i = 0; i < 2; ++i) { - KrnlWth2MemTypes<<<((NumElms + 63)/64), 64, 0, strm>>>(Hmm2, Hmm1, NumElms); + KrnlWth2MemTypes<<<((NumElms + 63) / 64), 64, 0, strm>>>(Hmm2, Hmm1, NumElms); HIPCHECK(hipStreamSynchronize(strm)); } // Verifying the result @@ -88,7 +83,7 @@ static void LaunchKrnl(int *Hmm1, size_t NumElms, int InitVal, int GpuOrdnl, } } -static void LaunchKrnl2(int *Hmm, size_t NumElms, int InitVal, int HmmMem) { +static void LaunchKrnl2(int* Hmm, size_t NumElms, int InitVal, int HmmMem) { int *ptr = nullptr, blockSize = 64, *HstPtr = nullptr; hipStream_t strm; HIPCHECK(hipStreamCreate(&strm)); @@ -99,7 +94,7 @@ static void LaunchKrnl2(int *Hmm, size_t NumElms, int InitVal, int HmmMem) { HIPCHECK(hipMallocManaged(&ptr, (sizeof(int) * NumElms))); } dim3 dimBlock(blockSize, 1, 1); - dim3 dimGrid((NumElms + blockSize -1)/blockSize, 1, 1); + dim3 dimGrid((NumElms + blockSize - 1) / blockSize, 1, 1); for (int i = 0; i < 2; ++i) { KrnlWth2MemTypes<<>>(ptr, Hmm, NumElms); } @@ -107,8 +102,7 @@ static void LaunchKrnl2(int *Hmm, size_t NumElms, int InitVal, int HmmMem) { // Verifying the result int DataMismatch = 0; if (HmmMem == 0) { - HIPCHECK(hipMemcpy(HstPtr, ptr, (sizeof(int) * NumElms), - hipMemcpyDeviceToHost)); + HIPCHECK(hipMemcpy(HstPtr, ptr, (sizeof(int) * NumElms), hipMemcpyDeviceToHost)); for (size_t i = 0; i < NumElms; ++i) { if (HstPtr[i] != (InitVal + 10)) { DataMismatch++; @@ -127,13 +121,13 @@ static void LaunchKrnl2(int *Hmm, size_t NumElms, int InitVal, int HmmMem) { } } -static void LaunchKrnl3(int *Dptr, size_t NumElms, int InitVal) { +static void LaunchKrnl3(int* Dptr, size_t NumElms, int InitVal) { int *Hmm = NULL, blockSize = 64; hipStream_t strm; HIPCHECK(hipStreamCreate(&strm)); HIPCHECK(hipMallocManaged(&Hmm, (sizeof(int) * NumElms))); dim3 dimBlock(blockSize, 1, 1); - dim3 dimGrid((NumElms + blockSize -1)/blockSize, 1, 1); + dim3 dimGrid((NumElms + blockSize - 1) / blockSize, 1, 1); for (int i = 0; i < 2; ++i) { KrnlWth2MemTypes<<>>(Hmm, Dptr, NumElms); } @@ -152,14 +146,13 @@ static void LaunchKrnl3(int *Dptr, size_t NumElms, int InitVal) { } -static void LaunchKrnl5(int *Hmm1, size_t NumElms, int InitVal, - int KerneltoLaunch) { +static void LaunchKrnl5(int* Hmm1, size_t NumElms, int InitVal, int KerneltoLaunch) { int *Hmm2 = NULL, blockSize = 64; hipStream_t strm; HIPCHECK(hipStreamCreate(&strm)); HIPCHECK(hipMallocManaged(&Hmm2, (sizeof(int) * NumElms))); dim3 dimBlock(blockSize, 1, 1); - dim3 dimGrid((NumElms + blockSize -1)/blockSize, 1, 1); + dim3 dimGrid((NumElms + blockSize - 1) / blockSize, 1, 1); for (int i = 0; i < 2; ++i) { if (KerneltoLaunch == 0) { KrnlWth2MemTypes<<>>(Hmm2, Hmm1, NumElms); @@ -200,8 +193,7 @@ static void TestFlagParamGlobal(int dev) { HIPCHECK(hipSetDevice(dev)); HIPCHECK(hipStreamCreate(&strm)); // Testing hipMemAttachGlobal Flag - HIPCHECK(hipMallocManaged(&HmmAG, NUM_ELMS * sizeof(float), - hipMemAttachGlobal)); + HIPCHECK(hipMallocManaged(&HmmAG, NUM_ELMS * sizeof(float), hipMemAttachGlobal)); // Initializing HmmAG memory for (int i = 0; i < NUM_ELMS; i++) { @@ -246,10 +238,8 @@ static void TestFlagParamHost(int dev) { hipStream_t strm; HIPCHECK(hipSetDevice(dev)); HIPCHECK(hipStreamCreate(&strm)); - HIPCHECK(hipMallocManaged(&HmmAH1, NUM_ELMS * sizeof(float), - hipMemAttachHost)); - HIPCHECK(hipMallocManaged(&HmmAH2, NUM_ELMS * sizeof(float), - hipMemAttachHost)); + HIPCHECK(hipMallocManaged(&HmmAH1, NUM_ELMS * sizeof(float), hipMemAttachHost)); + HIPCHECK(hipMallocManaged(&HmmAH2, NUM_ELMS * sizeof(float), hipMemAttachHost)); // Initializing HmmAH memory for (int i = 0; i < NUM_ELMS; i++) { HmmAH1[i] = INIT_VAL; @@ -294,77 +284,54 @@ static void AllocateHmmMemory(int flag, int device) { } } - -static int HmmAttrPrint() { - int managed = 0; - INFO("The following are the attribute values related to HMM for" - " device 0:\n"); - HIP_CHECK(hipDeviceGetAttribute(&managed, - hipDeviceAttributeDirectManagedMemAccessFromHost, 0)); - INFO("hipDeviceAttributeDirectManagedMemAccessFromHost: " << managed); - HIP_CHECK(hipDeviceGetAttribute(&managed, - hipDeviceAttributeConcurrentManagedAccess, 0)); - INFO("hipDeviceAttributeConcurrentManagedAccess: " << managed); - HIP_CHECK(hipDeviceGetAttribute(&managed, - hipDeviceAttributePageableMemoryAccess, 0)); - INFO("hipDeviceAttributePageableMemoryAccess: " << managed); - HIP_CHECK(hipDeviceGetAttribute(&managed, - hipDeviceAttributePageableMemoryAccessUsesHostPageTables, 0)); - INFO("hipDeviceAttributePageableMemoryAccessUsesHostPageTables:" - << managed); - - HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory, - 0)); - INFO("hipDeviceAttributeManagedMemory: " << managed); - return managed; -} - TEST_CASE("Unit_hipMallocManaged_MultiThread") { + auto managed = HmmAttrPrint(); + if (managed != 1) { + HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test."); + return; + } + IfTestPassed = true; - int NumDevs = 0, managed = 0, ATTACH_GLOBAL = 0, ATTACH_HOST = 1; + int NumDevs = 0, ATTACH_GLOBAL = 0, ATTACH_HOST = 1; int ITERATIONS = 10; - managed = HmmAttrPrint(); - if (managed) { - HIP_CHECK(hipGetDeviceCount(&NumDevs)); - std::vector T1; - std::vector T2; - for (int i = 0; i < NumDevs; ++i) { - for (int j = 0; j < ITERATIONS; ++j) { - T1.push_back(std::thread(TestFlagParamGlobal, i)); - T2.push_back(std::thread(AllocateHmmMemory, ATTACH_GLOBAL, i)); - } - for (auto &t1 : T1) { - if (t1.joinable()) { - t1.join(); - } - } - for (auto &t2 : T2) { - if (t2.joinable()) { - t2.join(); - } + + + HIP_CHECK(hipGetDeviceCount(&NumDevs)); + std::vector T1; + std::vector T2; + for (int i = 0; i < NumDevs; ++i) { + for (int j = 0; j < ITERATIONS; ++j) { + T1.push_back(std::thread(TestFlagParamGlobal, i)); + T2.push_back(std::thread(AllocateHmmMemory, ATTACH_GLOBAL, i)); + } + for (auto& t1 : T1) { + if (t1.joinable()) { + t1.join(); } } - T1.clear(); - T2.clear(); - for (int i = 0; i < NumDevs; ++i) { - for (int j = 0; j < ITERATIONS; ++j) { - T1.push_back(std::thread(TestFlagParamHost, i)); - T2.push_back(std::thread(AllocateHmmMemory, ATTACH_HOST, i)); - } - for (auto &t1 : T1) { - if (t1.joinable()) { - t1.join(); - } - } - for (auto &t2 : T2) { - if (t2.joinable()) { - t2.join(); - } + for (auto& t2 : T2) { + if (t2.joinable()) { + t2.join(); + } + } + } + T1.clear(); + T2.clear(); + for (int i = 0; i < NumDevs; ++i) { + for (int j = 0; j < ITERATIONS; ++j) { + T1.push_back(std::thread(TestFlagParamHost, i)); + T2.push_back(std::thread(AllocateHmmMemory, ATTACH_HOST, i)); + } + for (auto& t1 : T1) { + if (t1.joinable()) { + t1.join(); + } + } + for (auto& t2 : T2) { + if (t2.joinable()) { + t2.join(); } } - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory" - "attribute. Hence skipping the testing with Pass result.\n"); } REQUIRE(IfTestPassed); } @@ -372,175 +339,169 @@ TEST_CASE("Unit_hipMallocManaged_MultiThread") { // The following test checks what happens when same Hmm memory is used to // launch multiple threads over multiple gpus TEST_CASE("Unit_hipMallocManaged_MGpuMThread") { + auto managed = HmmAttrPrint(); + if (managed != 1) { + HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test."); + return; + } + IfTestPassed = true; int Ngpus = 0; HIP_CHECK(hipGetDeviceCount(&Ngpus)); if (Ngpus < 2) { - WARN("This test needs atleast 2 or more gpus, but the system"); - WARN(" has only " << Ngpus); - WARN(" gpus. Hence skipping the test."); - SUCCEED("\n"); + HipTest::HIP_SKIP_TEST("Skipping test because more than one device was not found."); + return; } - int managed = HmmAttrPrint(); - if (managed == 1) { - int InitVal = 123, *Hmm1 = NULL, NumElms = 4096*4; - HIP_CHECK(hipMallocManaged(&Hmm1, (NumElms * sizeof(int)))); - for (int i = 0; i < NumElms; ++i) { - Hmm1[i] = InitVal; - } - std::vector Thrds; - // AdviseFlg=0 for ReadMostly to be applied - // AdviseFlg=1 for PreferredLocation to be applied - // AdviseFlg=2 for AccessedBy to be applied - // AdviseFlg=3 to prefetch the memory to particular gpu - for (int AdviseFlg = 0; AdviseFlg < 4; ++AdviseFlg) { - for (int i = 0; i < Ngpus; ++i) { - Thrds.push_back(std::thread(LaunchKrnl, Hmm1, NumElms, InitVal, i, - AdviseFlg)); - } - for (auto &thr : Thrds) { - if (thr.joinable()) { - thr.join(); - } + int InitVal = 123, *Hmm1 = NULL, NumElms = 4096 * 4; + HIP_CHECK(hipMallocManaged(&Hmm1, (NumElms * sizeof(int)))); + for (int i = 0; i < NumElms; ++i) { + Hmm1[i] = InitVal; + } + + std::vector Thrds; + // AdviseFlg=0 for ReadMostly to be applied + // AdviseFlg=1 for PreferredLocation to be applied + // AdviseFlg=2 for AccessedBy to be applied + // AdviseFlg=3 to prefetch the memory to particular gpu + for (int AdviseFlg = 0; AdviseFlg < 4; ++AdviseFlg) { + for (int i = 0; i < Ngpus; ++i) { + Thrds.push_back(std::thread(LaunchKrnl, Hmm1, NumElms, InitVal, i, AdviseFlg)); + } + for (auto& thr : Thrds) { + if (thr.joinable()) { + thr.join(); } } - REQUIRE(IfTestPassed); - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); } + REQUIRE(IfTestPassed); } // The following test checks what happens when multiple kernels are launched // with same Hmm memory TEST_CASE("Unit_hipMallocManaged_MultiKrnlComnHmm") { - IfTestPassed = true; - int managed = HmmAttrPrint(); - if (managed == 1) { - int InitVal = 123, *Hmm = NULL, NumElms = 1024*4, TotThrds = 2; - int HmmMem2 = 0, *HstPtr = nullptr; // to indicate the thread that - // hipMalloc() memory has to be used - HstPtr = reinterpret_cast(new int[NumElms]); - HIP_CHECK(hipMalloc(&Hmm, (NumElms * sizeof(int)))); - for (int i = 0; i < NumElms; ++i) { - HstPtr[i] = InitVal; - } - HIP_CHECK(hipMemcpy(Hmm, HstPtr, (NumElms * sizeof(int)), - hipMemcpyHostToDevice)); - std::vector Thrds; - for (int i = 0; i < TotThrds; ++i) { - Thrds.push_back(std::thread(LaunchKrnl2, Hmm, NumElms, InitVal, HmmMem2)); - } - - for (auto &thr : Thrds) { - if (thr.joinable()) { - thr.join(); - } - } - delete[] HstPtr; - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); + auto managed = HmmAttrPrint(); + if (managed != 1) { + HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test."); + return; } + + IfTestPassed = true; + + int InitVal = 123, *Hmm = NULL, NumElms = 1024 * 4, TotThrds = 2; + int HmmMem2 = 0, *HstPtr = nullptr; // to indicate the thread that + // hipMalloc() memory has to be used + HstPtr = reinterpret_cast(new int[NumElms]); + HIP_CHECK(hipMalloc(&Hmm, (NumElms * sizeof(int)))); + for (int i = 0; i < NumElms; ++i) { + HstPtr[i] = InitVal; + } + HIP_CHECK(hipMemcpy(Hmm, HstPtr, (NumElms * sizeof(int)), hipMemcpyHostToDevice)); + std::vector Thrds; + for (int i = 0; i < TotThrds; ++i) { + Thrds.push_back(std::thread(LaunchKrnl2, Hmm, NumElms, InitVal, HmmMem2)); + } + + for (auto& thr : Thrds) { + if (thr.joinable()) { + thr.join(); + } + } + delete[] HstPtr; } // The following test checks what happens when multiple kernels are launched // with same hipMalloc() memory TEST_CASE("Unit_hipMallocManaged_MultiKrnlComnMalloc") { - IfTestPassed = true; - int managed = HmmAttrPrint(); - if (managed) { - int InitVal = 123, *Dptr = NULL, NumElms = 4096*8, TotThrds = 2; - int *HstPtr = reinterpret_cast(new int[NumElms]); - HIP_CHECK(hipMalloc(&Dptr, (NumElms * sizeof(int)))); - for (int i = 0; i < NumElms; ++i) { - HstPtr[i] = InitVal; - } - HIP_CHECK(hipMemcpy(Dptr, HstPtr, (NumElms * sizeof(int)), - hipMemcpyHostToDevice)); - std::vector Thrds; - for (int i = 0; i < TotThrds; ++i) { - Thrds.push_back(std::thread(LaunchKrnl3, Dptr, NumElms, InitVal)); - } - - for (auto &thr : Thrds) { - if (thr.joinable()) { - thr.join(); - } - } - delete[] HstPtr; - HIP_CHECK(hipFree(Dptr)); - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); + auto managed = HmmAttrPrint(); + if (managed != 1) { + HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test."); + return; } + + IfTestPassed = true; + int InitVal = 123, *Dptr = NULL, NumElms = 4096 * 8, TotThrds = 2; + int* HstPtr = reinterpret_cast(new int[NumElms]); + HIP_CHECK(hipMalloc(&Dptr, (NumElms * sizeof(int)))); + for (int i = 0; i < NumElms; ++i) { + HstPtr[i] = InitVal; + } + HIP_CHECK(hipMemcpy(Dptr, HstPtr, (NumElms * sizeof(int)), hipMemcpyHostToDevice)); + std::vector Thrds; + for (int i = 0; i < TotThrds; ++i) { + Thrds.push_back(std::thread(LaunchKrnl3, Dptr, NumElms, InitVal)); + } + + for (auto& thr : Thrds) { + if (thr.joinable()) { + thr.join(); + } + } + delete[] HstPtr; + HIP_CHECK(hipFree(Dptr)); } // The following section tests the scenario wherein multiple threads use their // own stream to launch kernel on common Hmm memory TEST_CASE("Unit_hipMallocManaged_MultiThrdMultiStrm") { - IfTestPassed = true; - int managed = HmmAttrPrint(); - if (managed == 1) { - int NumElms = 4096*4; - int *Hmm1 = NULL, TotlThrds = 4, InitVal = 123; - int HmmMem = 1; // to indicate the thread that Hmm memory need to be - // used inside it - HIP_CHECK(hipMallocManaged(&Hmm1, (NumElms * sizeof(int)))); - for (int i = 0; i < NumElms; ++i) { - Hmm1[i] = InitVal; - } - std::vector Thrds; - for (int i = 0; i < TotlThrds; ++i) { - Thrds.push_back(std::thread(LaunchKrnl2, Hmm1, NumElms, InitVal, HmmMem)); - } + auto managed = HmmAttrPrint(); + if (managed != 1) { + HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test."); + return; + } - for (auto &thr : Thrds) { - if (thr.joinable()) { - thr.join(); - } + IfTestPassed = true; + + int NumElms = 4096 * 4; + int *Hmm1 = NULL, TotlThrds = 4, InitVal = 123; + int HmmMem = 1; // to indicate the thread that Hmm memory need to be + // used inside it + HIP_CHECK(hipMallocManaged(&Hmm1, (NumElms * sizeof(int)))); + for (int i = 0; i < NumElms; ++i) { + Hmm1[i] = InitVal; + } + std::vector Thrds; + for (int i = 0; i < TotlThrds; ++i) { + Thrds.push_back(std::thread(LaunchKrnl2, Hmm1, NumElms, InitVal, HmmMem)); + } + + for (auto& thr : Thrds) { + if (thr.joinable()) { + thr.join(); } - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); } } - // The following section tests the scenario wherein two threads each use // different kernel but common HMM memory TEST_CASE("Unit_hipMallocManaged_TwoKrnlsComnHmmMem") { - IfTestPassed = true; - int managed = HmmAttrPrint(); - if (managed == 1) { - int InitVal = 123, *Dptr = NULL, NumElms = 4096*4, TotThrds = 2; - int *HstPtr = reinterpret_cast(new int[NumElms]); - HIP_CHECK(hipMalloc(&Dptr, (NumElms * sizeof(int)))); - for (int i = 0; i < NumElms; ++i) { - HstPtr[i] = InitVal; - } - HIP_CHECK(hipMemcpy(Dptr, HstPtr, (NumElms * sizeof(int)), - hipMemcpyHostToDevice)); - std::vector Thrds; - for (int i = 0; i < TotThrds; ++i) { - Thrds.push_back(std::thread(LaunchKrnl5, Dptr, NumElms, InitVal, i)); - } - - for (auto &thr : Thrds) { - if (thr.joinable()) { - thr.join(); - } - } - delete[] HstPtr; - HIP_CHECK(hipFree(Dptr)); - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); + auto managed = HmmAttrPrint(); + if (managed != 1) { + HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test."); + return; } + + IfTestPassed = true; + int InitVal = 123, *Dptr = NULL, NumElms = 4096 * 4, TotThrds = 2; + int* HstPtr = reinterpret_cast(new int[NumElms]); + HIP_CHECK(hipMalloc(&Dptr, (NumElms * sizeof(int)))); + for (int i = 0; i < NumElms; ++i) { + HstPtr[i] = InitVal; + } + HIP_CHECK(hipMemcpy(Dptr, HstPtr, (NumElms * sizeof(int)), hipMemcpyHostToDevice)); + std::vector Thrds; + for (int i = 0; i < TotThrds; ++i) { + Thrds.push_back(std::thread(LaunchKrnl5, Dptr, NumElms, InitVal, i)); + } + + for (auto& thr : Thrds) { + if (thr.joinable()) { + thr.join(); + } + } + delete[] HstPtr; + HIP_CHECK(hipFree(Dptr)); } - -