Extending hipMallocManaged tests (#2670)

* Extending hipMallocManaged tests * Fixed compilation error * Added tests skips for hipMallocManaged tests on devices that don't support managed memory * Removed unused stream [ROCm/hip-tests commit: 4b07ea6125]
2022-07-29 04:05:27 +01:00
@@ -166,7 +166,7 @@ static inline unsigned setNumBlocks(unsigned blocksPerCU, unsigned threadsPerBlo
  HIP_CHECK(hipGetDeviceProperties(&props, device));

  unsigned blocks = props.multiProcessorCount * blocksPerCU;
-  if (blocks * threadsPerBlock > N) {
+  if (blocks * threadsPerBlock < N) {
    blocks = (N + threadsPerBlock - 1) / threadsPerBlock;
  }

@@ -1,5 +1,5 @@
 /*
-   Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
+   Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
   Permission is hereby granted, free of charge, to any person obtaining a copy
   of this software and associated documentation files (the "Software"), to deal
   in the Software without restriction, including without limitation the rights
@@ -24,15 +24,14 @@
       only on HMM enabled devices
 */

-#include <hip_test_common.hh>
+#include "hipMallocManagedCommon.hh"
 #include <hip_test_kernels.hh>
 #include <hip_test_checkers.hh>


-
 // Kernel functions

-__global__ void KernelMul_MngdMem(int *Hmm, int *Dptr, size_t n) {
+__global__ void KernelMul_MngdMem(int* Hmm, int* Dptr, size_t n) {
  size_t index = blockIdx.x * blockDim.x + threadIdx.x;
  size_t stride = blockDim.x * gridDim.x;
  for (size_t i = index; i < n; i += stride) {
@@ -40,7 +39,7 @@ __global__ void KernelMul_MngdMem(int *Hmm, int *Dptr, size_t n) {
  }
 }

-__global__ void KernelMulAdd_MngdMem(int *Hmm, size_t n) {
+__global__ void KernelMulAdd_MngdMem(int* Hmm, size_t n) {
  size_t index = blockIdx.x * blockDim.x + threadIdx.x;
  size_t stride = blockDim.x * gridDim.x;
  for (size_t i = index; i < n; i += stride) {
@@ -48,130 +47,93 @@ __global__ void KernelMulAdd_MngdMem(int *Hmm, size_t n) {
  }
 }

-__global__ void KrnlWth2MemTypesC(unsigned char *Hmm, unsigned char *Dptr,
-                                  size_t n) {
-  size_t index = blockIdx.x * blockDim.x + threadIdx.x;
-  size_t stride = blockDim.x * gridDim.x;
-  for (size_t i = index; i < n; i += stride) {
-    Hmm[i] = Dptr[i] + 10;
-  }
-}

-
-static int HmmAttrPrint() {
-  int managed = 0;
-  INFO("The following are the attribute values related to HMM for"
-         " device 0:\n");
-  HIP_CHECK(hipDeviceGetAttribute(&managed,
-              hipDeviceAttributeDirectManagedMemAccessFromHost, 0));
-  INFO("hipDeviceAttributeDirectManagedMemAccessFromHost: " << managed);
-  HIP_CHECK(hipDeviceGetAttribute(&managed,
-                                 hipDeviceAttributeConcurrentManagedAccess, 0));
-  INFO("hipDeviceAttributeConcurrentManagedAccess: " << managed);
-  HIP_CHECK(hipDeviceGetAttribute(&managed,
-                                 hipDeviceAttributePageableMemoryAccess, 0));
-  INFO("hipDeviceAttributePageableMemoryAccess: " << managed);
-  HIP_CHECK(hipDeviceGetAttribute(&managed,
-              hipDeviceAttributePageableMemoryAccessUsesHostPageTables, 0));
-  INFO("hipDeviceAttributePageableMemoryAccessUsesHostPageTables:"
-         << managed);
-
-  HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory,
-                                  0));
-  INFO("hipDeviceAttributeManagedMemory: " << managed);
-  return managed;
-}
-
-
-
-static size_t N{4 * 1024 * 1024};
+static size_t numElements{64 * 1024 * 1024};
 static unsigned blocksPerCU{6};
 static unsigned threadsPerBlock{256};

 /*
   This testcase verifies the hipMallocManaged basic scenario - supported on all devices
 */
-
 TEST_CASE("Unit_hipMallocManaged_Basic") {
-    int numElements = (N < (64 * 1024 * 1024)) ? 64 * 1024 * 1024 : N;
-    float *A, *B, *C;
+  auto managed = HmmAttrPrint();
+  if (managed != 1) {
+    WARN(
+        "GPU doesn't support hipDeviceAttributeManagedMemory attribute so defaulting to system "
+        "memory.");
+  }

-    HIP_CHECK(hipMallocManaged(&A, numElements*sizeof(float)));
-    HIP_CHECK(hipMallocManaged(&B, numElements*sizeof(float)));
-    HIP_CHECK(hipMallocManaged(&C, numElements*sizeof(float)));
+  float *A, *B, *C;
+
+  HIP_CHECK(hipMallocManaged(&A, numElements * sizeof(float)));
+  HIP_CHECK(hipMallocManaged(&B, numElements * sizeof(float)));
+  HIP_CHECK(hipMallocManaged(&C, numElements * sizeof(float)));
 }

 /*
-   This testcase verifies the hipMallocManaged basic scenario - supported only on HMM enabled devices
+   This testcase verifies the hipMallocManaged advanced scenario - supported only on HMM enabled
+   devices
 */
-
 TEST_CASE("Unit_hipMallocManaged_Advanced") {
-  int managed =  HmmAttrPrint();
-  if (managed == 1) {
-    int numElements = (N < (64 * 1024 * 1024)) ? 64 * 1024 * 1024 : N;
-    float *A, *B, *C;
-
-    HIP_CHECK(hipMallocManaged(&A, numElements*sizeof(float)));
-    HIP_CHECK(hipMallocManaged(&B, numElements*sizeof(float)));
-    HIP_CHECK(hipMallocManaged(&C, numElements*sizeof(float)));
-    HipTest::setDefaultData(numElements, A, B, C);
-
-    hipDevice_t device = hipCpuDeviceId;
-
-    HIP_CHECK(hipMemAdvise(A, numElements*sizeof(float),
-          hipMemAdviseSetReadMostly, device));
-    HIP_CHECK(hipMemPrefetchAsync(A, numElements*sizeof(float), 0));
-    HIP_CHECK(hipMemPrefetchAsync(B, numElements*sizeof(float), 0));
-    HIP_CHECK(hipDeviceSynchronize());
-    HIP_CHECK(hipMemRangeGetAttribute(&device, sizeof(device),
-          hipMemRangeAttributeLastPrefetchLocation,
-          A, numElements*sizeof(float)));
-    if (device != 0) {
-      INFO("hipMemRangeGetAttribute error, device = " << device);
-    }
-    uint32_t read_only = 0xf;
-    HIP_CHECK(hipMemRangeGetAttribute(&read_only, sizeof(read_only),
-          hipMemRangeAttributeReadMostly,
-          A, numElements*sizeof(float)));
-    if (read_only != 1) {
-      SUCCEED("hipMemRangeGetAttribute error, read_only = " << read_only);
-    }
-
-    unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock,
-                                            numElements);
-    hipEvent_t event0, event1;
-    HIP_CHECK(hipEventCreate(&event0));
-    HIP_CHECK(hipEventCreate(&event1));
-    HIP_CHECK(hipEventRecord(event0, 0));
-    hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
-        0, 0, static_cast<const float*>(A),
-        static_cast<const float*>(B), C, numElements);
-    HIP_CHECK(hipEventRecord(event1, 0));
-    HIP_CHECK(hipDeviceSynchronize());
-    float time = 0.0f;
-    HIP_CHECK(hipEventElapsedTime(&time, event0, event1));
-    printf("Time %.3f ms\n", time);
-    float maxError = 0.0f;
-    HIP_CHECK(hipMemPrefetchAsync(B, numElements*sizeof(float),
-                                  hipCpuDeviceId));
-    HIP_CHECK(hipDeviceSynchronize());
-    device = 0;
-    HIP_CHECK(hipMemRangeGetAttribute(&device, sizeof(device),
-          hipMemRangeAttributeLastPrefetchLocation,
-          A, numElements*sizeof(float)));
-    if (device != hipCpuDeviceId) {
-      SUCCEED("hipMemRangeGetAttribute error device = " << device);
-    }
-
-    for (int i = 0; i < numElements; i++) {
-      maxError = fmax(maxError, fabs(B[i]-3.0f));
-    }
-    HIP_CHECK(hipFree(A));
-    HIP_CHECK(hipFree(B));
-    REQUIRE(maxError != 0.0f);
-  } else {
-    SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory "
-           "attribute. Hence skipping the testing with Pass result.\n");
+  auto managed = HmmAttrPrint();
+  if (managed != 1) {
+    HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
+    return;
  }
-}

+  float *A, *B, *C;
+
+  HIP_CHECK(hipMallocManaged(&A, numElements * sizeof(float)));
+  HIP_CHECK(hipMallocManaged(&B, numElements * sizeof(float)));
+  HIP_CHECK(hipMallocManaged(&C, numElements * sizeof(float)));
+  HipTest::setDefaultData(numElements, A, B, C);
+
+  hipDevice_t device = hipCpuDeviceId;
+
+  HIP_CHECK(hipMemAdvise(A, numElements * sizeof(float), hipMemAdviseSetReadMostly, device));
+  HIP_CHECK(hipMemPrefetchAsync(A, numElements * sizeof(float), 0));
+  HIP_CHECK(hipMemPrefetchAsync(B, numElements * sizeof(float), 0));
+  HIP_CHECK(hipDeviceSynchronize());
+  HIP_CHECK(hipMemRangeGetAttribute(&device, sizeof(device),
+                                    hipMemRangeAttributeLastPrefetchLocation, A,
+                                    numElements * sizeof(float)));
+  if (device != 0) {
+    INFO("hipMemRangeGetAttribute error, device = " << device);
+  }
+  uint32_t read_only = 0xf;
+  HIP_CHECK(hipMemRangeGetAttribute(&read_only, sizeof(read_only), hipMemRangeAttributeReadMostly,
+                                    A, numElements * sizeof(float)));
+  if (read_only != 1) {
+    SUCCEED("hipMemRangeGetAttribute error, read_only = " << read_only);
+  }
+
+  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements);
+  hipEvent_t event0, event1;
+  HIP_CHECK(hipEventCreate(&event0));
+  HIP_CHECK(hipEventCreate(&event1));
+  HIP_CHECK(hipEventRecord(event0, 0));
+  hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
+                     static_cast<const float*>(A), static_cast<const float*>(B), C, numElements);
+  HIP_CHECK(hipEventRecord(event1, 0));
+  HIP_CHECK(hipDeviceSynchronize());
+  float time = 0.0f;
+  HIP_CHECK(hipEventElapsedTime(&time, event0, event1));
+  printf("Time %.3f ms\n", time);
+  float maxError = 0.0f;
+  HIP_CHECK(hipMemPrefetchAsync(B, numElements * sizeof(float), hipCpuDeviceId));
+  HIP_CHECK(hipDeviceSynchronize());
+  device = 0;
+  HIP_CHECK(hipMemRangeGetAttribute(&device, sizeof(device),
+                                    hipMemRangeAttributeLastPrefetchLocation, A,
+                                    numElements * sizeof(float)));
+  if (device != hipCpuDeviceId) {
+    SUCCEED("hipMemRangeGetAttribute error device = " << device);
+  }
+
+  for (size_t i = 0; i < numElements; i++) {
+    maxError = fmax(maxError, fabs(B[i] - 3.0f));
+  }
+  HIP_CHECK(hipFree(A));
+  HIP_CHECK(hipFree(B));
+  REQUIRE(maxError != 0.0f);
+}
@@ -0,0 +1,26 @@
+#include <hip_test_common.hh>
+
+static int HmmAttrPrint() {
+  int managed = 0;
+  INFO(
+      "The following are the attribute values related to HMM for"
+      " device 0:\n");
+  HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeDirectManagedMemAccessFromHost, 0));
+  INFO("hipDeviceAttributeDirectManagedMemAccessFromHost: " << managed);
+  HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeConcurrentManagedAccess, 0));
+  INFO("hipDeviceAttributeConcurrentManagedAccess: " << managed);
+  HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributePageableMemoryAccess, 0));
+  INFO("hipDeviceAttributePageableMemoryAccess: " << managed);
+  HIP_CHECK(
+      hipDeviceGetAttribute(&managed, hipDeviceAttributePageableMemoryAccessUsesHostPageTables, 0));
+  INFO("hipDeviceAttributePageableMemoryAccessUsesHostPageTables:" << managed);
+
+  HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory, 0));
+  INFO("hipDeviceAttributeManagedMemory: " << managed);
+  if (managed != 1) {
+    WARN(
+        "GPU 0 doesn't support hipDeviceAttributeManagedMemory attribute so defaulting to system "
+        "memory.");
+  }
+  return managed;
+}
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -20,245 +20,210 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 */

-#include <hip_test_common.hh>
+#include "hipMallocManagedCommon.hh"
 #include <atomic>

 // Kernel function
-__global__ void MallcMangdFlgTst(int n, float *x, float *y) {
+__global__ void MallcMangdFlgTst(int n, float* x, float* y) {
  int index = blockIdx.x * blockDim.x + threadIdx.x;
  int stride = blockDim.x * gridDim.x;
-  for (int i = index; i < n; i += stride)
-    y[i] = x[i] * x[i];
-}
-
-
-// The following function prints info on attributes related to HMM
-static int HmmAttrPrint() {
-  int managed = 0;
-  INFO("The following are the attribute values related to HMM for"
-         " device 0:\n");
-  HIP_CHECK(hipDeviceGetAttribute(&managed,
-              hipDeviceAttributeDirectManagedMemAccessFromHost, 0));
-  INFO("hipDeviceAttributeDirectManagedMemAccessFromHost: " << managed);
-  HIP_CHECK(hipDeviceGetAttribute(&managed,
-                                 hipDeviceAttributeConcurrentManagedAccess, 0));
-  INFO("hipDeviceAttributeConcurrentManagedAccess: " << managed);
-  HIP_CHECK(hipDeviceGetAttribute(&managed,
-                                 hipDeviceAttributePageableMemoryAccess, 0));
-  INFO("hipDeviceAttributePageableMemoryAccess: " << managed);
-  HIP_CHECK(hipDeviceGetAttribute(&managed,
-              hipDeviceAttributePageableMemoryAccessUsesHostPageTables, 0));
-  INFO("hipDeviceAttributePageableMemoryAccessUsesHostPageTables:"
-         << managed);
-
-  HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory,
-                                  0));
-  INFO("hipDeviceAttributeManagedMemory: " << managed);
-  return managed;
+  for (int i = index; i < n; i += stride) y[i] = x[i] * x[i];
 }

 // The following section tests working of hipMallocManaged with flag parameters
 TEST_CASE("Unit_hipMallocManaged_FlgParam") {
-  int managed = HmmAttrPrint();
-  if (managed == 1) {
-    std::atomic<int> DataMismatch{0};
-    bool IfTestPassed = true;
-    float *HmmAG = NULL, *HmmAH1 = NULL, *HmmAH2 = NULL, INIT_VAL = 2.5;
-    int NumDevs = 0, NUM_ELMS = 4096;
-    HIP_CHECK(hipGetDeviceCount(&NumDevs));
-    float *Ad = NULL, *Ah = NULL;
-    Ah = new float[NUM_ELMS];
-    // Testing hipMemAttachGlobal Flag
-    HIP_CHECK(hipMallocManaged(&HmmAG, NUM_ELMS * sizeof(float),
-                              hipMemAttachGlobal));
-
-    // Initializing HmmAG memory
-    for (int i = 0; i < NUM_ELMS; i++) {
-      HmmAG[i] = INIT_VAL;
-      Ah[i] = 0;
-    }
-
-    int blockSize = 256;
-    int numBlocks = (NUM_ELMS + blockSize - 1) / blockSize;
-    dim3 dimGrid(numBlocks, 1, 1);
-    dim3 dimBlock(blockSize, 1, 1);
-    hipStream_t strm;
-    for (int i = 0; i < NumDevs; i++) {
-      HIP_CHECK(hipSetDevice(i));
-      HIP_CHECK(hipStreamCreate(&strm));
-      HIP_CHECK(hipMalloc(&Ad, NUM_ELMS * sizeof(float)));
-      HIP_CHECK(hipMemset(Ad, 0, NUM_ELMS * sizeof(float)));
-      MallcMangdFlgTst<<<dimGrid, dimBlock, 0, strm>>>(NUM_ELMS, HmmAG, Ad);
-      HIP_CHECK(hipStreamSynchronize(strm));
-      HIP_CHECK(hipMemcpy(Ah, Ad, NUM_ELMS * sizeof(float),
-                         hipMemcpyDeviceToHost));
-      for (int j = 0; j < NUM_ELMS; ++j) {
-        if (Ah[j] != (INIT_VAL * INIT_VAL)) {
-          DataMismatch++;
-        }
-      }
-      if (DataMismatch != 0) {
-        WARN("Data Mismatch observed when kernel launched on");
-        WARN(" device: " << i);
-        IfTestPassed = false;
-      }
-      DataMismatch = 0;
-
-      HIP_CHECK(hipFree(Ad));
-      HIP_CHECK(hipStreamDestroy(strm));
-    }
-    delete[] Ah;
-    HIP_CHECK(hipFree(HmmAG));
-
-    DataMismatch = 0;
-    HIP_CHECK(hipMallocManaged(&HmmAH1, NUM_ELMS * sizeof(float),
-                              hipMemAttachHost));
-    HIP_CHECK(hipMallocManaged(&HmmAH2, NUM_ELMS * sizeof(float),
-                              hipMemAttachHost));
-
-    // Initializing HmmAH memory
-    for (int i = 0; i < NUM_ELMS; i++) {
-      HmmAH1[i] = INIT_VAL;
-      HmmAH2[i] = 0;
-    }
-    for (int i = 0; i < NumDevs; i++) {
-      HIP_CHECK(hipSetDevice(i));
-      HIP_CHECK(hipStreamCreate(&strm));
-      HIP_CHECK(hipMemset(HmmAH2, 0, NUM_ELMS * sizeof(float)));
-      MallcMangdFlgTst<<<dimGrid, dimBlock, 0, strm>>>(NUM_ELMS,
-                                                       HmmAH1, HmmAH2);
-      HIP_CHECK(hipStreamSynchronize(strm));
-      for (int j = 0; j < NUM_ELMS; ++j) {
-        if (HmmAH2[j] != (INIT_VAL * INIT_VAL)) {
-          DataMismatch++;
-        }
-      }
-      if (DataMismatch != 0) {
-        WARN("Data Mismatch observed when kernel launched on");
-        WARN(" device: " << i);
-        IfTestPassed = false;
-      }
-      HIP_CHECK(hipStreamDestroy(strm));
-    }
-    HIP_CHECK(hipFree(HmmAH1));
-    HIP_CHECK(hipFree(HmmAH2));
-    REQUIRE(IfTestPassed);
-  } else {
-    SUCCEED("Gpu doesnt support HMM! Hence skipping the test with PASS result");
+  
+  auto managed = HmmAttrPrint();
+  if (managed != 1) {
+    HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
+    return;
  }
+  
+  std::atomic<int> DataMismatch{0};
+  bool IfTestPassed = true;
+  float *HmmAG = NULL, *HmmAH1 = NULL, *HmmAH2 = NULL, INIT_VAL = 2.5;
+  int NumDevs = 0, NUM_ELMS = 4096;
+  HIP_CHECK(hipGetDeviceCount(&NumDevs));
+  float *Ad = NULL, *Ah = NULL;
+  Ah = new float[NUM_ELMS];
+  // Testing hipMemAttachGlobal Flag
+  HIP_CHECK(hipMallocManaged(&HmmAG, NUM_ELMS * sizeof(float), hipMemAttachGlobal));
+
+  // Initializing HmmAG memory
+  for (int i = 0; i < NUM_ELMS; i++) {
+    HmmAG[i] = INIT_VAL;
+    Ah[i] = 0;
+  }
+
+  int blockSize = 256;
+  int numBlocks = (NUM_ELMS + blockSize - 1) / blockSize;
+  dim3 dimGrid(numBlocks, 1, 1);
+  dim3 dimBlock(blockSize, 1, 1);
+  hipStream_t strm;
+  for (int i = 0; i < NumDevs; i++) {
+    HIP_CHECK(hipSetDevice(i));
+    HIP_CHECK(hipStreamCreate(&strm));
+    HIP_CHECK(hipMalloc(&Ad, NUM_ELMS * sizeof(float)));
+    HIP_CHECK(hipMemset(Ad, 0, NUM_ELMS * sizeof(float)));
+    MallcMangdFlgTst<<<dimGrid, dimBlock, 0, strm>>>(NUM_ELMS, HmmAG, Ad);
+    HIP_CHECK(hipStreamSynchronize(strm));
+    HIP_CHECK(hipMemcpy(Ah, Ad, NUM_ELMS * sizeof(float), hipMemcpyDeviceToHost));
+    for (int j = 0; j < NUM_ELMS; ++j) {
+      if (Ah[j] != (INIT_VAL * INIT_VAL)) {
+        DataMismatch++;
+      }
+    }
+    if (DataMismatch != 0) {
+      WARN("Data Mismatch observed when kernel launched on");
+      WARN(" device: " << i);
+      IfTestPassed = false;
+    }
+    DataMismatch = 0;
+
+    HIP_CHECK(hipFree(Ad));
+    HIP_CHECK(hipStreamDestroy(strm));
+  }
+  delete[] Ah;
+  HIP_CHECK(hipFree(HmmAG));
+
+  DataMismatch = 0;
+  HIP_CHECK(hipMallocManaged(&HmmAH1, NUM_ELMS * sizeof(float), hipMemAttachHost));
+  HIP_CHECK(hipMallocManaged(&HmmAH2, NUM_ELMS * sizeof(float), hipMemAttachHost));
+
+  // Initializing HmmAH memory
+  for (int i = 0; i < NUM_ELMS; i++) {
+    HmmAH1[i] = INIT_VAL;
+    HmmAH2[i] = 0;
+  }
+  for (int i = 0; i < NumDevs; i++) {
+    HIP_CHECK(hipSetDevice(i));
+    HIP_CHECK(hipStreamCreate(&strm));
+    HIP_CHECK(hipMemset(HmmAH2, 0, NUM_ELMS * sizeof(float)));
+    MallcMangdFlgTst<<<dimGrid, dimBlock, 0, strm>>>(NUM_ELMS, HmmAH1, HmmAH2);
+    HIP_CHECK(hipStreamSynchronize(strm));
+    for (int j = 0; j < NUM_ELMS; ++j) {
+      if (HmmAH2[j] != (INIT_VAL * INIT_VAL)) {
+        DataMismatch++;
+      }
+    }
+    if (DataMismatch != 0) {
+      WARN("Data Mismatch observed when kernel launched on");
+      WARN(" device: " << i);
+      IfTestPassed = false;
+    }
+    HIP_CHECK(hipStreamDestroy(strm));
+  }
+  HIP_CHECK(hipFree(HmmAH1));
+  HIP_CHECK(hipFree(HmmAH2));
+  REQUIRE(IfTestPassed);
 }

 // The following function tests Memory access allocated using hipMallocManaged
 // in multiple streams
 TEST_CASE("Unit_hipMallocManaged_AccessMultiStream") {
-  int managed = HmmAttrPrint();
-  if (managed == 1) {
-    std::atomic<int> DataMismatch{0};
-    bool IfTestPassed = true;
-    float *HmmAG = NULL, *HmmAH1 = NULL, *HmmAH2 = NULL, INIT_VAL = 2.5;
-    int NumStrms = 0, MultiDevice = 0, NUM_ELMS = 4096;
-    HIP_CHECK(hipGetDeviceCount(&MultiDevice));
-    if (MultiDevice >= 2) {
-      HIP_CHECK(hipGetDeviceCount(&NumStrms));
-    } else {
-      NumStrms = 4;
-    }
-    hipStream_t **Stream = new hipStream_t*[NumStrms];
-    for (int i = 0; i < NumStrms; ++i) {
-      Stream[i] = reinterpret_cast<hipStream_t*>(malloc(sizeof(hipStream_t)));
-    }
-    float *Ad = NULL, *Ah = NULL;
-    Ah = new float[NUM_ELMS];
-    for (int i = 0; i < NumStrms; ++i) {
-      if (MultiDevice >= 2) {
-        HIP_CHECK(hipSetDevice(i));
-      }
-      HIP_CHECK(hipStreamCreate(Stream[i]));
-    }
-    HIP_CHECK(hipSetDevice(0));
-    // Testing hipMemAttachGlobal Flag
-    HIP_CHECK(hipMallocManaged(&HmmAG, NUM_ELMS * sizeof(float),
-                              hipMemAttachGlobal));
-
-    // Initializing HmmAG memory
-    for (int i = 0; i < NUM_ELMS; i++) {
-      HmmAG[i] = INIT_VAL;
-      Ah[i] = 0;
-    }
-
-    int blockSize = 256;
-    int numBlocks = (NUM_ELMS + blockSize - 1) / blockSize;
-    dim3 dimGrid(numBlocks, 1, 1);
-    dim3 dimBlock(blockSize, 1, 1);
-    for (int i = 0; i < NumStrms; i++) {
-      if (MultiDevice >= 2) {
-        HIP_CHECK(hipSetDevice(i));
-      }
-      HIP_CHECK(hipMalloc(&Ad, NUM_ELMS * sizeof(float)));
-      HIP_CHECK(hipMemset(Ad, 0, NUM_ELMS * sizeof(float)));
-      MallcMangdFlgTst<<<dimGrid, dimBlock, 0, *(Stream[i])>>>(NUM_ELMS,
-                                                            HmmAG, Ad);
-      HIP_CHECK(hipStreamSynchronize(*(Stream[i])));
-      // Validating the results
-      HIP_CHECK(hipMemcpy(Ah, Ad, NUM_ELMS * sizeof(float),
-                         hipMemcpyDeviceToHost));
-      for (int j = 0; j < NUM_ELMS; ++j) {
-        if (Ah[j] != (INIT_VAL * INIT_VAL)) {
-          DataMismatch++;
-        }
-      }
-      if (DataMismatch != 0) {
-        WARN("Data Mismatch observed when kernel launched on");
-        WARN(" device: " << i);
-        IfTestPassed = false;
-      }
-      DataMismatch = 0;
-
-      HIP_CHECK(hipFree(Ad));
-    }
-    delete[] Ah;
-    HIP_CHECK(hipFree(HmmAG));
-
-    DataMismatch = 0;
-    HIP_CHECK(hipMallocManaged(&HmmAH1, NUM_ELMS * sizeof(float),
-                                hipMemAttachHost));
-    HIP_CHECK(hipMallocManaged(&HmmAH2, NUM_ELMS * sizeof(float),
-                              hipMemAttachHost));
-
-    // Initializing HmmAH memory
-    for (int i = 0; i < NUM_ELMS; i++) {
-      HmmAH1[i] = INIT_VAL;
-      HmmAH2[i] = 0;
-    }
-    for (int i = 0; i < NumStrms; i++) {
-      if (MultiDevice >= 2) {
-        HIP_CHECK(hipSetDevice(i));
-      }
-      HIP_CHECK(hipMemset(HmmAH2, 0, NUM_ELMS * sizeof(float)));
-      MallcMangdFlgTst<<<dimGrid, dimBlock, 0, *(Stream[i])>>>(NUM_ELMS,
-                                                            HmmAH1, HmmAH2);
-      HIP_CHECK(hipStreamSynchronize(*(Stream[i])));
-      for (int j = 0; j < NUM_ELMS; ++j) {
-        if (HmmAH2[j] != (INIT_VAL * INIT_VAL)) {
-          DataMismatch++;
-          break;
-        }
-      }
-      if (DataMismatch != 0) {
-        WARN("Data Mismatch observed when kernel launched on");
-        WARN(" device: " << i);
-        IfTestPassed = false;
-      }
-    }
-
-    HIP_CHECK(hipFree(HmmAH1));
-    HIP_CHECK(hipFree(HmmAH2));
-    for (int i = 0; i < NumStrms; ++i) {
-      HIP_CHECK(hipStreamDestroy(*(Stream[i])));
-    }
-    REQUIRE(IfTestPassed);
-  } else {
-    SUCCEED("Gpu doesnt support HMM! Hence skipping the test with PASS result");
+  
+  auto managed = HmmAttrPrint();
+  if (managed != 1) {
+    HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
+    return;
  }
-}
+  
+  std::atomic<int> DataMismatch{0};
+  bool IfTestPassed = true;
+  float *HmmAG = NULL, *HmmAH1 = NULL, *HmmAH2 = NULL, INIT_VAL = 2.5;
+  int NumStrms = 0, MultiDevice = 0, NUM_ELMS = 4096;
+  HIP_CHECK(hipGetDeviceCount(&MultiDevice));
+  if (MultiDevice >= 2) {
+    HIP_CHECK(hipGetDeviceCount(&NumStrms));
+  } else {
+    NumStrms = 4;
+  }
+  hipStream_t** Stream = new hipStream_t*[NumStrms];
+  for (int i = 0; i < NumStrms; ++i) {
+    Stream[i] = reinterpret_cast<hipStream_t*>(malloc(sizeof(hipStream_t)));
+  }
+  float *Ad = NULL, *Ah = NULL;
+  Ah = new float[NUM_ELMS];
+  for (int i = 0; i < NumStrms; ++i) {
+    if (MultiDevice >= 2) {
+      HIP_CHECK(hipSetDevice(i));
+    }
+    HIP_CHECK(hipStreamCreate(Stream[i]));
+  }
+  HIP_CHECK(hipSetDevice(0));
+  // Testing hipMemAttachGlobal Flag
+  HIP_CHECK(hipMallocManaged(&HmmAG, NUM_ELMS * sizeof(float), hipMemAttachGlobal));

+  // Initializing HmmAG memory
+  for (int i = 0; i < NUM_ELMS; i++) {
+    HmmAG[i] = INIT_VAL;
+    Ah[i] = 0;
+  }
+
+  int blockSize = 256;
+  int numBlocks = (NUM_ELMS + blockSize - 1) / blockSize;
+  dim3 dimGrid(numBlocks, 1, 1);
+  dim3 dimBlock(blockSize, 1, 1);
+  for (int i = 0; i < NumStrms; i++) {
+    if (MultiDevice >= 2) {
+      HIP_CHECK(hipSetDevice(i));
+    }
+    HIP_CHECK(hipMalloc(&Ad, NUM_ELMS * sizeof(float)));
+    HIP_CHECK(hipMemset(Ad, 0, NUM_ELMS * sizeof(float)));
+    MallcMangdFlgTst<<<dimGrid, dimBlock, 0, *(Stream[i])>>>(NUM_ELMS, HmmAG, Ad);
+    HIP_CHECK(hipStreamSynchronize(*(Stream[i])));
+    // Validating the results
+    HIP_CHECK(hipMemcpy(Ah, Ad, NUM_ELMS * sizeof(float), hipMemcpyDeviceToHost));
+    for (int j = 0; j < NUM_ELMS; ++j) {
+      if (Ah[j] != (INIT_VAL * INIT_VAL)) {
+        DataMismatch++;
+      }
+    }
+    if (DataMismatch != 0) {
+      WARN("Data Mismatch observed when kernel launched on");
+      WARN(" device: " << i);
+      IfTestPassed = false;
+    }
+    DataMismatch = 0;
+
+    HIP_CHECK(hipFree(Ad));
+  }
+  delete[] Ah;
+  HIP_CHECK(hipFree(HmmAG));
+
+  DataMismatch = 0;
+  HIP_CHECK(hipMallocManaged(&HmmAH1, NUM_ELMS * sizeof(float), hipMemAttachHost));
+  HIP_CHECK(hipMallocManaged(&HmmAH2, NUM_ELMS * sizeof(float), hipMemAttachHost));
+
+  // Initializing HmmAH memory
+  for (int i = 0; i < NUM_ELMS; i++) {
+    HmmAH1[i] = INIT_VAL;
+    HmmAH2[i] = 0;
+  }
+  for (int i = 0; i < NumStrms; i++) {
+    if (MultiDevice >= 2) {
+      HIP_CHECK(hipSetDevice(i));
+    }
+    HIP_CHECK(hipMemset(HmmAH2, 0, NUM_ELMS * sizeof(float)));
+    MallcMangdFlgTst<<<dimGrid, dimBlock, 0, *(Stream[i])>>>(NUM_ELMS, HmmAH1, HmmAH2);
+    HIP_CHECK(hipStreamSynchronize(*(Stream[i])));
+    for (int j = 0; j < NUM_ELMS; ++j) {
+      if (HmmAH2[j] != (INIT_VAL * INIT_VAL)) {
+        DataMismatch++;
+        break;
+      }
+    }
+    if (DataMismatch != 0) {
+      WARN("Data Mismatch observed when kernel launched on");
+      WARN(" device: " << i);
+      IfTestPassed = false;
+    }
+  }
+
+  HIP_CHECK(hipFree(HmmAH1));
+  HIP_CHECK(hipFree(HmmAH2));
+  for (int i = 0; i < NumStrms; ++i) {
+    HIP_CHECK(hipStreamDestroy(*(Stream[i])));
+  }
+  REQUIRE(IfTestPassed);
+}
@@ -1,5 +1,5 @@
 /*
-   Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
+   Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
   Permission is hereby granted, free of charge, to any person obtaining a copy
   of this software and associated documentation files (the "Software"), to deal
   in the Software without restriction, including without limitation the rights
@@ -27,28 +27,81 @@
   6. Multiple Pointers
 */

-#include <hip_test_common.hh>
+#include "hipMallocManagedCommon.hh"
 #include <hip_test_kernels.hh>
 #include <hip_test_checkers.hh>
 #include <atomic>

 const size_t MAX_GPU{256};
-static size_t N{4*1024*1024};
+static size_t N{4 * 1024 * 1024};
+static unsigned blocksPerCU{6};
+static unsigned threadsPerBlock{256};
 #define INIT_VAL 123


 /*
 * Kernel function to perform addition operation.
 */
-template <typename T>
-__global__ void
-vector_sum(T *Ad1, T *Ad2, size_t NUM_ELMTS) {
-    size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
-    size_t stride = blockDim.x * gridDim.x;
+template <typename T> __global__ void vector_sum(T* Ad1, T* Ad2, size_t NUM_ELMTS) {
+  size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
+  size_t stride = blockDim.x * gridDim.x;

-    for (size_t i = offset; i < NUM_ELMTS; i += stride) {
-        Ad2[i] = Ad1[i] + Ad1[i];
-    }
+  for (size_t i = offset; i < NUM_ELMTS; i += stride) {
+    Ad2[i] = Ad1[i] + Ad1[i];
+  }
+}
+
+/*
+ * Kernel function to perform multiplication
+ */
+__global__ void KernelDouble(float* Hmm, float* dPtr, size_t n) {
+  size_t index = blockIdx.x * blockDim.x + threadIdx.x;
+  if (index < n) {
+    dPtr[index] = 2 * Hmm[index];
+  }
+}
+
+/*
+ * Host function to perform multiplication
+ */
+void HostKernelDouble(float* Hmm, float* hPtr, size_t n) {
+  for (size_t i = 0; i < n; i++) {
+    hPtr[i] = 2 * Hmm[i];
+  }
+}
+
+/*
+   This testcase verifies the concurrent access of hipMallocManaged Memory on host and device.
+ */
+TEST_CASE("Unit_hipMallocManaged_HostDeviceConcurrent") {
+  auto managed = HmmAttrPrint();
+  if (managed != 1) {
+    HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
+    return;
+  }
+
+  float *Hmm = nullptr, *hPtr = nullptr, *dPtr = nullptr, *resPtr = nullptr;
+
+  hPtr = reinterpret_cast<float*>(malloc(N * sizeof(float)));
+  resPtr = reinterpret_cast<float*>(malloc(N * sizeof(float)));
+
+  HIP_CHECK(hipMalloc(&dPtr, N * sizeof(float)));
+  HIP_CHECK(hipMallocManaged(&Hmm, N * sizeof(float)));
+  memset(Hmm, 2.0, N * sizeof(float));
+
+  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
+  std::thread host_thread(HostKernelDouble, Hmm, hPtr, N);
+  KernelDouble<<<dim3(blocks), dim3(threadsPerBlock), 0, 0>>>(Hmm, dPtr, N);
+  host_thread.join();
+  hipMemcpy(resPtr, dPtr, N * sizeof(float), hipMemcpyDeviceToHost);
+
+  for (size_t i = 0; i < N; i++) {
+    REQUIRE(hPtr[i] == resPtr[i]);
+  }
+
+  free(hPtr);
+  HIP_CHECK(hipFree(dPtr));
+  HIP_CHECK(hipFree(Hmm));
 }

 // The following Test case tests the following scenario:
@@ -57,7 +110,13 @@ vector_sum(T *Ad1, T *Ad2, size_t NUM_ELMTS) {
 // kernel is launched on acessed chunk of hmm memory
 // and checks if there are any inconsistencies or access issues
 TEST_CASE("Unit_hipMallocManaged_MultiChunkSingleDevice") {
-std::atomic<int> DataMismatch{0};
+  auto managed = HmmAttrPrint();
+  if (managed != 1) {
+    HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
+    return;
+  }
+
+  std::atomic<int> DataMismatch{0};
  constexpr int Chunks = 4;
  int Counter = 0;
  int NUM_ELMS = (1024 * 1024);
@@ -74,16 +133,14 @@ std::atomic<int> DataMismatch{0};
      Hmm[Counter] = (INIT_VAL + i);
    }
  }
-  const unsigned threadsPerBlock = 256;
-  const unsigned blocks = (NUM_ELMS + 255)/256;
+  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
  for (int k = 0; k < Chunks; ++k) {
-    vector_sum<float> <<<blocks, threadsPerBlock, 0, stream[k]>>>
-                      (&Hmm[k * NUM_ELMS], Ad[k], NUM_ELMS);
+    vector_sum<float>
+        <<<blocks, threadsPerBlock, 0, stream[k]>>>(&Hmm[k * NUM_ELMS], Ad[k], NUM_ELMS);
  }
  HIP_CHECK(hipDeviceSynchronize());
  for (int m = 0; m < Chunks; ++m) {
-    HIP_CHECK(hipMemcpy(Ah, Ad[m], NUM_ELMS * sizeof(float),
-                       hipMemcpyDeviceToHost));
+    HIP_CHECK(hipMemcpy(Ah, Ad[m], NUM_ELMS * sizeof(float), hipMemcpyDeviceToHost));
    for (int n = 0; n < NUM_ELMS; ++n) {
      if (Ah[n] != ((INIT_VAL + m) * 2)) {
        DataMismatch++;
@@ -96,7 +153,7 @@ std::atomic<int> DataMismatch{0};
    HIP_CHECK(hipStreamDestroy(stream[i]));
  }
  HIP_CHECK(hipFree(Hmm));
-  delete [] Ah;
+  delete[] Ah;
 }

 // The following Test case tests the following scenario:
@@ -105,10 +162,20 @@ std::atomic<int> DataMismatch{0};
 // kernel is launched on acessed chunk of hmm memory
 // and checks if there are any inconsistencies or access issues
 TEST_CASE("Unit_hipMallocManaged_MultiChunkMultiDevice") {
+  auto managed = HmmAttrPrint();
+  if (managed != 1) {
+    HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
+    return;
+  }
+
  std::atomic<int> DataMismatch{0};
  int Counter = 0;
  int NumDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&NumDevices));
+  if (NumDevices < 2) {
+    HipTest::HIP_SKIP_TEST("Skipping test because more than one device was not found.");
+    return;
+  }
  unsigned int NUM_ELMS = (1024 * 1024);
  float *Ad[MAX_GPU], *Hmm = NULL, *Ah = new float[NUM_ELMS];
  hipStream_t stream[MAX_GPU];
@@ -124,17 +191,15 @@ TEST_CASE("Unit_hipMallocManaged_MultiChunkMultiDevice") {
      Hmm[Counter] = INIT_VAL + i;
    }
  }
-  const unsigned threadsPerBlock = 256;
-  const unsigned blocks = (NUM_ELMS + 255)/256;
+  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
  for (int Klaunch = 0; Klaunch < NumDevices; ++Klaunch) {
    HIP_CHECK(hipSetDevice(Klaunch));
-    vector_sum<float> <<<blocks, threadsPerBlock, 0, stream[Klaunch]>>>
-                      (&Hmm[Klaunch * NUM_ELMS], Ad[Klaunch], NUM_ELMS);
+    vector_sum<float><<<blocks, threadsPerBlock, 0, stream[Klaunch]>>>(&Hmm[Klaunch * NUM_ELMS],
+                                                                       Ad[Klaunch], NUM_ELMS);
  }
  HIP_CHECK(hipDeviceSynchronize());
  for (int m = 0; m < NumDevices; ++m) {
-    HIP_CHECK(hipMemcpy(Ah, Ad[m], NUM_ELMS * sizeof(float),
-                       hipMemcpyDeviceToHost));
+    HIP_CHECK(hipMemcpy(Ah, Ad[m], NUM_ELMS * sizeof(float), hipMemcpyDeviceToHost));
    for (size_t n = 0; n < NUM_ELMS; ++n) {
      if (Ah[n] != ((INIT_VAL + m) * 2)) {
        DataMismatch++;
@@ -148,32 +213,38 @@ TEST_CASE("Unit_hipMallocManaged_MultiChunkMultiDevice") {
    HIP_CHECK(hipStreamDestroy(stream[i]));
  }
  HIP_CHECK(hipFree(Hmm));
-  delete [] Ah;
+  delete[] Ah;
 }

 // The following tests oversubscription hipMallocManaged() api
 // Currently disabled.
 TEST_CASE("Unit_hipMallocManaged_OverSubscription") {
-  void *A = nullptr;
+  auto managed = HmmAttrPrint();
+  if (managed != 1) {
+    HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
+    return;
+  }
+
+  void* A = nullptr;
  size_t total = 0, free = 0;
  HIP_CHECK(hipMemGetInfo(&free, &total));
  // ToDo: In case of HMM, memory over-subscription is allowed.  Hence, relook
  // into how out of memory can be tested.
  // Demanding more mem size than available
 #if HT_AMD
-  REQUIRE(hipMallocManaged(&A, (free +1), hipMemAttachGlobal) != hipSuccess);
+  HIP_CHECK_ERROR(hipMallocManaged(&A, (free + 1), hipMemAttachGlobal), hipErrorOutOfMemory);
 #endif
 }

 // The following test does negative testing of hipMallocManaged() api
 // by passing invalid values and check if the behavior is as expected
 TEST_CASE("Unit_hipMallocManaged_Negative") {
-  void *A;
+  void* A;
  size_t total = 0, free = 0;
  HIP_CHECK(hipMemGetInfo(&free, &total));

  SECTION("Nullptr to devPtr") {
-    REQUIRE(hipMallocManaged(NULL, 1024, hipMemAttachGlobal) != hipSuccess);
+    HIP_CHECK_ERROR(hipMallocManaged(NULL, 1024, hipMemAttachGlobal), hipErrorInvalidValue);
  }

  // cuda api doc says : If size is 0, cudaMallocManaged returns
@@ -184,14 +255,14 @@ TEST_CASE("Unit_hipMallocManaged_Negative") {
  // reset ptr while returning success (to accommodate cuda 11.2 api behavior).
  SECTION("size 0 with flag hipMemAttachGlobal") {
 #if HT_AMD
-    REQUIRE(hipMallocManaged(&A, 0, hipMemAttachGlobal) != hipSuccess);
+    HIP_CHECK_ERROR(hipMallocManaged(&A, 0, hipMemAttachGlobal), hipErrorInvalidValue);
 #else
-    REQUIRE(hipMallocManaged(&A, 0, hipMemAttachHost) == hipSuccess);
+    HIP_CHECK(hipMallocManaged(&A, 0, hipMemAttachGlobal));
 #endif
  }

  SECTION("devptr is nullptr with flag hipMemAttachHost") {
-    REQUIRE(hipMallocManaged(NULL, 1024, hipMemAttachHost) != hipSuccess);
+    HIP_CHECK_ERROR(hipMallocManaged(NULL, 1024, hipMemAttachHost), hipErrorInvalidValue);
  }

  // cuda api doc says : If size is 0, cudaMallocManaged returns
@@ -202,32 +273,47 @@ TEST_CASE("Unit_hipMallocManaged_Negative") {
  // reset ptr while returning success (to accommodate cuda 11.2 api behavior).
  SECTION("size 0 with flag hipMemAttachHost") {
 #if HT_AMD
-    REQUIRE(hipMallocManaged(&A, 0, hipMemAttachHost) != hipSuccess);
+    HIP_CHECK_ERROR(hipMallocManaged(&A, 0, hipMemAttachHost), hipErrorInvalidValue);
 #else
-    REQUIRE(hipMallocManaged(&A, 0, hipMemAttachHost) == hipSuccess);
+    HIP_CHECK(hipMallocManaged(&A, 0, hipMemAttachHost));
 #endif
  }
+
  SECTION("nullptr to devptr, size 0 and flag 0") {
-    REQUIRE(hipMallocManaged(NULL, 0, 0) != hipSuccess);
+    HIP_CHECK_ERROR(hipMallocManaged(NULL, 0, 0), hipErrorInvalidValue);
  }

-  SECTION("Numeric value to flag parameter") {
-    REQUIRE(hipMallocManaged(&A, 1024, 145) != hipSuccess);
+  SECTION("Invalid flag parameter") {
+    HIP_CHECK_ERROR(hipMallocManaged(&A, 1024, 145), hipErrorInvalidValue);
+  }
+  SECTION("Invalid flag parameter- flag set to 0") {
+    HIP_CHECK_ERROR(hipMallocManaged(&A, 1024, 0), hipErrorInvalidValue);
+  }
+  SECTION("Invalid flag parameter- Both flags set") {
+    HIP_CHECK_ERROR(hipMallocManaged(&A, 1024, hipMemAttachGlobal | hipMemAttachHost),
+                    hipErrorInvalidValue);
  }

-  SECTION("Negative value to size") {
-    REQUIRE(hipMallocManaged(&A, -10, hipMemAttachGlobal));
+  SECTION("Max value to size") {
+    HIP_CHECK_ERROR(hipMallocManaged(&A, std::numeric_limits<size_t>::max(), hipMemAttachGlobal),
+                    hipErrorOutOfMemory);
  }
 }

 // Allocate two pointers using hipMallocManaged(), initialize,
 // then launch kernel using these pointers directly and
 // later validate the content without using any Memcpy.
-TEMPLATE_TEST_CASE("Unit_hipMallocManaged_TwoPointers", "",
-                    int, float, double) {
+TEMPLATE_TEST_CASE("Unit_hipMallocManaged_TwoPointers", "", int, float, double) {
+  auto managed = HmmAttrPrint();
+  if (managed != 1) {
+    HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
+    return;
+  }
+
  int NumDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&NumDevices));
  TestType *Hmm1 = nullptr, *Hmm2 = nullptr;
+  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);

  for (int i = 0; i < NumDevices; ++i) {
    HIP_CHECK(hipSetDevice(i));
@@ -238,10 +324,8 @@ TEMPLATE_TEST_CASE("Unit_hipMallocManaged_TwoPointers", "",
      Hmm1[m] = m;
      Hmm2[m] = 0;
    }
-    const unsigned threadsPerBlock = 256;
-    const unsigned blocks = (N + 255)/256;
    // Kernel launch
-    vector_sum <<<blocks, threadsPerBlock>>> (Hmm1, Hmm2, N);
+    vector_sum<<<blocks, threadsPerBlock>>>(Hmm1, Hmm2, N);
    HIP_CHECK(hipDeviceSynchronize());
    for (size_t v = 0; v < N; ++v) {
      if (Hmm2[v] != static_cast<TestType>(v + v)) {
@@ -259,18 +343,28 @@ TEMPLATE_TEST_CASE("Unit_hipMallocManaged_TwoPointers", "",
 // to all other devices. This include verification and Device two Device
 // transfers and kernel launch o discover if there any access issues.

-TEMPLATE_TEST_CASE("Unit_hipMallocManaged_DeviceContextChange", "",
-    unsigned char, int, float, double) {
+TEMPLATE_TEST_CASE("Unit_hipMallocManaged_DeviceContextChange", "", unsigned char, int, float,
+                   double) {
+  auto managed = HmmAttrPrint();
+  if (managed != 1) {
+    HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
+    return;
+  }
+
  std::atomic<unsigned int> DataMismatch;
-  TestType *Ah1 = new TestType[N], *Ah2 = new TestType[N], *Ad = nullptr,
-           *Hmm = nullptr;
+  TestType *Ah1 = new TestType[N], *Ah2 = new TestType[N], *Ad = nullptr, *Hmm = nullptr;
  int NumDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&NumDevices));
+  if (NumDevices < 2) {
+    HipTest::HIP_SKIP_TEST("Skipping test because more than one device was not found.");
+    return;
+  }

-  for (size_t i =0; i < N; ++i) {
+  for (size_t i = 0; i < N; ++i) {
    Ah1[i] = INIT_VAL;
    Ah2[i] = 0;
  }
+  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
  for (int Oloop = 0; Oloop < NumDevices; ++Oloop) {
    DataMismatch = 0;
    HIP_CHECK(hipSetDevice(Oloop));
@@ -279,8 +373,7 @@ TEMPLATE_TEST_CASE("Unit_hipMallocManaged_DeviceContextChange", "",
      HIP_CHECK(hipSetDevice(Iloop));
      HIP_CHECK(hipMalloc(&Ad, N * sizeof(TestType)));
      // Copy data from host to hipMallocMananged memory and verify
-      HIP_CHECK(hipMemcpy(Hmm, Ah1, N * sizeof(TestType),
-            hipMemcpyHostToDevice));
+      HIP_CHECK(hipMemcpy(Hmm, Ah1, N * sizeof(TestType), hipMemcpyHostToDevice));
      for (size_t v = 0; v < N; ++v) {
        if (Hmm[v] != INIT_VAL) {
          DataMismatch++;
@@ -289,10 +382,8 @@ TEMPLATE_TEST_CASE("Unit_hipMallocManaged_DeviceContextChange", "",
      REQUIRE(DataMismatch.load() == 0);

      // Executing D2D transfer with hipMallocManaged memory and verify
-      HIP_CHECK(hipMemcpy(Ad, Hmm, N * sizeof(TestType),
-                          hipMemcpyDeviceToDevice));
-      HIP_CHECK(hipMemcpy(Ah2, Ad, N * sizeof(TestType),
-                          hipMemcpyDeviceToHost));
+      HIP_CHECK(hipMemcpy(Ad, Hmm, N * sizeof(TestType), hipMemcpyDeviceToDevice));
+      HIP_CHECK(hipMemcpy(Ah2, Ad, N * sizeof(TestType), hipMemcpyDeviceToHost));
      for (size_t k = 0; k < N; ++k) {
        if (Ah2[k] != INIT_VAL) {
          DataMismatch++;
@@ -300,14 +391,11 @@ TEMPLATE_TEST_CASE("Unit_hipMallocManaged_DeviceContextChange", "",
      }
      REQUIRE(DataMismatch.load() == 0);
      HIP_CHECK(hipMemset(Ad, 0, N * sizeof(TestType)));
-      const unsigned threadsPerBlock = 256;
-      const unsigned blocks = (N + 255)/256;
      // Launching the kernel to check if there is any access issue with
      // hipMallocManaged memory and local device's memory
-      vector_sum <<<blocks, threadsPerBlock>>> (Hmm, Ad, N);
-      hipDeviceSynchronize();
-      HIP_CHECK(hipMemcpy(Ah2, Ad, N * sizeof(TestType),
-                          hipMemcpyDeviceToHost));
+      vector_sum<<<blocks, threadsPerBlock>>>(Hmm, Ad, N);
+      HIP_CHECK(hipDeviceSynchronize());
+      HIP_CHECK(hipMemcpy(Ah2, Ad, N * sizeof(TestType), hipMemcpyDeviceToHost));
      for (size_t m = 0; m < N; ++m) {
        if (Ah2[m] != 246) {
          DataMismatch++;
@@ -20,25 +20,25 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 */
 #include <hip_test_common.hh>
+#include "hipMallocManagedCommon.hh"
 #include <atomic>


 // Kernel functions
-__global__ void HmmMultiThread(int n, float *x, float *y) {
+__global__ void HmmMultiThread(int n, float* x, float* y) {
  int index = blockIdx.x * blockDim.x + threadIdx.x;
  int stride = blockDim.x * gridDim.x;
-  for (int i = index; i < n; i += stride)
-    y[i] = x[i] * x[i];
+  for (int i = index; i < n; i += stride) y[i] = x[i] * x[i];
 }

-__global__ void KrnlWth2MemTypes(int *Hmm, int *Dptr, size_t n) {
+__global__ void KrnlWth2MemTypes(int* Hmm, int* Dptr, size_t n) {
  size_t index = blockIdx.x * blockDim.x + threadIdx.x;
  for (size_t i = index; i < n; i++) {
    Hmm[i] = Dptr[i] + 10;
  }
 }

-__global__ void KernelMul_MngdMem123(int *Hmm, int *Dptr, size_t n) {
+__global__ void KernelMul_MngdMem123(int* Hmm, int* Dptr, size_t n) {
  size_t index = blockIdx.x * blockDim.x + threadIdx.x;
  size_t stride = blockDim.x * gridDim.x;
  for (size_t i = index; i < n; i += stride) {
@@ -47,32 +47,27 @@ __global__ void KernelMul_MngdMem123(int *Hmm, int *Dptr, size_t n) {
 }


-
 // The following variable is used to determine the failure of test case
-static bool  IfTestPassed = true;
+static bool IfTestPassed = true;

-static void LaunchKrnl(int *Hmm1, size_t NumElms, int InitVal, int GpuOrdnl,
-                       int AdviseFlg) {
-  int *Hmm2 = NULL;
+static void LaunchKrnl(int* Hmm1, size_t NumElms, int InitVal, int GpuOrdnl, int AdviseFlg) {
+  int* Hmm2 = NULL;
  hipStream_t strm;
  HIPCHECK(hipSetDevice(GpuOrdnl));
  HIPCHECK(hipStreamCreate(&strm));
  if (AdviseFlg == 0) {
-    HIPCHECK(hipMemAdvise(Hmm1 , NumElms * sizeof(int),
-                           hipMemAdviseSetReadMostly, GpuOrdnl));
+    HIPCHECK(hipMemAdvise(Hmm1, NumElms * sizeof(int), hipMemAdviseSetReadMostly, GpuOrdnl));
  } else if (AdviseFlg == 1) {
-    HIPCHECK(hipMemAdvise(Hmm1 , NumElms * sizeof(int),
-                           hipMemAdviseSetPreferredLocation, GpuOrdnl));
+    HIPCHECK(hipMemAdvise(Hmm1, NumElms * sizeof(int), hipMemAdviseSetPreferredLocation, GpuOrdnl));
  } else if (AdviseFlg == 2) {
-    HIPCHECK(hipMemAdvise(Hmm1 , NumElms * sizeof(int),
-                           hipMemAdviseSetAccessedBy, GpuOrdnl));
+    HIPCHECK(hipMemAdvise(Hmm1, NumElms * sizeof(int), hipMemAdviseSetAccessedBy, GpuOrdnl));
  } else if (AdviseFlg == 3) {
    HIPCHECK(hipMemPrefetchAsync(Hmm1, NumElms * sizeof(int), GpuOrdnl, strm));
    HIPCHECK(hipStreamSynchronize(strm));
  }
  HIPCHECK(hipMallocManaged(&Hmm2, (sizeof(int) * NumElms)));
  for (int i = 0; i < 2; ++i) {
-    KrnlWth2MemTypes<<<((NumElms + 63)/64), 64, 0, strm>>>(Hmm2, Hmm1, NumElms);
+    KrnlWth2MemTypes<<<((NumElms + 63) / 64), 64, 0, strm>>>(Hmm2, Hmm1, NumElms);
    HIPCHECK(hipStreamSynchronize(strm));
  }
  // Verifying the result
@@ -88,7 +83,7 @@ static void LaunchKrnl(int *Hmm1, size_t NumElms, int InitVal, int GpuOrdnl,
  }
 }

-static void LaunchKrnl2(int *Hmm, size_t NumElms, int InitVal, int HmmMem) {
+static void LaunchKrnl2(int* Hmm, size_t NumElms, int InitVal, int HmmMem) {
  int *ptr = nullptr, blockSize = 64, *HstPtr = nullptr;
  hipStream_t strm;
  HIPCHECK(hipStreamCreate(&strm));
@@ -99,7 +94,7 @@ static void LaunchKrnl2(int *Hmm, size_t NumElms, int InitVal, int HmmMem) {
    HIPCHECK(hipMallocManaged(&ptr, (sizeof(int) * NumElms)));
  }
  dim3 dimBlock(blockSize, 1, 1);
-  dim3 dimGrid((NumElms + blockSize -1)/blockSize, 1, 1);
+  dim3 dimGrid((NumElms + blockSize - 1) / blockSize, 1, 1);
  for (int i = 0; i < 2; ++i) {
    KrnlWth2MemTypes<<<dimGrid, dimBlock, 0, strm>>>(ptr, Hmm, NumElms);
  }
@@ -107,8 +102,7 @@ static void LaunchKrnl2(int *Hmm, size_t NumElms, int InitVal, int HmmMem) {
  // Verifying the result
  int DataMismatch = 0;
  if (HmmMem == 0) {
-    HIPCHECK(hipMemcpy(HstPtr, ptr, (sizeof(int) * NumElms),
-                        hipMemcpyDeviceToHost));
+    HIPCHECK(hipMemcpy(HstPtr, ptr, (sizeof(int) * NumElms), hipMemcpyDeviceToHost));
    for (size_t i = 0; i < NumElms; ++i) {
      if (HstPtr[i] != (InitVal + 10)) {
        DataMismatch++;
@@ -127,13 +121,13 @@ static void LaunchKrnl2(int *Hmm, size_t NumElms, int InitVal, int HmmMem) {
  }
 }

-static void LaunchKrnl3(int *Dptr, size_t NumElms, int InitVal) {
+static void LaunchKrnl3(int* Dptr, size_t NumElms, int InitVal) {
  int *Hmm = NULL, blockSize = 64;
  hipStream_t strm;
  HIPCHECK(hipStreamCreate(&strm));
  HIPCHECK(hipMallocManaged(&Hmm, (sizeof(int) * NumElms)));
  dim3 dimBlock(blockSize, 1, 1);
-  dim3 dimGrid((NumElms + blockSize -1)/blockSize, 1, 1);
+  dim3 dimGrid((NumElms + blockSize - 1) / blockSize, 1, 1);
  for (int i = 0; i < 2; ++i) {
    KrnlWth2MemTypes<<<dimGrid, dimBlock, 0, strm>>>(Hmm, Dptr, NumElms);
  }
@@ -152,14 +146,13 @@ static void LaunchKrnl3(int *Dptr, size_t NumElms, int InitVal) {
 }


-static void LaunchKrnl5(int *Hmm1, size_t NumElms, int InitVal,
-                        int KerneltoLaunch) {
+static void LaunchKrnl5(int* Hmm1, size_t NumElms, int InitVal, int KerneltoLaunch) {
  int *Hmm2 = NULL, blockSize = 64;
  hipStream_t strm;
  HIPCHECK(hipStreamCreate(&strm));
  HIPCHECK(hipMallocManaged(&Hmm2, (sizeof(int) * NumElms)));
  dim3 dimBlock(blockSize, 1, 1);
-  dim3 dimGrid((NumElms + blockSize -1)/blockSize, 1, 1);
+  dim3 dimGrid((NumElms + blockSize - 1) / blockSize, 1, 1);
  for (int i = 0; i < 2; ++i) {
    if (KerneltoLaunch == 0) {
      KrnlWth2MemTypes<<<dimGrid, dimBlock, 0, strm>>>(Hmm2, Hmm1, NumElms);
@@ -200,8 +193,7 @@ static void TestFlagParamGlobal(int dev) {
  HIPCHECK(hipSetDevice(dev));
  HIPCHECK(hipStreamCreate(&strm));
  // Testing hipMemAttachGlobal Flag
-  HIPCHECK(hipMallocManaged(&HmmAG, NUM_ELMS * sizeof(float),
-                            hipMemAttachGlobal));
+  HIPCHECK(hipMallocManaged(&HmmAG, NUM_ELMS * sizeof(float), hipMemAttachGlobal));

  // Initializing HmmAG memory
  for (int i = 0; i < NUM_ELMS; i++) {
@@ -246,10 +238,8 @@ static void TestFlagParamHost(int dev) {
  hipStream_t strm;
  HIPCHECK(hipSetDevice(dev));
  HIPCHECK(hipStreamCreate(&strm));
-  HIPCHECK(hipMallocManaged(&HmmAH1, NUM_ELMS * sizeof(float),
-                            hipMemAttachHost));
-  HIPCHECK(hipMallocManaged(&HmmAH2, NUM_ELMS * sizeof(float),
-                            hipMemAttachHost));
+  HIPCHECK(hipMallocManaged(&HmmAH1, NUM_ELMS * sizeof(float), hipMemAttachHost));
+  HIPCHECK(hipMallocManaged(&HmmAH2, NUM_ELMS * sizeof(float), hipMemAttachHost));
  // Initializing HmmAH memory
  for (int i = 0; i < NUM_ELMS; i++) {
    HmmAH1[i] = INIT_VAL;
@@ -294,77 +284,54 @@ static void AllocateHmmMemory(int flag, int device) {
  }
 }

-
-static int HmmAttrPrint() {
-  int managed = 0;
-  INFO("The following are the attribute values related to HMM for"
-         " device 0:\n");
-  HIP_CHECK(hipDeviceGetAttribute(&managed,
-              hipDeviceAttributeDirectManagedMemAccessFromHost, 0));
-  INFO("hipDeviceAttributeDirectManagedMemAccessFromHost: " << managed);
-  HIP_CHECK(hipDeviceGetAttribute(&managed,
-                                 hipDeviceAttributeConcurrentManagedAccess, 0));
-  INFO("hipDeviceAttributeConcurrentManagedAccess: " << managed);
-  HIP_CHECK(hipDeviceGetAttribute(&managed,
-                                 hipDeviceAttributePageableMemoryAccess, 0));
-  INFO("hipDeviceAttributePageableMemoryAccess: " << managed);
-  HIP_CHECK(hipDeviceGetAttribute(&managed,
-              hipDeviceAttributePageableMemoryAccessUsesHostPageTables, 0));
-  INFO("hipDeviceAttributePageableMemoryAccessUsesHostPageTables:"
-         << managed);
-
-  HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory,
-                                  0));
-  INFO("hipDeviceAttributeManagedMemory: " << managed);
-  return managed;
-}
-
 TEST_CASE("Unit_hipMallocManaged_MultiThread") {
+  auto managed = HmmAttrPrint();
+  if (managed != 1) {
+    HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
+    return;
+  }
+
  IfTestPassed = true;
-  int NumDevs = 0, managed = 0, ATTACH_GLOBAL = 0, ATTACH_HOST = 1;
+  int NumDevs = 0, ATTACH_GLOBAL = 0, ATTACH_HOST = 1;
  int ITERATIONS = 10;
-  managed = HmmAttrPrint();
-  if (managed) {
-    HIP_CHECK(hipGetDeviceCount(&NumDevs));
-    std::vector<std::thread> T1;
-    std::vector<std::thread> T2;
-    for (int i = 0; i < NumDevs; ++i) {
-      for (int j = 0; j < ITERATIONS; ++j) {
-        T1.push_back(std::thread(TestFlagParamGlobal, i));
-        T2.push_back(std::thread(AllocateHmmMemory, ATTACH_GLOBAL, i));
-      }
-      for (auto &t1 : T1) {
-        if (t1.joinable()) {
-          t1.join();
-        }
-      }
-      for (auto &t2 : T2) {
-        if (t2.joinable()) {
-          t2.join();
-        }
+
+
+  HIP_CHECK(hipGetDeviceCount(&NumDevs));
+  std::vector<std::thread> T1;
+  std::vector<std::thread> T2;
+  for (int i = 0; i < NumDevs; ++i) {
+    for (int j = 0; j < ITERATIONS; ++j) {
+      T1.push_back(std::thread(TestFlagParamGlobal, i));
+      T2.push_back(std::thread(AllocateHmmMemory, ATTACH_GLOBAL, i));
+    }
+    for (auto& t1 : T1) {
+      if (t1.joinable()) {
+        t1.join();
      }
    }
-    T1.clear();
-    T2.clear();
-    for (int i = 0; i < NumDevs; ++i) {
-      for (int j = 0; j < ITERATIONS; ++j) {
-        T1.push_back(std::thread(TestFlagParamHost, i));
-        T2.push_back(std::thread(AllocateHmmMemory, ATTACH_HOST, i));
-      }
-      for (auto &t1 : T1) {
-        if (t1.joinable()) {
-          t1.join();
-        }
-      }
-      for (auto &t2 : T2) {
-        if (t2.joinable()) {
-          t2.join();
-        }
+    for (auto& t2 : T2) {
+      if (t2.joinable()) {
+        t2.join();
+      }
+    }
+  }
+  T1.clear();
+  T2.clear();
+  for (int i = 0; i < NumDevs; ++i) {
+    for (int j = 0; j < ITERATIONS; ++j) {
+      T1.push_back(std::thread(TestFlagParamHost, i));
+      T2.push_back(std::thread(AllocateHmmMemory, ATTACH_HOST, i));
+    }
+    for (auto& t1 : T1) {
+      if (t1.joinable()) {
+        t1.join();
+      }
+    }
+    for (auto& t2 : T2) {
+      if (t2.joinable()) {
+        t2.join();
      }
    }
-  } else {
-    SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory"
-            "attribute. Hence skipping the testing with Pass result.\n");
  }
  REQUIRE(IfTestPassed);
 }
@@ -372,175 +339,169 @@ TEST_CASE("Unit_hipMallocManaged_MultiThread") {
 // The following test checks what happens when same Hmm memory is used to
 // launch multiple threads over multiple gpus
 TEST_CASE("Unit_hipMallocManaged_MGpuMThread") {
+  auto managed = HmmAttrPrint();
+  if (managed != 1) {
+    HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
+    return;
+  }
+
  IfTestPassed = true;
  int Ngpus = 0;
  HIP_CHECK(hipGetDeviceCount(&Ngpus));
  if (Ngpus < 2) {
-    WARN("This test needs atleast 2 or more gpus, but the system");
-    WARN(" has only " << Ngpus);
-    WARN(" gpus. Hence skipping the test.");
-    SUCCEED("\n");
+    HipTest::HIP_SKIP_TEST("Skipping test because more than one device was not found.");
+    return;
  }
-  int managed = HmmAttrPrint();
-  if (managed == 1) {
-    int InitVal = 123, *Hmm1 = NULL, NumElms = 4096*4;
-    HIP_CHECK(hipMallocManaged(&Hmm1, (NumElms * sizeof(int))));
-    for (int i = 0; i < NumElms; ++i) {
-      Hmm1[i] = InitVal;
-    }

-    std::vector<std::thread> Thrds;
-    // AdviseFlg=0 for ReadMostly to be applied
-    // AdviseFlg=1 for PreferredLocation to be applied
-    // AdviseFlg=2 for AccessedBy to be applied
-    // AdviseFlg=3 to prefetch the memory to particular gpu
-    for (int AdviseFlg = 0; AdviseFlg < 4; ++AdviseFlg) {
-      for (int i = 0; i < Ngpus; ++i) {
-        Thrds.push_back(std::thread(LaunchKrnl, Hmm1, NumElms, InitVal, i,
-                                    AdviseFlg));
-      }
-      for (auto &thr : Thrds) {
-        if (thr.joinable()) {
-          thr.join();
-        }
+  int InitVal = 123, *Hmm1 = NULL, NumElms = 4096 * 4;
+  HIP_CHECK(hipMallocManaged(&Hmm1, (NumElms * sizeof(int))));
+  for (int i = 0; i < NumElms; ++i) {
+    Hmm1[i] = InitVal;
+  }
+
+  std::vector<std::thread> Thrds;
+  // AdviseFlg=0 for ReadMostly to be applied
+  // AdviseFlg=1 for PreferredLocation to be applied
+  // AdviseFlg=2 for AccessedBy to be applied
+  // AdviseFlg=3 to prefetch the memory to particular gpu
+  for (int AdviseFlg = 0; AdviseFlg < 4; ++AdviseFlg) {
+    for (int i = 0; i < Ngpus; ++i) {
+      Thrds.push_back(std::thread(LaunchKrnl, Hmm1, NumElms, InitVal, i, AdviseFlg));
+    }
+    for (auto& thr : Thrds) {
+      if (thr.joinable()) {
+        thr.join();
      }
    }
-    REQUIRE(IfTestPassed);
-  } else {
-    SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory "
-           "attribute. Hence skipping the testing with Pass result.\n");
  }
+  REQUIRE(IfTestPassed);
 }


 // The following test checks what happens when multiple kernels are launched
 // with same Hmm memory
 TEST_CASE("Unit_hipMallocManaged_MultiKrnlComnHmm") {
-  IfTestPassed = true;
-  int managed = HmmAttrPrint();
-  if (managed == 1) {
-    int InitVal = 123, *Hmm = NULL, NumElms = 1024*4, TotThrds = 2;
-    int HmmMem2 = 0, *HstPtr = nullptr;  //  to indicate the thread that
-    //  hipMalloc() memory has to be used
-    HstPtr = reinterpret_cast<int*>(new int[NumElms]);
-    HIP_CHECK(hipMalloc(&Hmm, (NumElms * sizeof(int))));
-    for (int i = 0; i < NumElms; ++i) {
-      HstPtr[i] = InitVal;
-    }
-    HIP_CHECK(hipMemcpy(Hmm, HstPtr, (NumElms * sizeof(int)),
-                        hipMemcpyHostToDevice));
-    std::vector<std::thread> Thrds;
-    for (int i = 0; i < TotThrds; ++i) {
-      Thrds.push_back(std::thread(LaunchKrnl2, Hmm, NumElms, InitVal, HmmMem2));
-    }
-
-    for (auto &thr : Thrds) {
-      if (thr.joinable()) {
-        thr.join();
-      }
-    }
-    delete[] HstPtr;
-  } else {
-    SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory "
-           "attribute. Hence skipping the testing with Pass result.\n");
+  auto managed = HmmAttrPrint();
+  if (managed != 1) {
+    HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
+    return;
  }
+
+  IfTestPassed = true;
+
+  int InitVal = 123, *Hmm = NULL, NumElms = 1024 * 4, TotThrds = 2;
+  int HmmMem2 = 0, *HstPtr = nullptr;  //  to indicate the thread that
+  //  hipMalloc() memory has to be used
+  HstPtr = reinterpret_cast<int*>(new int[NumElms]);
+  HIP_CHECK(hipMalloc(&Hmm, (NumElms * sizeof(int))));
+  for (int i = 0; i < NumElms; ++i) {
+    HstPtr[i] = InitVal;
+  }
+  HIP_CHECK(hipMemcpy(Hmm, HstPtr, (NumElms * sizeof(int)), hipMemcpyHostToDevice));
+  std::vector<std::thread> Thrds;
+  for (int i = 0; i < TotThrds; ++i) {
+    Thrds.push_back(std::thread(LaunchKrnl2, Hmm, NumElms, InitVal, HmmMem2));
+  }
+
+  for (auto& thr : Thrds) {
+    if (thr.joinable()) {
+      thr.join();
+    }
+  }
+  delete[] HstPtr;
 }


 // The following test checks what happens when multiple kernels are launched
 // with same hipMalloc() memory
 TEST_CASE("Unit_hipMallocManaged_MultiKrnlComnMalloc") {
-  IfTestPassed = true;
-  int managed = HmmAttrPrint();
-  if (managed) {
-    int InitVal = 123, *Dptr = NULL, NumElms = 4096*8, TotThrds = 2;
-    int *HstPtr = reinterpret_cast<int*>(new int[NumElms]);
-    HIP_CHECK(hipMalloc(&Dptr, (NumElms * sizeof(int))));
-    for (int i = 0; i < NumElms; ++i) {
-      HstPtr[i] = InitVal;
-    }
-    HIP_CHECK(hipMemcpy(Dptr, HstPtr, (NumElms * sizeof(int)),
-                        hipMemcpyHostToDevice));
-    std::vector<std::thread> Thrds;
-    for (int i = 0; i < TotThrds; ++i) {
-      Thrds.push_back(std::thread(LaunchKrnl3, Dptr, NumElms, InitVal));
-    }
-
-    for (auto &thr : Thrds) {
-      if (thr.joinable()) {
-        thr.join();
-      }
-    }
-    delete[] HstPtr;
-    HIP_CHECK(hipFree(Dptr));
-  } else {
-    SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory "
-           "attribute. Hence skipping the testing with Pass result.\n");
+  auto managed = HmmAttrPrint();
+  if (managed != 1) {
+    HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
+    return;
  }
+
+  IfTestPassed = true;
+  int InitVal = 123, *Dptr = NULL, NumElms = 4096 * 8, TotThrds = 2;
+  int* HstPtr = reinterpret_cast<int*>(new int[NumElms]);
+  HIP_CHECK(hipMalloc(&Dptr, (NumElms * sizeof(int))));
+  for (int i = 0; i < NumElms; ++i) {
+    HstPtr[i] = InitVal;
+  }
+  HIP_CHECK(hipMemcpy(Dptr, HstPtr, (NumElms * sizeof(int)), hipMemcpyHostToDevice));
+  std::vector<std::thread> Thrds;
+  for (int i = 0; i < TotThrds; ++i) {
+    Thrds.push_back(std::thread(LaunchKrnl3, Dptr, NumElms, InitVal));
+  }
+
+  for (auto& thr : Thrds) {
+    if (thr.joinable()) {
+      thr.join();
+    }
+  }
+  delete[] HstPtr;
+  HIP_CHECK(hipFree(Dptr));
 }

 //  The following section tests the scenario wherein multiple threads use their
 //  own stream to launch kernel on common Hmm memory
 TEST_CASE("Unit_hipMallocManaged_MultiThrdMultiStrm") {
-  IfTestPassed = true;
-  int managed = HmmAttrPrint();
-  if (managed == 1) {
-    int NumElms = 4096*4;
-    int *Hmm1 = NULL, TotlThrds = 4, InitVal = 123;
-    int HmmMem = 1;  //  to indicate the thread that Hmm memory need to be
-    //  used inside it
-    HIP_CHECK(hipMallocManaged(&Hmm1, (NumElms * sizeof(int))));
-    for (int i = 0; i < NumElms; ++i) {
-      Hmm1[i] = InitVal;
-    }
-    std::vector<std::thread> Thrds;
-    for (int i = 0; i < TotlThrds; ++i) {
-      Thrds.push_back(std::thread(LaunchKrnl2, Hmm1, NumElms, InitVal, HmmMem));
-    }
+  auto managed = HmmAttrPrint();
+  if (managed != 1) {
+    HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
+    return;
+  }

-    for (auto &thr : Thrds) {
-      if (thr.joinable()) {
-        thr.join();
-      }
+  IfTestPassed = true;
+
+  int NumElms = 4096 * 4;
+  int *Hmm1 = NULL, TotlThrds = 4, InitVal = 123;
+  int HmmMem = 1;  //  to indicate the thread that Hmm memory need to be
+  //  used inside it
+  HIP_CHECK(hipMallocManaged(&Hmm1, (NumElms * sizeof(int))));
+  for (int i = 0; i < NumElms; ++i) {
+    Hmm1[i] = InitVal;
+  }
+  std::vector<std::thread> Thrds;
+  for (int i = 0; i < TotlThrds; ++i) {
+    Thrds.push_back(std::thread(LaunchKrnl2, Hmm1, NumElms, InitVal, HmmMem));
+  }
+
+  for (auto& thr : Thrds) {
+    if (thr.joinable()) {
+      thr.join();
    }
-  } else {
-    SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory "
-           "attribute. Hence skipping the testing with Pass result.\n");
  }
 }


-
 //  The following section tests the scenario wherein two threads each use
 //  different kernel but common HMM memory
 TEST_CASE("Unit_hipMallocManaged_TwoKrnlsComnHmmMem") {
-  IfTestPassed = true;
-  int managed = HmmAttrPrint();
-  if (managed == 1) {
-    int InitVal = 123, *Dptr = NULL, NumElms = 4096*4, TotThrds = 2;
-    int *HstPtr = reinterpret_cast<int*>(new int[NumElms]);
-    HIP_CHECK(hipMalloc(&Dptr, (NumElms * sizeof(int))));
-    for (int i = 0; i < NumElms; ++i) {
-      HstPtr[i] = InitVal;
-    }
-    HIP_CHECK(hipMemcpy(Dptr, HstPtr, (NumElms * sizeof(int)),
-                        hipMemcpyHostToDevice));
-    std::vector<std::thread> Thrds;
-    for (int i = 0; i < TotThrds; ++i) {
-      Thrds.push_back(std::thread(LaunchKrnl5, Dptr, NumElms, InitVal, i));
-    }
-
-    for (auto &thr : Thrds) {
-      if (thr.joinable()) {
-        thr.join();
-      }
-    }
-    delete[] HstPtr;
-    HIP_CHECK(hipFree(Dptr));
-  } else {
-    SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory "
-           "attribute. Hence skipping the testing with Pass result.\n");
+  auto managed = HmmAttrPrint();
+  if (managed != 1) {
+    HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
+    return;
  }
+
+  IfTestPassed = true;
+  int InitVal = 123, *Dptr = NULL, NumElms = 4096 * 4, TotThrds = 2;
+  int* HstPtr = reinterpret_cast<int*>(new int[NumElms]);
+  HIP_CHECK(hipMalloc(&Dptr, (NumElms * sizeof(int))));
+  for (int i = 0; i < NumElms; ++i) {
+    HstPtr[i] = InitVal;
+  }
+  HIP_CHECK(hipMemcpy(Dptr, HstPtr, (NumElms * sizeof(int)), hipMemcpyHostToDevice));
+  std::vector<std::thread> Thrds;
+  for (int i = 0; i < TotThrds; ++i) {
+    Thrds.push_back(std::thread(LaunchKrnl5, Dptr, NumElms, InitVal, i));
+  }
+
+  for (auto& thr : Thrds) {
+    if (thr.joinable()) {
+      thr.join();
+    }
+  }
+  delete[] HstPtr;
+  HIP_CHECK(hipFree(Dptr));
 }
-
-