[hip-tests] Tag multigpu tests with Catch2 tags (#1315)

2025-11-14 13:00:30 +01:00
@@ -54,8 +54,9 @@ THE SOFTWARE.
 * ------------------------
 *    - HIP_VERSION >= 5.2
 */
-TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Peer_GPUs", "", int, unsigned int, unsigned long,
-                   unsigned long long, float, double) {
+TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Peer_GPUs", "[multigpu]",
+                   int, unsigned int, unsigned long, unsigned long long, float,
+                   double) {
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
  const auto cache_line_size = 128u;
@@ -103,8 +104,9 @@ TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Peer_GPUs", "", int, unsigned
 * ------------------------
 *    - HIP_VERSION >= 5.2
 */
-TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Host_And_GPU", "", int, unsigned int,
-                   unsigned long, unsigned long long, float, double) {
+TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Host_And_GPU", "[multigpu]",
+                   int, unsigned int, unsigned long, unsigned long long, float,
+                   double) {
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
  const auto cache_line_size = 128u;
@@ -152,8 +154,9 @@ TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Host_And_GPU", "", int, unsig
 * ------------------------
 *    - HIP_VERSION >= 5.2
 */
-TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int,
-                   unsigned long, unsigned long long, float, double) {
+TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs",
+                   "[multigpu]", int, unsigned int, unsigned long,
+                   unsigned long long, float, double) {
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
  const auto cache_line_size = 128u;
@@ -45,8 +45,9 @@ THE SOFTWARE.
 *  - Multi-device
 *  - HIP_VERSION >= 5.2
 */
-TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
-                   unsigned long, unsigned long long) {
+TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Same_Address",
+                   "[multigpu]", int, unsigned int, unsigned long,
+                   unsigned long long) {
  for (auto current = 0; current < 1; ++current) {
    DYNAMIC_SECTION("Same address " << current) {
      Bitwise::MultipleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kAndSystem>(
@@ -68,8 +69,9 @@ TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Same_Address", "",
 *  - Multi-device
 *  - HIP_VERSION >= 5.2
 */
-TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
-                   unsigned int, unsigned long, unsigned long long) {
+TEMPLATE_TEST_CASE(
+    "Unit_atomicAnd_system_Positive_Peer_GPUs_Adjacent_Addresses", "[multigpu]",
+    int, unsigned int, unsigned long, unsigned long long) {
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

@@ -94,8 +96,9 @@ TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Adjacent_Addresses"
 *  - Multi-device
 *  - HIP_VERSION >= 5.2
 */
-TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
-                   unsigned int, unsigned long, unsigned long long) {
+TEMPLATE_TEST_CASE(
+    "Unit_atomicAnd_system_Positive_Peer_GPUs_Scattered_Addresses",
+    "[multigpu]", int, unsigned int, unsigned long, unsigned long long) {
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
  const auto cache_line_size = 128u;
@@ -60,8 +60,9 @@ THE SOFTWARE.
 * ------------------------
 *    - HIP_VERSION >= 5.2
 */
-TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Peer_GPUs", "", int, unsigned int,
-                   unsigned long long, unsigned short int TYPES) {
+TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Peer_GPUs", "[multigpu]",
+                   int, unsigned int, unsigned long long,
+                   unsigned short int TYPES) {
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
  const auto cache_line_size = 128u;
@@ -105,8 +106,9 @@ TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Peer_GPUs", "", int, unsigned
 * ------------------------
 *    - HIP_VERSION >= 5.2
 */
-TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Host_And_GPU", "", int, unsigned int,
-                   unsigned long long, unsigned short int TYPES) {
+TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Host_And_GPU", "[multigpu]",
+                   int, unsigned int, unsigned long long,
+                   unsigned short int TYPES) {
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
  const auto cache_line_size = 128u;
@@ -155,8 +157,9 @@ TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Host_And_GPU", "", int, unsig
 * ------------------------
 *    - HIP_VERSION >= 5.2
 */
-TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int,
-                   unsigned long long, unsigned short int TYPES) {
+TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs",
+                   "[multigpu]", int, unsigned int, unsigned long long,
+                   unsigned short int TYPES) {
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
  const auto cache_line_size = 128u;
@@ -55,11 +55,12 @@ THE SOFTWARE.
 *    - HIP_VERSION >= 5.2
 */
 #if HT_NVIDIA
-TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Peer_GPUs", "", int, unsigned int,
-                   unsigned long long, float) {
+TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Peer_GPUs", "[multigpu]",
+                   int, unsigned int, unsigned long long, float) {
 #else
-TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Peer_GPUs", "", int, unsigned int,
-                   unsigned long, unsigned long long, float, double) {
+TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Peer_GPUs", "[multigpu]",
+                   int, unsigned int, unsigned long, unsigned long long, float,
+                   double) {
 #endif  // HT_NVIDIA
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
@@ -109,12 +110,13 @@ TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Peer_GPUs", "", int, unsigne
 *    - HIP_VERSION >= 5.2
 */
 #if HT_NVIDIA
-TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Host_And_GPU", "", int, unsigned int,
-                   unsigned long long, float) {
+TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Host_And_GPU", "[multigpu]",
+                   int, unsigned int, unsigned long long, float) {
 #else
-TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Host_And_GPU", "", int, unsigned int,
-                   unsigned long, unsigned long long, float, double) {
-#endif  // HT_NVIDIA
+TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Host_And_GPU", "[multigpu]",
+                   int, unsigned int, unsigned long, unsigned long long, float,
+                   double) {
+#endif  // HT_NVIDIA
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
  const auto cache_line_size = 128u;
@@ -164,11 +166,12 @@ TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Host_And_GPU", "", int, unsi
 *    - HIP_VERSION >= 5.2
 */
 #if HT_NVIDIA
-TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int,
-                   unsigned long long, float) {
+TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Host_And_Peer_GPUs",
+                   "[multigpu]", int, unsigned int, unsigned long long, float) {
 #else
-TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int,
-                   unsigned long, unsigned long long, float, double) {
+TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Host_And_Peer_GPUs",
+                   "[multigpu]", int, unsigned int, unsigned long,
+                   unsigned long long, float, double) {
 #endif  // HT_NVIDIA
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
@@ -46,11 +46,13 @@ THE SOFTWARE.
 *  - HIP_VERSION >= 5.2
 */
 #if HT_AMD
-TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
-                   unsigned long, unsigned long long, float, double) {
+TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address",
+                   "[multigpu]", int, unsigned int, unsigned long,
+                   unsigned long long, float, double) {
 #else
-TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
-                   unsigned long, unsigned long long) {
+TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address",
+                   "[multigpu]", int, unsigned int, unsigned long,
+                   unsigned long long) {
 #endif
  for (auto current = 0; current < 1; ++current) {
    DYNAMIC_SECTION("Same address " << current) {
@@ -74,11 +76,13 @@ TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address", "",
 *  - HIP_VERSION >= 5.2
 */
 #if HT_AMD
-TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
-                   unsigned int, unsigned long, unsigned long long, float, double) {
+TEMPLATE_TEST_CASE(
+    "Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses", "[multigpu]",
+    int, unsigned int, unsigned long, unsigned long long, float, double) {
 #else
-TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
-                   unsigned int, unsigned long, unsigned long long) {
+TEMPLATE_TEST_CASE(
+    "Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses", "[multigpu]",
+    int, unsigned int, unsigned long, unsigned long long) {
 #endif
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
@@ -105,11 +109,14 @@ TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses"
 *  - HIP_VERSION >= 5.2
 */
 #if HT_AMD
-TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
-                   unsigned int, unsigned long, unsigned long long, float, double) {
+TEMPLATE_TEST_CASE(
+    "Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses",
+    "[multigpu]", int, unsigned int, unsigned long, unsigned long long, float,
+    double) {
 #else
-TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
-                   unsigned int, unsigned long, unsigned long long) {
+TEMPLATE_TEST_CASE(
+    "Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses",
+    "[multigpu]", int, unsigned int, unsigned long, unsigned long long) {
 #endif
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
@@ -46,11 +46,13 @@ THE SOFTWARE.
 *  - HIP_VERSION >= 5.2
 */
 #if HT_AMD
-TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
-                   unsigned long, unsigned long long, float, double) {
+TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address",
+                   "[multigpu]", int, unsigned int, unsigned long,
+                   unsigned long long, float, double) {
 #else
-TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
-                   unsigned long, unsigned long long) {
+TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address",
+                   "[multigpu]", int, unsigned int, unsigned long,
+                   unsigned long long) {
 #endif
  for (auto current = 0; current < 1; ++current) {
    DYNAMIC_SECTION("Same address " << current) {
@@ -74,11 +76,13 @@ TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address", "",
 *  - HIP_VERSION >= 5.2
 */
 #if HT_AMD
-TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
-                   unsigned int, unsigned long, unsigned long long, float, double) {
+TEMPLATE_TEST_CASE(
+    "Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses", "[multigpu]",
+    int, unsigned int, unsigned long, unsigned long long, float, double) {
 #else
-TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
-                   unsigned int, unsigned long, unsigned long long) {
+TEMPLATE_TEST_CASE(
+    "Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses", "[multigpu]",
+    int, unsigned int, unsigned long, unsigned long long) {
 #endif
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
@@ -105,11 +109,14 @@ TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses"
 *  - HIP_VERSION >= 5.2
 */
 #if HT_AMD
-TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
-                   unsigned int, unsigned long, unsigned long long, float, double) {
+TEMPLATE_TEST_CASE(
+    "Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses",
+    "[multigpu]", int, unsigned int, unsigned long, unsigned long long, float,
+    double) {
 #else
-TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
-                   unsigned int, unsigned long, unsigned long long) {
+TEMPLATE_TEST_CASE(
+    "Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses",
+    "[multigpu]", int, unsigned int, unsigned long, unsigned long long) {
 #endif
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
@@ -45,8 +45,9 @@ THE SOFTWARE.
 *  - Multi-device
 *  - HIP_VERSION >= 5.2
 */
-TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
-                   unsigned long, unsigned long long) {
+TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Same_Address",
+                   "[multigpu]", int, unsigned int, unsigned long,
+                   unsigned long long) {
  for (auto current = 0; current < 1; ++current) {
    DYNAMIC_SECTION("Same address " << current) {
      Bitwise::MultipleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kOrSystem>(
@@ -68,8 +69,9 @@ TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Same_Address", "", i
 *  - Multi-device
 *  - HIP_VERSION >= 5.2
 */
-TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
-                   unsigned int, unsigned long, unsigned long long) {
+TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Adjacent_Addresses",
+                   "[multigpu]", int, unsigned int, unsigned long,
+                   unsigned long long) {
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

@@ -94,8 +96,9 @@ TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Adjacent_Addresses",
 *  - Multi-device
 *  - HIP_VERSION >= 5.2
 */
-TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
-                   unsigned int, unsigned long, unsigned long long) {
+TEMPLATE_TEST_CASE(
+    "Unit_atomicOr_system_Positive_Peer_GPUs_Scattered_Addresses", "[multigpu]",
+    int, unsigned int, unsigned long, unsigned long long) {
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
  const auto cache_line_size = 128u;
@@ -54,8 +54,9 @@ THE SOFTWARE.
 * ------------------------
 *    - HIP_VERSION >= 5.2
 */
-TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Peer_GPUs", "", int, unsigned int, unsigned long,
-                   unsigned long long, float, double) {
+TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Peer_GPUs", "[multigpu]",
+                   int, unsigned int, unsigned long, unsigned long long, float,
+                   double) {
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
  const auto cache_line_size = 128u;
@@ -103,8 +104,9 @@ TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Peer_GPUs", "", int, unsigned
 * ------------------------
 *    - HIP_VERSION >= 5.2
 */
-TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Host_And_GPU", "", int, unsigned int,
-                   unsigned long, unsigned long long, float, double) {
+TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Host_And_GPU", "[multigpu]",
+                   int, unsigned int, unsigned long, unsigned long long, float,
+                   double) {
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
  const auto cache_line_size = 128u;
@@ -152,8 +154,9 @@ TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Host_And_GPU", "", int, unsig
 * ------------------------
 *    - HIP_VERSION >= 5.2
 */
-TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int,
-                   unsigned long, unsigned long long, float, double) {
+TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Host_And_Peer_GPUs",
+                   "[multigpu]", int, unsigned int, unsigned long,
+                   unsigned long long, float, double) {
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
  const auto cache_line_size = 128u;
@@ -45,8 +45,9 @@ THE SOFTWARE.
 *  - Multi-device
 *  - HIP_VERSION >= 5.2
 */
-TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
-                   unsigned long, unsigned long long) {
+TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Same_Address",
+                   "[multigpu]", int, unsigned int, unsigned long,
+                   unsigned long long) {
  for (auto current = 0; current < 1; ++current) {
    DYNAMIC_SECTION("Same address " << current) {
      Bitwise::MultipleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kXorSystem>(
@@ -68,8 +69,9 @@ TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Same_Address", "",
 *  - Multi-device
 *  - HIP_VERSION >= 5.2
 */
-TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
-                   unsigned int, unsigned long, unsigned long long) {
+TEMPLATE_TEST_CASE(
+    "Unit_atomicXor_system_Positive_Peer_GPUs_Adjacent_Addresses", "[multigpu]",
+    int, unsigned int, unsigned long, unsigned long long) {
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

@@ -94,8 +96,9 @@ TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Adjacent_Addresses"
 *  - Multi-device
 *  - HIP_VERSION >= 5.2
 */
-TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
-                   unsigned int, unsigned long, unsigned long long) {
+TEMPLATE_TEST_CASE(
+    "Unit_atomicXor_system_Positive_Peer_GPUs_Scattered_Addresses",
+    "[multigpu]", int, unsigned int, unsigned long, unsigned long long) {
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
  const auto cache_line_size = 128u;
@@ -65,7 +65,8 @@ TEST_CASE("Unit_hipGetStreamDeviceId_Positive_Threaded_Basic") {
 *  - Platform specific (AMD)
 *  - Multithreaded GPU
 */
-TEST_CASE("Unit_hipGetStreamDeviceId_Positive_Multithreaded_Basic") {
+TEST_CASE("Unit_hipGetStreamDeviceId_Positive_Multithreaded_Basic",
+          "[multigpu]") {
  const unsigned int max_threads = std::thread::hardware_concurrency();
  const int device_count = HipTest::getDeviceCount();

@@ -378,7 +378,7 @@ template <typename F> static void test_cg_multi_grid_group_type(F kernel_func, i
  }
 }

-TEST_CASE("Unit_hipCGMultiGridGroupType_Basic") {
+TEST_CASE("Unit_hipCGMultiGridGroupType_Basic", "[multigpu]") {
  int num_devices = 0;
  HIP_CHECK(hipGetDeviceCount(&num_devices));
  num_devices = min(num_devices, MaxGPUs);
@@ -425,7 +425,7 @@ TEST_CASE("Unit_hipCGMultiGridGroupType_Basic") {
  }
 }

-TEST_CASE("Unit_hipCGMultiGridGroupType_Barrier") {
+TEST_CASE("Unit_hipCGMultiGridGroupType_Barrier", "[multigpu]") {
  int num_devices = 0;
  uint32_t loops = GENERATE(1, 2, 3, 4);
  uint32_t warps = GENERATE(4, 8, 16, 32);
@@ -130,7 +130,7 @@ __global__ void test_gws(uint* buf, uint buf_size, long* tmp_buf, long* result)
  }
 }

-TEST_CASE("Unit_hipLaunchCooperativeKernelMultiDevice_Basic") {
+TEST_CASE("Unit_hipLaunchCooperativeKernelMultiDevice_Basic", "[multigpu]") {
  constexpr uint num_kernel_args = 4;

  int device_num = 0;
@@ -154,7 +154,7 @@ static void get_multi_grid_dims(dim3& grid_dim, dim3& block_dim, unsigned int de
 *  - HIP_VERSION >= 5.2
 *  - Devices support cooperative multi device launch
 */
-TEST_CASE("Unit_Multi_Grid_Group_Getters_Positive_Basic") {
+TEST_CASE("Unit_Multi_Grid_Group_Getters_Positive_Basic", "[multigpu]") {
  int num_devices = 0;
  HIP_CHECK(hipGetDeviceCount(&num_devices));
  num_devices = min(num_devices, kMaxGPUs);
@@ -302,7 +302,7 @@ TEST_CASE("Unit_Multi_Grid_Group_Getters_Positive_Basic") {
 *  - HIP_VERSION >= 5.2
 *  - Devices support cooperative multi device launch
 */
-TEST_CASE("Unit_Multi_Grid_Group_Getters_Positive_Base_Type") {
+TEST_CASE("Unit_Multi_Grid_Group_Getters_Positive_Base_Type", "[multigpu]") {
  int num_devices = 0;
  HIP_CHECK(hipGetDeviceCount(&num_devices));
  num_devices = min(num_devices, kMaxGPUs);
@@ -423,7 +423,8 @@ TEST_CASE("Unit_Multi_Grid_Group_Getters_Positive_Base_Type") {
 *  - HIP_VERSION >= 5.2
 *  - Devices support cooperative multi device launch
 */
-TEST_CASE("Unit_Multi_Grid_Group_Getters_Positive_Non_Member_Functions") {
+TEST_CASE("Unit_Multi_Grid_Group_Getters_Positive_Non_Member_Functions",
+          "[multigpu]") {
  int num_devices = 0;
  HIP_CHECK(hipGetDeviceCount(&num_devices));
  num_devices = min(num_devices, kMaxGPUs);
@@ -535,7 +536,7 @@ TEST_CASE("Unit_Multi_Grid_Group_Getters_Positive_Non_Member_Functions") {
 *  - HIP_VERSION >= 5.2
 *  - Devices support cooperative multi device launch
 */
-TEST_CASE("Unit_Multi_Grid_Group_Positive_Sync") {
+TEST_CASE("Unit_Multi_Grid_Group_Positive_Sync", "[multigpu]") {
  CHECK_IMAGE_SUPPORT
  int num_devices = 0;
  HIP_CHECK(hipGetDeviceCount(&num_devices));
@@ -46,7 +46,7 @@ THE SOFTWARE.
 *  - Multi-device
 *  - HIP_VERSION >= 5.2
 */
-TEST_CASE("Unit_hipDeviceEnableDisablePeerAccess_positive") {
+TEST_CASE("Unit_hipDeviceEnableDisablePeerAccess_positive", "[multigpu]") {
  int canAccessPeer = 0;
  int deviceCount = HipTest::getGeviceCount();
  if (deviceCount < 2) {
@@ -95,7 +95,7 @@ TEST_CASE("Unit_hipDeviceEnableDisablePeerAccess_positive") {
 *  - Multi-device
 *  - HIP_VERSION >= 5.2
 */
-TEST_CASE("Unit_hipDeviceEnablePeerAccess_negative") {
+TEST_CASE("Unit_hipDeviceEnablePeerAccess_negative", "[multigpu]") {
  int deviceCount = HipTest::getGeviceCount();
  if (deviceCount < 2) {
    HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
@@ -159,7 +159,7 @@ TEST_CASE("Unit_hipDeviceEnablePeerAccess_negative") {
 *  - Multi-device
 *  - HIP_VERSION >= 5.2
 */
-TEST_CASE("Unit_hipDeviceDisablePeerAccess_negative") {
+TEST_CASE("Unit_hipDeviceDisablePeerAccess_negative", "[multigpu]") {
  int deviceCount = HipTest::getGeviceCount();
  if (deviceCount < 2) {
    HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
@@ -210,7 +210,8 @@ static inline std::vector<int> parseVisibleDevices() {
 * ------------------------
 *  - HIP_VERSION >= 5.7
 */
-TEST_CASE("Unit_hipDeviceName_gcnArchName_And_rocm_agent_enumerator") {
+TEST_CASE("Unit_hipDeviceName_gcnArchName_And_rocm_agent_enumerator",
+          "[multigpu]") {
  int deviceCount = 0;
  HIP_CHECK(hipGetDeviceCount(&deviceCount));
  if (deviceCount <= 0) {
@@ -145,7 +145,7 @@ static inline std::vector<int> parseVisibleDevices() {
 * ------------------------
 *  - HIP_VERSION >= 5.7
 */
-TEST_CASE("Unit_hipDeviceGetUuid_From_RocmInfo") {
+TEST_CASE("Unit_hipDeviceGetUuid_From_RocmInfo", "[multigpu]") {
  int deviceCount = 0;
  HIP_CHECK(hipGetDeviceCount(&deviceCount));
  assert(deviceCount > 0);
@@ -219,7 +219,8 @@ TEST_CASE("Unit_hipDeviceGetUuid_From_RocmInfo") {
 */
 // Guarding it against NVIDIA as this test is faling on it.
 #if HT_AMD
-TEST_CASE("Unit_hipDeviceGetUuid_VerifyUuidFrm_hipGetDeviceProperties") {
+TEST_CASE("Unit_hipDeviceGetUuid_VerifyUuidFrm_hipGetDeviceProperties",
+          "[multigpu]") {
  int deviceCount = 0;
  hipDevice_t device;
  hipDeviceProp_t prop;
@@ -462,7 +462,7 @@ void getMinMaxCurrentAndSetCurrent() {
 * ------------------------
 *  - HIP_VERSION >= 6.5
 */
-TEST_CASE("Unit_hipDeviceGetSetLimit_Scratch_MultiDevice") {
+TEST_CASE("Unit_hipDeviceGetSetLimit_Scratch_MultiDevice", "[multigpu]") {
  int deviceCount = 0;
  HIP_CHECK(hipGetDeviceCount(&deviceCount));
  if (deviceCount < 2) {
@@ -65,7 +65,7 @@ static bool testSetLimitFunc(hipLimit_t limit_to_test) {
 * ------------------------
 *  - HIP_VERSION >= 5.2
 */
-TEST_CASE("Unit_hipDeviceSetLimit_SetGet") {
+TEST_CASE("Unit_hipDeviceSetLimit_SetGet", "[multigpu]") {
  size_t value = 0;
  // Scenario1
  SECTION("Set Get Test hipLimitStackSize") {
@@ -115,7 +115,7 @@ TEST_CASE("Unit_hipDeviceTotalMem_ValidateTotalMem") {
 *  - Multi-device test
 *  - HIP_VERSION >= 5.2
 */
-TEST_CASE("Unit_hipDeviceTotalMem_NonSelectedDevice") {
+TEST_CASE("Unit_hipDeviceTotalMem_NonSelectedDevice", "[multigpu]") {
  auto deviceCount = HipTest::getDeviceCount();
  if (deviceCount < 2) {
    HipTest::HIP_SKIP_TEST("Multi Device Test, will not run on single gpu systems. Skipping.");
@@ -148,7 +148,7 @@ static void validateDeviceMacro(int* archProp_h, hipDeviceProp_t* prop) {
 *  - Platform specific (AMD)
 *  - HIP_VERSION >= 5.2
 */
-TEST_CASE("Unit_hipGetDeviceProperties_ArchPropertiesTst") {
+TEST_CASE("Unit_hipGetDeviceProperties_ArchPropertiesTst", "[multigpu]") {
  int *archProp_h, *archProp_d;
  archProp_h = new int[NUM_OF_ARCHPROP];
  hipDeviceProp_t prop;
@@ -372,7 +372,7 @@ TEST_CASE("Unit_hipGetProcAddress_ValidateDeviceApis") {
 *  - HIP_VERSION >= 6.2
 */

-TEST_CASE("Unit_hipGetProcAddress_PeerDeviceAccessAPIs") {
+TEST_CASE("Unit_hipGetProcAddress_PeerDeviceAccessAPIs", "[multigpu]") {
  void* hipDeviceCanAccessPeer_ptr = nullptr;
  void* hipSetDevice_ptr = nullptr;
  void* hipGetDevice_ptr = nullptr;
@@ -453,7 +453,7 @@ bool CheckMemPoolSupport(const int device) {
  return true;
 }

-TEST_CASE("Unit_hipGetProcAddress_SetGetMemPoolAPIs") {
+TEST_CASE("Unit_hipGetProcAddress_SetGetMemPoolAPIs", "[multigpu]") {
  void* hipDeviceSetMemPool_ptr = nullptr;
  void* hipDeviceGetMemPool_ptr = nullptr;
  int currentHipVersion = 0;
@@ -44,7 +44,7 @@ THE SOFTWARE.
 * ------------------------
 *  - HIP_VERSION >= 5.2
 */
-TEST_CASE("Unit_hipSetDevice_BasicSetGet") {
+TEST_CASE("Unit_hipSetDevice_BasicSetGet", "[multigpu]") {
  int numDevices = 0;
  int device{};
  HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -73,7 +73,7 @@ TEST_CASE("Unit_hipSetDevice_BasicSetGet") {
 * ------------------------
 *  - HIP_VERSION >= 5.2
 */
-TEST_CASE("Unit_hipGetSetDevice_MultiThreaded") {
+TEST_CASE("Unit_hipGetSetDevice_MultiThreaded", "[multigpu]") {
  auto maxThreads = std::thread::hardware_concurrency();
  auto deviceCount = HipTest::getDeviceCount();

@@ -126,7 +126,7 @@ TEST_CASE("Unit_hipGetSetDevice_MultiThreaded") {
 * ------------------------
 *  - HIP_VERSION >= 5.2
 */
-TEST_CASE("Unit_hipSetGetDevice_Positive_Threaded_Basic") {
+TEST_CASE("Unit_hipSetGetDevice_Positive_Threaded_Basic", "[multigpu]") {
  class HipSetGetDeviceThreadedTest : public ThreadedZigZagTest<HipSetGetDeviceThreadedTest> {
   public:
    void TestPart1() { HIP_CHECK(hipSetDevice(0)); }
@@ -158,7 +158,7 @@ TEST_CASE("Unit_hipSetValidDevices_Negative_Length_Lessthan_DeviceArrSize") {
 * ------------------------
 *  - HIP_VERSION >= 7.1
 */
-TEST_CASE("Unit_hipSetValidDevices_Positive_Basic") {
+TEST_CASE("Unit_hipSetValidDevices_Positive_Basic", "[multigpu]") {
  int totalDevices = HipTest::getDeviceCount();
  if (totalDevices < 2) {
    HipTest::HIP_SKIP_TEST("This test requires 2 or more GPUs. Skipping.");
@@ -51,7 +51,7 @@ __global__ void gpu_round_robin(const int id, const int num_dev, const int num_i
  round_robin(id, num_dev, num_iter, data, flag);
 }

-TEST_CASE("Unit_threadfence_system") {
+TEST_CASE("Unit_threadfence_system", "[multigpu]") {
  int num_gpus = 0;
  HIP_CHECK(hipGetDeviceCount(&num_gpus));
  REQUIRE(num_gpus > 0);
@@ -93,7 +93,7 @@ TEST_CASE("Unit_hipExtGetLastError_Positive_Threaded") {
 *  - HIP_VERSION >= 6.4
 */

-TEST_CASE("Unit_hipExtGetLastError_with_hipMemcpyPeerAsync") {
+TEST_CASE("Unit_hipExtGetLastError_with_hipMemcpyPeerAsync", "[multigpu]") {
  const auto device_count = HipTest::getDeviceCount();
  if (device_count < 2) {
    HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
@@ -94,7 +94,7 @@ TEST_CASE("Unit_hipGetLastError_Positive_Threaded") {
 *  - HIP_VERSION >= 6.0
 */

-TEST_CASE("Unit_hipGetLastError_with_hipMemcpyPeerAsync") {
+TEST_CASE("Unit_hipGetLastError_with_hipMemcpyPeerAsync", "[multigpu]") {
  const auto device_count = HipTest::getDeviceCount();
  if (device_count < 2) {
    HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
@@ -102,7 +102,7 @@ TEST_CASE("Unit_hipGetLastError_KernelFailure_ValidAndInvalidOperations") {
 * ------------------------
 *  - HIP_VERSION >= 7.0
 */
-TEST_CASE("Unit_hipGetLastError_KernelFailure_TwoDevices") {
+TEST_CASE("Unit_hipGetLastError_KernelFailure_TwoDevices", "[multigpu]") {
  int deviceCount = 0;
  HIP_CHECK(hipGetDeviceCount(&deviceCount));
  if (deviceCount < 2) {
@@ -108,7 +108,7 @@ TEST_CASE("Unit_hipEventElapsedTime_DisableTiming") {
 * ------------------------
 *  - HIP_VERSION >= 5.2
 */
-TEST_CASE("Unit_hipEventElapsedTime_DifferentDevices") {
+TEST_CASE("Unit_hipEventElapsedTime_DifferentDevices", "[multigpu]") {
  int devCount = 0;
  HIP_CHECK(hipGetDeviceCount(&devCount));
  if (devCount > 1) {
@@ -217,7 +217,7 @@ TEST_CASE("Unit_hipEventMGpuMThreads_1") { testEventMGpuMThreads(1); }
 * ------------------------
 *  - HIP_VERSION >= 5.2
 */
-TEST_CASE("Unit_hipEventMGpuMThreads_2") {
+TEST_CASE("Unit_hipEventMGpuMThreads_2", "[multigpu]") {
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
  if (numDevices > 1) {
@@ -238,7 +238,7 @@ TEST_CASE("Unit_hipEventMGpuMThreads_2") {
 * ------------------------
 *  - HIP_VERSION >= 5.2
 */
-TEST_CASE("Unit_hipEventMGpuMThreads_3") {
+TEST_CASE("Unit_hipEventMGpuMThreads_3", "[multigpu]") {
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
  if (numDevices > 1) {
@@ -41,7 +41,7 @@ THE SOFTWARE.
 * ------------------------
 *  - HIP_VERSION >= 5.2
 */
-TEST_CASE("Unit_hipEventQuery_DifferentDevice") {
+TEST_CASE("Unit_hipEventQuery_DifferentDevice", "[multigpu]") {
  hipEvent_t event1{}, event2{};
  HIP_CHECK(hipEventCreate(&event1));
  HIP_CHECK(hipEventCreate(&event2));
@@ -158,7 +158,7 @@ TEST_CASE("Unit_hipEventRecord") {
 * ------------------------
 *  - HIP_VERSION >= 5.2
 */
-TEST_CASE("Unit_hipEventRecord_Negative") {
+TEST_CASE("Unit_hipEventRecord_Negative", "[multigpu]") {
  SECTION("Nullptr event") {
    HIP_CHECK_ERROR(hipEventRecord(nullptr, nullptr), hipErrorInvalidResourceHandle);
  }
@@ -27,7 +27,8 @@ THE SOFTWARE.
 #include <resource_guards.hh>
 #include <utils.hh>

-TEST_CASE("Unit_hipExtLaunchMultiKernelMultiDevice_Positive_Basic") {
+TEST_CASE("Unit_hipExtLaunchMultiKernelMultiDevice_Positive_Basic",
+          "[multigpu]") {
  const auto device_count = HipTest::getDeviceCount();

  std::vector<hipLaunchParams> params_list(device_count);
@@ -54,7 +55,8 @@ TEST_CASE("Unit_hipExtLaunchMultiKernelMultiDevice_Positive_Basic") {
  }
 }

-TEST_CASE("Unit_hipExtLaunchMultiKernelMultiDevice_Negative_Parameters") {
+TEST_CASE("Unit_hipExtLaunchMultiKernelMultiDevice_Negative_Parameters",
+          "[multigpu]") {
  const auto device_count = HipTest::getDeviceCount();

  std::vector<hipLaunchParams> params_list(device_count);
@@ -27,7 +27,8 @@ THE SOFTWARE.
 #include <resource_guards.hh>
 #include <utils.hh>

-TEST_CASE("Unit_hipLaunchCooperativeKernelMultiDevice_Positive_Basic") {
+TEST_CASE("Unit_hipLaunchCooperativeKernelMultiDevice_Positive_Basic",
+          "[multigpu]") {
  if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) {
    HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported");
    return;
@@ -59,7 +60,8 @@ TEST_CASE("Unit_hipLaunchCooperativeKernelMultiDevice_Positive_Basic") {
  }
 }

-TEST_CASE("Unit_hipLaunchCooperativeKernelMultiDevice_Negative_Parameters") {
+TEST_CASE("Unit_hipLaunchCooperativeKernelMultiDevice_Negative_Parameters",
+          "[multigpu]") {
  if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) {
    HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported");
    return;
@@ -309,7 +309,8 @@ TEST_CASE("Unit_hipDeviceGetGraphMemAttribute_Functional") {
  Unit_hipDeviceGetGraphMemAttribute_Functional();
 }

-TEST_CASE("Unit_hipDeviceGetGraphMemAttribute_Functional_Multi_Device") {
+TEST_CASE("Unit_hipDeviceGetGraphMemAttribute_Functional_Multi_Device",
+          "[multigpu]") {
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));

@@ -368,7 +368,7 @@ TEST_CASE("Unit_hipDrvGraphAddMemcpyNode_test") {
 *  - HIP_VERSION >= 6.1
 */

-TEST_CASE("Unit_hipDrvGraphAddMemcpyNode_MulitDevice") {
+TEST_CASE("Unit_hipDrvGraphAddMemcpyNode_MulitDevice", "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  int numDevices = 0;
@@ -1083,7 +1083,7 @@ TEST_CASE("Unit_hipGraphAddChildGraphNode_MultGraphsAsSingleGraph") {
 in multi GPU environment. Create one nested graph per GPU context. Execute
 all the created graphs in their respective GPUs and validate the output.
 */
-TEST_CASE("Unit_hipGraphAddChildGraphNode_CmplxNstGrph_MultGPU") {
+TEST_CASE("Unit_hipGraphAddChildGraphNode_CmplxNstGrph_MultGPU", "[multigpu]") {
  int devcount = 0;
  HIP_CHECK(hipGetDeviceCount(&devcount));
  // If only single GPU is detected then return
@@ -584,7 +584,7 @@ TEST_CASE("Unit_hipGraphAddMemAllocNode_Functional_1") {
 * ------------------------
 *  - HIP_VERSION >= 6.1
 */
-TEST_CASE("Unit_hipGraphAddMemAllocNode_Functional_2") {
+TEST_CASE("Unit_hipGraphAddMemAllocNode_Functional_2", "[multigpu]") {
  int mem_pool_support = 0;
  HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0));
  if (!mem_pool_support) {
@@ -653,7 +653,7 @@ TEST_CASE("Unit_hipGraphAddMemAllocNode_Functional_2") {
 * ------------------------
 * - HIP_VERSION >= 6.1
 */
-TEST_CASE("Unit_hipGraphAddMemAllocNode_Functional_3") {
+TEST_CASE("Unit_hipGraphAddMemAllocNode_Functional_3", "[multigpu]") {
  int mem_pool_support = 0;
  HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0));
  if (!mem_pool_support) {
@@ -727,7 +727,7 @@ TEST_CASE("Unit_hipGraphAddMemAllocNode_Functional_3") {
 * ------------------------
 * - HIP_VERSION >= 6.1
 */
-TEST_CASE("Unit_hipGraphAddMemAllocNode_Functional_4") {
+TEST_CASE("Unit_hipGraphAddMemAllocNode_Functional_4", "[multigpu]") {
  int mem_pool_support = 0;
  HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0));
  if (!mem_pool_support) {
@@ -115,7 +115,7 @@ static void validateMemcpyNode1DArray(bool peerAccess,
 * For Peer device test: Memory allocations happen on device(0) and memcpy operations
 * are performed from device(1).
 */
-TEST_CASE("Unit_hipGraphAddMemcpyNode1D_Functional") {
+TEST_CASE("Unit_hipGraphAddMemcpyNode1D_Functional", "[multigpu]") {
  SECTION("Memcpy with 1D array on default device") { validateMemcpyNode1DArray(false); }
  SECTION("Memcpy with 1D array using DeviceToDeviceNoCU") {
    validateMemcpyNode1DArray(false, hipMemcpyDeviceToDeviceNoCU);
@@ -255,7 +255,8 @@ in GPU-0 and add the MemcpyNodeFromSymbol node to the graph and
 verifying the result in GPU-1
 */
 #if HT_NVIDIA
-TEST_CASE("Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemoryPeerDevice") {
+TEST_CASE("Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemoryPeerDevice",
+          "[multigpu]") {
  int numDevices = 0;
  int canAccessPeer = 0;
  if (numDevices > 1) {
@@ -276,7 +277,8 @@ in GPU-0 and add the MemcpyNodeFromSymbol node to the graph and
 verifying the result in GPU-1
 */

-TEST_CASE("Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalConstMemoryPeerDevice") {
+TEST_CASE("Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalConstMemoryPeerDevice",
+          "[multigpu]") {
  int numDevices = 0;
  int canAccessPeer = 0;
  if (numDevices > 1) {
@@ -257,7 +257,8 @@ This testcase verifies allocating global const symbol memory and device variable
 in GPU-0 and add the MemcpyNodeToSymbol node to the graph and
 verifying the result in GPU-1
 */
-TEST_CASE("Unit_hipGraphAddMemcpyNodeToSymbol_GlobalConstMemoryPeerDevice") {
+TEST_CASE("Unit_hipGraphAddMemcpyNodeToSymbol_GlobalConstMemoryPeerDevice",
+          "[multigpu]") {
  int numDevices = 0;
  int canAccessPeer = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -278,7 +279,8 @@ This testcaser verifies allocating global memory,
 Add MemcpyToSymbolNode,KernelNode and memcpynode and validating
 the behaviour
 */
-TEST_CASE("Unit_hipGraphAddMemcpyNodeToSymbol_MemcpyToSymbolNodeWithKernel") {
+TEST_CASE("Unit_hipGraphAddMemcpyNodeToSymbol_MemcpyToSymbolNodeWithKernel",
+          "[multigpu]") {
  constexpr size_t Nbytes = SIZE * sizeof(int);
  constexpr auto blocksPerCU = 6;  // to hide latency
  constexpr auto threadsPerBlock = 256;
@@ -484,7 +484,7 @@ TEST_CASE("Unit_hipGraphAddMemcpyNode_BasicFunctional") {
 * are performed from device(1).
 * Tests also verify memcpy node addition with 1D, 2D and 3D objects.
 */
-TEST_CASE("Unit_hipGraphAddMemcpyNode_PeerAccessFunctional") {
+TEST_CASE("Unit_hipGraphAddMemcpyNode_PeerAccessFunctional", "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  int numDevices{}, peerAccess{};
@@ -229,7 +229,7 @@ This testcase verifies following scenarios
   validate the result of the cloned graph
 3. Device context change for cloned graph
 */
-TEST_CASE("Unit_hipGraphClone_Functional") {
+TEST_CASE("Unit_hipGraphClone_Functional", "[multigpu]") {
  SECTION("hipGraphClone Basic Functionality") { hipGraphClone_Func(); }
  SECTION("hipGraphClone Modify Original graph") { hipGraphClone_Func(true); }

@@ -1490,7 +1490,7 @@ TEST_CASE("Unit_hipGraphClone_Test_hipGraphEventWaitNodeSetEvent_and_Exec") {
 Execute both original graph and cloned graph in loop: with multiple device.
 Loop: Update input data -> Launch Graph -> Validate output data -> Goto Loop */

-TEST_CASE("Unit_hipGraphClone_address_change_in_loop") {
+TEST_CASE("Unit_hipGraphClone_address_change_in_loop", "[multigpu]") {
  constexpr size_t Nbytes = N * sizeof(int);
  constexpr auto blocksPerCU = 6;  // to hide latency
  constexpr auto threadsPerBlock = 256;
@@ -1644,7 +1644,7 @@ static void hipGraphClone_address_change_in_thread(hipGraph_t* graph, hipGraphNo
 memory addresses in each Node and create executable graphs.
 Launch the graphs in their respective GPUs. Validate the outputs. */

-TEST_CASE("Unit_hipGraphClone_address_change_in_thread") {
+TEST_CASE("Unit_hipGraphClone_address_change_in_thread", "[multigpu]") {
  constexpr size_t Nbytes = N * sizeof(int);
  constexpr auto blocksPerCU = 6;  // to hide latency
  constexpr auto threadsPerBlock = 256;
@@ -1735,7 +1735,7 @@ static void hipGraphClone_Test_All_API(int dev) {
 Create a graph with Memcpy and Kernel nodes. and its cloned graph.
 Run all the above writen test cases for multiple GPU scenarios */

-TEST_CASE("Unit_hipGraphClone_multi_GPU_test") {
+TEST_CASE("Unit_hipGraphClone_multi_GPU_test", "[multigpu]") {
  // FIXME: This test tests 3D as well, decouple it
  CHECK_IMAGE_SUPPORT

@@ -187,7 +187,8 @@ TEST_CASE("Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") {
 * Scenario 3: This test verifies event in node of the executable graph can be changed to event on
 * different device
 */
-TEST_CASE("Unit_hipGraphExecEventRecordNodeSetEvent_Positive_DifferentDevices") {
+TEST_CASE("Unit_hipGraphExecEventRecordNodeSetEvent_Positive_DifferentDevices",
+          "[multigpu]") {
  const auto device_count = HipTest::getDeviceCount();
  if (device_count < 2) {
    HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
@@ -142,7 +142,8 @@ TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Positive_Basic") {
 * ------------------------
 *    - HIP_VERSION >= 5.2
 */
-TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Negative_Parameters") {
+TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Negative_Parameters",
+          "[multigpu]") {
  using namespace std::placeholders;
  hipGraph_t graph = nullptr;
  HIP_CHECK(hipGraphCreate(&graph, 0));
@@ -129,7 +129,8 @@ TEMPLATE_TEST_CASE("Unit_hipGraphExecMemsetNodeSetParams_Positive_Basic", "", ui
 * ------------------------
 *    - HIP_VERSION >= 5.2
 */
-TEST_CASE("Unit_hipGraphExecMemsetNodeSetParams_Negative_Parameters") {
+TEST_CASE("Unit_hipGraphExecMemsetNodeSetParams_Negative_Parameters",
+          "[multigpu]") {
  // FIXME: this test tests 1D/2D/3D stuff in one single go, need to decouple it so that it can run
  // on devices with no image support
  CHECK_IMAGE_SUPPORT
@@ -639,7 +639,8 @@ TEST_CASE("Unit_hipGraphExecUpdate_Negative_NodeType_Changed") {
 *  - HIP_VERSION >= 6.0
 */

-TEST_CASE("Unit_hipGraphExecUpdate_Negative_MultiDevice_Context_Changed") {
+TEST_CASE("Unit_hipGraphExecUpdate_Negative_MultiDevice_Context_Changed",
+          "[multigpu]") {
  constexpr size_t N = 1024;
  constexpr size_t Nbytes = N * sizeof(int);
  constexpr auto blocksPerCU = 6;  // to hide latency
@@ -269,7 +269,8 @@ This testcase verifies hipGraphInstantiateWithFlags API
 by creating dependency graph on GPU-0 and instantiate, launching and verifying
 the result on GPU-1
 */
-TEST_CASE("Unit_hipGraphInstantiateWithFlags_DependencyGraphDeviceCtxtChg") {
+TEST_CASE("Unit_hipGraphInstantiateWithFlags_DependencyGraphDeviceCtxtChg",
+          "[multigpu]") {
  int numDevices = 0;
  int canAccessPeer = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -311,7 +312,8 @@ This testcase verifies hipGraphInstantiateWithFlags API
 by creating capture graph on GPU-0 and instantiate, launching and verifying
 the result on GPU-1
 */
-TEST_CASE("Unit_hipGraphInstantiateWithFlags_StreamCaptureDeviceContextChg") {
+TEST_CASE("Unit_hipGraphInstantiateWithFlags_StreamCaptureDeviceContextChg",
+          "[multigpu]") {
  int numDevices = 0;
  int canAccessPeer = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -254,7 +254,7 @@ static void hipGraphLaunch_test() {
  HIP_CHECK(hipStreamDestroy(streamForGraph));
 }

-TEST_CASE("Unit_hipGraphLaunch_Functional_multidevice_test") {
+TEST_CASE("Unit_hipGraphLaunch_Functional_multidevice_test", "[multigpu]") {
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));

@@ -135,7 +135,8 @@ TEST_CASE("Unit_hipGraphMem_Alloc_Free_NodeGetParams_Functional") {
  hipGraphMemAllocNodeGetParams_Functional();
 }

-TEST_CASE("Unit_hipGraphMem_Alloc_Free_NodeGetParams_Functional_MultiDevice") {
+TEST_CASE("Unit_hipGraphMem_Alloc_Free_NodeGetParams_Functional_MultiDevice",
+          "[multigpu]") {
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));

@@ -614,7 +614,8 @@ static void hipGraph_PerfCheck_hipGraphExecKernelNodeSetParams(const hipStream_t
 * ------------------------
 *  - HIP_VERSION >= 6.1
 */
-TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecKernelNodeSetParams") {
+TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecKernelNodeSetParams",
+          "[multigpu]") {
  if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
    HipTest::HIP_SKIP_TEST(
        "Unable to turn on "
@@ -732,7 +733,8 @@ static void hipGraph_PerfCheck_hipGraphExecKernelNodeSetParams_inLoop(const hipS
 * ------------------------
 *  - HIP_VERSION >= 6.1
 */
-TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecKernelNodeSetParams_inLoop") {
+TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecKernelNodeSetParams_inLoop",
+          "[multigpu]") {
  if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
    HipTest::HIP_SKIP_TEST(
        "Unable to turn on "
@@ -954,7 +956,8 @@ static void hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParams_inLoop(const hipS
 *  - HIP_VERSION >= 6.1
 */

-TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParams_inLoop") {
+TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParams_inLoop",
+          "[multigpu]") {
  if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
    HipTest::HIP_SKIP_TEST(
        "Unable to turn on "
@@ -1070,7 +1073,8 @@ static void hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParams1D_inLoop(const hi
 *  - HIP_VERSION >= 6.1
 */

-TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParams1D_inLoop") {
+TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParams1D_inLoop",
+          "[multigpu]") {
  if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
    HipTest::HIP_SKIP_TEST(
        "Unable to turn on "
@@ -1178,7 +1182,8 @@ static void hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParamsFrmSymbol(const hi
 *  - HIP_VERSION >= 6.1
 */

-TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParamsFrmSymbol") {
+TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParamsFrmSymbol",
+          "[multigpu]") {
  if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
    HipTest::HIP_SKIP_TEST(
        "Unable to turn on "
@@ -1285,7 +1290,8 @@ static void hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParamsToSymbol(const hip
 * ------------------------
 *  - HIP_VERSION >= 6.1
 */
-TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParamsToSymbol") {
+TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParamsToSymbol",
+          "[multigpu]") {
  if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
    HipTest::HIP_SKIP_TEST(
        "Unable to turn on "
@@ -1438,7 +1444,8 @@ static void hipGraph_PerfCheck_hipGraphExecMemsetNodeSetParams(const hipStream_t
 * ------------------------
 *  - HIP_VERSION >= 6.1
 */
-TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecMemsetNodeSetParams") {
+TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecMemsetNodeSetParams",
+          "[multigpu]") {
  if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
    HipTest::HIP_SKIP_TEST(
        "Unable to turn on "
@@ -1871,7 +1878,8 @@ static void hipGraph_PerfCheck_hipGraphExecChildGraphNodeSetParams_mKernel(
 * ------------------------
 *  - HIP_VERSION >= 6.1
 */
-TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecChildGraphNodeSetParams") {
+TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecChildGraphNodeSetParams",
+          "[multigpu]") {
  if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
    HipTest::HIP_SKIP_TEST(
        "Unable to turn on "
@@ -2018,7 +2026,8 @@ static void hipGraph_PerfCheck_hipGraphExecEventRecordNodeSetEvent(const hipStre
 * ------------------------
 *  - HIP_VERSION >= 6.1
 */
-TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecEventRecordNodeSetEvent") {
+TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecEventRecordNodeSetEvent",
+          "[multigpu]") {
  if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
    HipTest::HIP_SKIP_TEST(
        "Unable to turn on "
@@ -2205,7 +2214,8 @@ static void hipGraph_PerfCheck_hipGraphExecEventWaitNodeSetEvent(const hipStream
 * ------------------------
 *  - HIP_VERSION >= 6.1
 */
-TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecEventWaitNodeSetEvent") {
+TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecEventWaitNodeSetEvent",
+          "[multigpu]") {
  if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
    HipTest::HIP_SKIP_TEST(
        "Unable to turn on "
@@ -2359,7 +2369,8 @@ static void hipGraph_PerfCheck_hipGraphExecHostNodeSetParams(const hipStream_t&
 * ------------------------
 *  - HIP_VERSION >= 6.1
 */
-TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecHostNodeSetParams") {
+TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecHostNodeSetParams",
+          "[multigpu]") {
  if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
    HipTest::HIP_SKIP_TEST(
        "Unable to turn on "
@@ -2482,7 +2493,7 @@ static void hipGraph_PerfCheck_hipGraphExecUpdate(const hipStream_t& stream) {
 * ------------------------
 *  - HIP_VERSION >= 6.1
 */
-TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecUpdate") {
+TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecUpdate", "[multigpu]") {
  if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
    HipTest::HIP_SKIP_TEST(
        "Unable to turn on "
@@ -2626,7 +2637,8 @@ static void hipGraph_PerfCheck_hipGraphExecUpdate_kernel_inLoop(const hipStream_
 * ------------------------
 *  - HIP_VERSION >= 6.1
 */
-TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecUpdate_kernel_inLoop") {
+TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecUpdate_kernel_inLoop",
+          "[multigpu]") {
  if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
    HipTest::HIP_SKIP_TEST(
        "Unable to turn on "
@@ -152,7 +152,7 @@ TEST_CASE("Unit_hipGraphUpload_Functional") {
  }
 }

-TEST_CASE("Unit_hipGraphUpload_Functional_multidevice_test") {
+TEST_CASE("Unit_hipGraphUpload_Functional_multidevice_test", "[multigpu]") {
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));

@@ -1064,7 +1064,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_EndingCapwhenCapInProg") {
 * ------------------------
 *    - HIP_VERSION >= 5.2
 */
-TEST_CASE("Unit_hipStreamBeginCapture_Positive_MultiGPU") {
+TEST_CASE("Unit_hipStreamBeginCapture_Positive_MultiGPU", "[multigpu]") {
  int devcount = 0;
  HIP_CHECK(hipGetDeviceCount(&devcount));
  // If only single GPU is detected then return
@@ -931,7 +931,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_EndingCapturewhenCaptureInProgress") {

 /* Test scenario 15
 */
-TEST_CASE("Unit_hipStreamBeginCapture_MultiGPU") {
+TEST_CASE("Unit_hipStreamBeginCapture_MultiGPU", "[multigpu]") {
  int devcount = 0;
  HIP_CHECK(hipGetDeviceCount(&devcount));
  // If only single GPU is detected then return
@@ -45,7 +45,7 @@ __global__ void run_printf() { printf("Hello World\n"); }
 * ------------------------
 * - HIP_VERSION >= 5.6
 */
-TEST_CASE("Unit_kernel_ChkPrintf") {
+TEST_CASE("Unit_kernel_ChkPrintf", "[multigpu]") {
  int device_count = 0;
  CaptureStream capture(stdout);
  HIP_CHECK(hipGetDeviceCount(&device_count));
@@ -85,7 +85,7 @@ This testcase verifies the hipArrayCreate API in multithreaded
 scenario by launching threads in parallel on multiple GPUs
 and verifies the hipArrayCreate API with small and big chunks data
 */
-TEST_CASE("Unit_hipArrayCreate_MultiThread") {
+TEST_CASE("Unit_hipArrayCreate_MultiThread", "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  std::vector<std::thread> threadlist;
@@ -233,7 +233,7 @@ float* funcToChkArray(hipArray_t array) {
 * ------------------------
 * - HIP_VERSION >= 5.6
 */
-TEST_CASE("Unit_hipArrayGetDescriptor_1D_2D_ArrayParameterChk") {
+TEST_CASE("Unit_hipArrayGetDescriptor_1D_2D_ArrayParameterChk", "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  int numDevices = 0;
@@ -318,7 +318,8 @@ TEST_CASE("Unit_hipArrayGetDescriptor_1D_2D_ArrayParameterChk") {
 * ------------------------
 * - HIP_VERSION >= 5.6
 */
-TEST_CASE("Unit_hipArrayGetDescriptor_MultiThreadScenarioFor1D_2D_Array") {
+TEST_CASE("Unit_hipArrayGetDescriptor_MultiThreadScenarioFor1D_2D_Array",
+          "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  int numDevices = 0;
@@ -368,7 +369,7 @@ TEST_CASE("Unit_hipArrayGetDescriptor_MultiThreadScenarioFor1D_2D_Array") {
 * ------------------------
 * - HIP_VERSION >= 5.6
 */
-TEST_CASE("Unit_hipArrayGetDescriptor_Host2Array_Array2Host") {
+TEST_CASE("Unit_hipArrayGetDescriptor_Host2Array_Array2Host", "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  int numDevices = 0;
@@ -137,7 +137,7 @@ TEST_CASE("Unit_hipDeviceGetMemPool_Functional") {
 * ------------------------
 *    - HIP_VERSION >= 6.2
 */
-TEST_CASE("Unit_hipDeviceGetMemPool_Multidevice") {
+TEST_CASE("Unit_hipDeviceGetMemPool_Multidevice", "[multigpu]") {
  int num_devices;
  HIP_CHECK(hipGetDeviceCount(&num_devices));

@@ -108,7 +108,7 @@ TEST_CASE("Unit_hipDeviceSetMemPool_Basic") {
 * ------------------------
 *    - HIP_VERSION >= 6.2
 */
-TEST_CASE("Unit_hipDeviceSetMemPool_DestroyCurrentMempool") {
+TEST_CASE("Unit_hipDeviceSetMemPool_DestroyCurrentMempool", "[multigpu]") {
  int num_devices;
  HIP_CHECK(hipGetDeviceCount(&num_devices));
  for (int dev = 0; dev < num_devices; dev++) {
@@ -269,7 +269,7 @@ TEST_CASE("Unit_hipDrvMemcpy2DUnaligned_FuncTst") {
 * ------------------------
 *  - HIP_VERSION >= 6.0
 */
-TEST_CASE("Unit_hipDrvMemcpy2DUnaligned_Positive_Basic") {
+TEST_CASE("Unit_hipDrvMemcpy2DUnaligned_Positive_Basic", "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  SECTION("Device to Device") {
@@ -542,7 +542,7 @@ TEST_CASE("Unit_hipDrvMemcpy3DAsync_ExtentValidation") {
 *  - HIP_VERSION >= 6.0
 */

-TEST_CASE("Unit_hipDrvMemcpy3DAsync_H2DDeviceContextChange") {
+TEST_CASE("Unit_hipDrvMemcpy3DAsync_H2DDeviceContextChange", "[multigpu]") {
  CHECK_IMAGE_SUPPORT
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -567,7 +567,8 @@ TEST_CASE("Unit_hipDrvMemcpy3DAsync_H2DDeviceContextChange") {
 *  - HIP_VERSION >= 6.0
 */

-TEST_CASE("Unit_hipDrvMemcpy3DAsync_Host2ArrayDeviceContextChange") {
+TEST_CASE("Unit_hipDrvMemcpy3DAsync_Host2ArrayDeviceContextChange",
+          "[multigpu]") {
  CHECK_IMAGE_SUPPORT
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -595,7 +596,8 @@ TEST_CASE("Unit_hipDrvMemcpy3DAsync_Host2ArrayDeviceContextChange") {
 *  - HIP_VERSION >= 6.0
 */

-TEST_CASE("Unit_hipDrvMemcpy3DAsync_multiDevice_Basic_Size_Test") {
+TEST_CASE("Unit_hipDrvMemcpy3DAsync_multiDevice_Basic_Size_Test",
+          "[multigpu]") {
  CHECK_IMAGE_SUPPORT
  constexpr int size_128b = 128, size_256b = 256;
  int numDevices = 0;
@@ -524,7 +524,7 @@ TEST_CASE("Unit_hipDrvMemcpy3D_ExtentValidation") {
 *  - HIP_VERSION >= 6.0
 */

-TEST_CASE("Unit_hipDrvMemcpy3D_H2DDeviceContextChange") {
+TEST_CASE("Unit_hipDrvMemcpy3D_H2DDeviceContextChange", "[multigpu]") {
  CHECK_IMAGE_SUPPORT
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -549,7 +549,7 @@ TEST_CASE("Unit_hipDrvMemcpy3D_H2DDeviceContextChange") {
 *  - HIP_VERSION >= 6.0
 */

-TEST_CASE("Unit_hipDrvMemcpy3D_Host2ArrayDeviceContextChange") {
+TEST_CASE("Unit_hipDrvMemcpy3D_Host2ArrayDeviceContextChange", "[multigpu]") {
  CHECK_IMAGE_SUPPORT
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -577,7 +577,7 @@ TEST_CASE("Unit_hipDrvMemcpy3D_Host2ArrayDeviceContextChange") {
 *  - HIP_VERSION >= 6.0
 */

-TEST_CASE("Unit_hipDrvMemcpy3D_multiDevice_Basic_Size_Test") {
+TEST_CASE("Unit_hipDrvMemcpy3D_multiDevice_Basic_Size_Test", "[multigpu]") {
  CHECK_IMAGE_SUPPORT
  constexpr int size_128b = 128, size_256b = 256;
  int numDevices = 0;
@@ -2705,7 +2705,7 @@ TEST_CASE("Unit_hipGetProcAddress_MemoryApisGetMemInfoRelated") {
 * ------------------------
 *  - HIP_VERSION >= 6.2
 */
-TEST_CASE("Unit_hipGetProcAddress_MemoryApisMemcpy2DRelated") {
+TEST_CASE("Unit_hipGetProcAddress_MemoryApisMemcpy2DRelated", "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  void* hipMemcpy2D_ptr = nullptr;
@@ -6008,7 +6008,7 @@ TEST_CASE("Unit_hipGetProcAddress_MemoryApisStreamOrderedMemory") {
 * ------------------------
 *  - HIP_VERSION >= 6.2
 */
-TEST_CASE("Unit_hipGetProcAddress_MemoryApisPeerToPeer") {
+TEST_CASE("Unit_hipGetProcAddress_MemoryApisPeerToPeer", "[multigpu]") {
  int deviceCount = 0;
  HIP_CHECK(hipGetDeviceCount(&deviceCount));

@@ -107,7 +107,8 @@ void doMemCopy(size_t numElements, int offset, T* A, T* Bh, T* Bd, bool internal
 * ------------------------
 *    - HIP_VERSION >= 5.2
 */
-TEMPLATE_TEST_CASE("Unit_hipHostRegister_ReferenceFromKernelandhipMemset", "", int, float, double) {
+TEMPLATE_TEST_CASE("Unit_hipHostRegister_ReferenceFromKernelandhipMemset",
+                   "[multigpu]", int, float, double) {
  size_t sizeBytes{LEN * sizeof(TestType)};
  TestType *A, **Ad;
  int num_devices = 0;
@@ -214,7 +215,8 @@ TEMPLATE_TEST_CASE("Unit_hipHostRegister_DirectReferenceFromKernel", "", int, fl
 * ------------------------
 *    - HIP_VERSION >= 5.6
 */
-TEMPLATE_TEST_CASE("Unit_hipHostRegister_DirectReferenceMultGpu", "", int, float, double) {
+TEMPLATE_TEST_CASE("Unit_hipHostRegister_DirectReferenceMultGpu", "[multigpu]",
+                   int, float, double) {
  // 1 refers to doing hipHostRegister once for all devices
  // 0 refers to doing hipHostRegister for each device
  auto register_once = GENERATE(0, 1);
@@ -127,7 +127,7 @@ This testcase verifies the hipMalloc3D API in multithreaded
 scenario by launching threads in parallel on multiple GPUs
 and verifies the hipMalloc3D API with small and big chunks data
 */
-TEST_CASE("Unit_hipMalloc3D_MultiThread") {
+TEST_CASE("Unit_hipMalloc3D_MultiThread", "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  std::vector<std::thread> threadlist;
@@ -78,7 +78,7 @@ This testcase verifies the hipMalloc3DArray API in multithreaded
 scenario by launching threads in parallel on multiple GPUs
 and verifies the hipMalloc3DArray API with small and big chunks data
 */
-TEST_CASE("Unit_hipMalloc3DArray_MultiThread") {
+TEST_CASE("Unit_hipMalloc3DArray_MultiThread", "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  std::vector<std::thread> threadlist;
@@ -80,7 +80,7 @@ This testcase verifies the hipMallocArray API in multithreaded
 scenario by launching threads in parallel on multiple GPUs
 and verifies the hipMallocArray API with small and big chunks data
 */
-TEST_CASE("Unit_hipMallocArray_MultiThread") {
+TEST_CASE("Unit_hipMallocArray_MultiThread", "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  std::vector<std::thread> threadlist;
@@ -290,7 +290,7 @@ TEST_CASE("Unit_hipMallocAsync_StreamEvent_CrissCross") {
 * ------------------------
 *    - HIP_VERSION >= 6.2
 */
-TEST_CASE("Unit_hipMallocAsync_Multidevice") {
+TEST_CASE("Unit_hipMallocAsync_Multidevice", "[multigpu]") {
  int num_devices;
  HIP_CHECK(hipGetDeviceCount(&num_devices));
  for (int i = 0; i < num_devices; i++) {
@@ -330,7 +330,7 @@ static void threadQAsyncCommands(streamMemAllocTest* testObj, hipStream_t strm,
  testObj->freeDevBuf(strm);
 }

-TEST_CASE("Unit_hipMallocAsync_Multidevice_Concurrent") {
+TEST_CASE("Unit_hipMallocAsync_Multidevice_Concurrent", "[multigpu]") {
  int num_devices;
  HIP_CHECK(hipGetDeviceCount(&num_devices));
  checkIfMultiDev(num_devices) hipStream_t* stream_buf = new hipStream_t[num_devices];
@@ -379,7 +379,7 @@ TEST_CASE("Unit_hipMallocAsync_Multidevice_Concurrent") {
 * ------------------------
 *    - HIP_VERSION >= 6.2
 */
-TEST_CASE("Unit_hipMallocAsync_Multidevice_MultiStream") {
+TEST_CASE("Unit_hipMallocAsync_Multidevice_MultiStream", "[multigpu]") {
  int num_devices;
  HIP_CHECK(hipGetDeviceCount(&num_devices));
  checkIfMultiDev(num_devices)
@@ -303,7 +303,7 @@ TEST_CASE("Unit_hipMalloc_AllocateAndPoolBuffers") {
 * Exercise hipMalloc() api parellely on all gpus from
 * multiple threads and regress the api.
 */
-TEST_CASE("Unit_hipMalloc_Multithreaded_MultiGPU") {
+TEST_CASE("Unit_hipMalloc_Multithreaded_MultiGPU", "[multigpu]") {
  std::vector<std::thread> threadlist;
  int devCnt;

@@ -340,7 +340,7 @@ TEST_CASE("Unit_hipMallocFromPoolAsync_hipStreamPerThread") {
 * ------------------------
 *    - HIP_VERSION >= 6.2
 */
-TEST_CASE("Unit_hipMallocFromPoolAsync_ReleaseThreshold_Mgpu") {
+TEST_CASE("Unit_hipMallocFromPoolAsync_ReleaseThreshold_Mgpu", "[multigpu]") {
  constexpr int N = 1 << 20;
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -575,7 +575,7 @@ static bool checkReuseAllowOtherFlags(int N, hipMemPoolAttr attr, enum eTestValu
 *    - HIP_VERSION >= 6.2
 */
 #if HT_AMD
-TEST_CASE("Unit_hipMallocFromPoolAsync_Multidevice_Concurrent") {
+TEST_CASE("Unit_hipMallocFromPoolAsync_Multidevice_Concurrent", "[multigpu]") {
  auto testType = GENERATE(testdefault, testMaximum);
  constexpr int N = 1 << 20;
  int num_devices;
@@ -627,7 +627,7 @@ TEST_CASE("Unit_hipMallocFromPoolAsync_Multidevice_Concurrent") {
 * ------------------------
 *    - HIP_VERSION >= 6.2
 */
-TEST_CASE("Unit_hipMallocFromPoolAsync_Multidevice_MultiStream") {
+TEST_CASE("Unit_hipMallocFromPoolAsync_Multidevice_MultiStream", "[multigpu]") {
  int num_devices;
  auto testType = GENERATE(testdefault, testMaximum);
  constexpr int N = 1 << 20;
@@ -31,7 +31,7 @@ __global__ void MallcMangdFlgTst(int n, float* x, float* y) {
 }

 // The following section tests working of hipMallocManaged with flag parameters
-TEST_CASE("Unit_hipMallocManaged_FlgParam") {
+TEST_CASE("Unit_hipMallocManaged_FlgParam", "[multigpu]") {
  auto managed = HmmAttrPrint();
  if (managed != 1) {
    HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
@@ -119,7 +119,7 @@ TEST_CASE("Unit_hipMallocManaged_FlgParam") {

 // The following function tests Memory access allocated using hipMallocManaged
 // in multiple streams
-TEST_CASE("Unit_hipMallocManaged_AccessMultiStream") {
+TEST_CASE("Unit_hipMallocManaged_AccessMultiStream", "[multigpu]") {
  auto managed = HmmAttrPrint();
  if (managed != 1) {
    HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
@@ -161,7 +161,7 @@ TEST_CASE("Unit_hipMallocManaged_MultiChunkSingleDevice") {
 // Equal parts of Hmm is accessed on available gpus and
 // kernel is launched on acessed chunk of hmm memory
 // and checks if there are any inconsistencies or access issues
-TEST_CASE("Unit_hipMallocManaged_MultiChunkMultiDevice") {
+TEST_CASE("Unit_hipMallocManaged_MultiChunkMultiDevice", "[multigpu]") {
  auto managed = HmmAttrPrint();
  if (managed != 1) {
    HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
@@ -304,7 +304,8 @@ TEST_CASE("Unit_hipMallocManaged_Negative") {
 // Allocate two pointers using hipMallocManaged(), initialize,
 // then launch kernel using these pointers directly and
 // later validate the content without using any Memcpy.
-TEMPLATE_TEST_CASE("Unit_hipMallocManaged_TwoPointers", "", int, float, double) {
+TEMPLATE_TEST_CASE("Unit_hipMallocManaged_TwoPointers", "[multigpu]", int,
+                   float, double) {
  auto managed = HmmAttrPrint();
  if (managed != 1) {
    HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
@@ -344,8 +345,8 @@ TEMPLATE_TEST_CASE("Unit_hipMallocManaged_TwoPointers", "", int, float, double)
 // to all other devices. This include verification and Device two Device
 // transfers and kernel launch o discover if there any access issues.

-TEMPLATE_TEST_CASE("Unit_hipMallocManaged_DeviceContextChange", "", unsigned char, int, float,
-                   double) {
+TEMPLATE_TEST_CASE("Unit_hipMallocManaged_DeviceContextChange", "[multigpu]",
+                   unsigned char, int, float, double) {
  auto managed = HmmAttrPrint();
  if (managed != 1) {
    HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
@@ -85,7 +85,7 @@ This testcase verifies the hipMallocMipmappedArray API in multithreaded
 scenario by launching threads in parallel on multiple GPUs
 and verifies the hipMallocMipmappedArray API with small and big chunks data
 */
-TEST_CASE("Unit_hipMallocMipmappedArray_MultiThread") {
+TEST_CASE("Unit_hipMallocMipmappedArray_MultiThread", "[multigpu]") {
  std::vector<std::thread> threadlist;
  int devCnt = 0;
  devCnt = HipTest::getDeviceCount();
@@ -297,7 +297,7 @@ static void AllocateHmmMemory(int flag, int device) {
  }
 }

-TEST_CASE("Unit_hipMallocManaged_MultiThread") {
+TEST_CASE("Unit_hipMallocManaged_MultiThread", "[multigpu]") {
  auto managed = HmmAttrPrint();
  if (managed != 1) {
    HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
@@ -351,7 +351,7 @@ TEST_CASE("Unit_hipMallocManaged_MultiThread") {

 // The following test checks what happens when same Hmm memory is used to
 // launch multiple threads over multiple gpus
-TEST_CASE("Unit_hipMallocManaged_MGpuMThread") {
+TEST_CASE("Unit_hipMallocManaged_MGpuMThread", "[multigpu]") {
  auto managed = HmmAttrPrint();
  if (managed != 1) {
    HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
@@ -459,7 +459,7 @@ scenario by launching threads in parallel on multiple GPUs
 and verifies the hipMallocPitch API with small and big chunks data
 */

-TEST_CASE("Unit_hipMallocPitch_MultiThread", "") {
+TEST_CASE("Unit_hipMallocPitch_MultiThread", "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  std::vector<std::thread> threadlist;
@@ -181,7 +181,7 @@ TEST_CASE("Unit_hipMemAdvise_Flags_Do_Not_Cause_Prefetch") {
 #endif
 }

-TEST_CASE("Unit_hipMemAdvise_Read_Write_After_Advise") {
+TEST_CASE("Unit_hipMemAdvise_Read_Write_After_Advise", "[multigpu]") {
  auto supported_devices = GetDevicesWithAdviseSupport();
  if (supported_devices.empty()) {
    HipTest::HIP_SKIP_TEST("Test needs at least 1 device that supports managed memory");
@@ -243,7 +243,7 @@ TEST_CASE("Unit_hipMemAdvise_NegtveTsts") {

 // The following function tests various scenarios around the flag
 // 'hipMemAdviseSetPreferredLocation' using HMM memory and hipMemAdvise() api
-TEST_CASE("Unit_hipMemAdvise_PrefrdLoc") {
+TEST_CASE("Unit_hipMemAdvise_PrefrdLoc", "[multigpu]") {
  int MangdMem = HmmAttrPrint();
  if (MangdMem == 1) {
    // Check that when a page fault occurs for the memory region set to devPtr,
@@ -428,7 +428,7 @@ TEST_CASE("Unit_hipMemAdvise_TstFlgOverrideEffect") {
 // The following function tests if peers can set hipMemAdviseSetAccessedBy flag
 // on HMM memory prefetched on each of the other gpus
 #if HT_AMD
-TEST_CASE("Unit_hipMemAdvise_TstAccessedByPeer") {
+TEST_CASE("Unit_hipMemAdvise_TstAccessedByPeer", "[multigpu]") {
  int MangdMem = HmmAttrPrint();
  if (MangdMem == 1) {
    bool IfTestPassed = true;
@@ -732,7 +732,7 @@ TEST_CASE("Unit_hipMemAdvise_TstMemAdvisePrefrdLoc") {
   to device1, probe for hipMemRangeAttributeLastPrefetchLocation using
   hipMemRangeGetAttribute(), we should get 1*/

-TEST_CASE("Unit_hipMemAdvise_TstMemAdviseLstPreftchLoc") {
+TEST_CASE("Unit_hipMemAdvise_TstMemAdviseLstPreftchLoc", "[multigpu]") {
  int NumDevs = 0;
  HIP_CHECK(hipGetDeviceCount(&NumDevs));
  if (NumDevs >= 2) {
@@ -802,7 +802,7 @@ TEST_CASE("Unit_hipMemAdvise_TstMemAdviseMultiFlag") {
  access denial case arising due to setting ReadMostly only to a particular
  gpu*/

-TEST_CASE("Unit_hipMemAdvise_ReadMosltyMgpuTst") {
+TEST_CASE("Unit_hipMemAdvise_ReadMosltyMgpuTst", "[multigpu]") {
  int managed = HmmAttrPrint();
  if (managed == 1) {
    int Ngpus = 0;
@@ -70,7 +70,7 @@ static std::vector<int> getSupportedDevices() {
 * ------------------------
 *  - HIP_VERSION >= 7.1
 */
-TEST_CASE("Unit_hipMemAdvise_v2_Device_Host") {
+TEST_CASE("Unit_hipMemAdvise_v2_Device_Host", "[multigpu]") {
  auto supportedDevices = getSupportedDevices();
  if (supportedDevices.empty()) {
    HipTest::HIP_SKIP_TEST(
@@ -93,7 +93,7 @@ TEST_CASE("Unit_hipMemAllocHost_Negative") {
 /*
 * Verify that a device can read/write to the memory of another device
 */
-TEST_CASE("Unit_hipMemAllocHost_VerifyAccess") {
+TEST_CASE("Unit_hipMemAllocHost_VerifyAccess", "[multigpu]") {
  int devices_number = 0;
  HIP_CHECK(hipGetDeviceCount(&devices_number));
  std::vector<int*> devices_memories(devices_number);
@@ -157,7 +157,7 @@ static __global__ void setKer(int* devptr) {
 * ------------------------
 *    - HIP_VERSION >= 6.2
 */
-TEST_CASE("Unit_hipMemPoolCreate_DeviceTest") {
+TEST_CASE("Unit_hipMemPoolCreate_DeviceTest", "[multigpu]") {
  checkMempoolSupported(0) int num_devices = 0;
  HIP_CHECK(hipGetDeviceCount(&num_devices));
  checkIfMultiDev(num_devices)
@@ -98,7 +98,7 @@ int CheckP2PMemPoolSupport(int src_device, int dst_device) {
 * ------------------------
 *  - HIP_VERSION >= 6.2
 */
-TEST_CASE("Unit_hipMemPoolSetGetAccess_Positive_MultipleGPU") {
+TEST_CASE("Unit_hipMemPoolSetGetAccess_Positive_MultipleGPU", "[multigpu]") {
  const auto device_count = HipTest::getDeviceCount();
  if (device_count < 2) {
    HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
@@ -212,7 +212,7 @@ void MemPoolSetGetAccess_P2P(const MemPools mempool_type) {
 * ------------------------
 *  - HIP_VERSION >= 6.2
 */
-TEST_CASE("Unit_hipMemPoolSetGetAccess_Positive_P2P") {
+TEST_CASE("Unit_hipMemPoolSetGetAccess_Positive_P2P", "[multigpu]") {
  const auto device_count = HipTest::getDeviceCount();
  if (device_count < 2) {
    HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
@@ -406,7 +406,7 @@ static void getDevicePairs(std::vector<std::pair<int, int>>* p2p_pairs, int numD
 * ------------------------
 *    - HIP_VERSION >= 6.2
 */
-TEST_CASE("Unit_hipMemPoolSetAccess_SetAccess") {
+TEST_CASE("Unit_hipMemPoolSetAccess_SetAccess", "[multigpu]") {
  constexpr int N = 1 << 14;
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -47,7 +47,7 @@ __global__ void MemPrefetchAsyncKernel(int* C_d, const int* A_d, size_t N) {
  }
 }

-TEST_CASE("Unit_hipMemPrefetchAsync_Basic") {
+TEST_CASE("Unit_hipMemPrefetchAsync_Basic", "[multigpu]") {
  const auto supported_devices = GetDevicesWithPrefetchSupport();
  if (supported_devices.empty()) {
    HipTest::HIP_SKIP_TEST("Test need at least one device with managed memory support");
@@ -70,7 +70,7 @@ static std::vector<int> getSupportedDevices() {
 * ------------------------
 *  - HIP_VERSION >= 7.1
 */
-TEST_CASE("Unit_hipMemPrefetchAsync_v2_Device_Host") {
+TEST_CASE("Unit_hipMemPrefetchAsync_v2_Device_Host", "[multigpu]") {
  auto supportedDevices = getSupportedDevices();
  if (supportedDevices.empty()) {
    HipTest::HIP_SKIP_TEST(
@@ -27,7 +27,7 @@ THE SOFTWARE.
 #include <resource_guards.hh>
 #include <utils.hh>

-TEST_CASE("Unit_hipMemcpy2D_Positive_Basic") {
+TEST_CASE("Unit_hipMemcpy2D_Positive_Basic", "[multigpu]") {
  CHECK_IMAGE_SUPPORT
  constexpr bool async = false;

@@ -27,7 +27,7 @@ THE SOFTWARE.
 #include <resource_guards.hh>
 #include <utils.hh>

-TEST_CASE("Unit_hipMemcpy2DAsync_Positive_Basic") {
+TEST_CASE("Unit_hipMemcpy2DAsync_Positive_Basic", "[multigpu]") {
  using namespace std::placeholders;

  constexpr bool async = true;
@@ -172,7 +172,8 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_Host&PinnedMem", "", int, float, doubl
 *  - HIP_VERSION >= 5.2
 */

-TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-Host&PinnedMem", "", int, float, double) {
+TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-Host&PinnedMem",
+                   "[multigpu]", int, float, double) {
  CHECK_IMAGE_SUPPORT
  auto mem_type = GENERATE(0, 1);
  int numDevices = 0;
@@ -264,7 +265,8 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-Host&PinnedMem", "", int,
 *  - HIP_VERSION >= 5.2
 */

-TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-StreamOnDiffDevice", "", int, float, double) {
+TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-StreamOnDiffDevice",
+                   "[multigpu]", int, float, double) {
  CHECK_IMAGE_SUPPORT
  auto mem_type = GENERATE(0, 1);
  int numDevices = 0;
@@ -506,7 +508,7 @@ static void hipMemcpy2DAsync_Basic_Size_Test(size_t inc) {
 *  - HIP_VERSION >= 6.0
 */

-TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice_Basic_Size_Test") {
+TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice_Basic_Size_Test", "[multigpu]") {
  CHECK_IMAGE_SUPPORT
  size_t input = 1 << 20;
  int numDevices = 0;
@@ -34,8 +34,7 @@ invalid
 #include <resource_guards.hh>
 #include <utils.hh>

-
-TEST_CASE("Unit_hipMemcpy2DFromArray_Positive_Default") {
+TEST_CASE("Unit_hipMemcpy2DFromArray_Positive_Default", "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  using namespace std::placeholders;
@@ -34,7 +34,7 @@ of hipMemcpy2DFromArrayAsync api when parameters are invalid
 #include <resource_guards.hh>
 #include <utils.hh>

-TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_Positive_Default") {
+TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_Positive_Default", "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  using namespace std::placeholders;
@@ -195,7 +195,8 @@ TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_PinnedHostMemSameGpu") {
           then A_d-->E_h  in GPU1
 * OUTPUT: validating the result by comparing A_h and E_h
 */
-TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_multiDevicePinnedHostMem") {
+TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_multiDevicePinnedHostMem",
+          "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  int numDevices = 0;
@@ -254,7 +255,8 @@ TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_multiDevicePinnedHostMem") {
 *         --> A_h host variable
 *         and verifying A_h with Phi
 * */
-TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_multiDeviceContextChange") {
+TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_multiDeviceContextChange",
+          "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  int numDevices = 0;
@@ -165,7 +165,8 @@ TEST_CASE("Unit_hipMemcpy2DFromArray_PinnedMemSameGPU") {
 *         --> E_h host variable
 *         and verifying A_h with E_h
 */
-TEST_CASE("Unit_hipMemcpy2DFromArray_multiDevicePinnedMemPeerGpu") {
+TEST_CASE("Unit_hipMemcpy2DFromArray_multiDevicePinnedMemPeerGpu",
+          "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  int numDevices = 0;
@@ -218,7 +219,7 @@ TEST_CASE("Unit_hipMemcpy2DFromArray_multiDevicePinnedMemPeerGpu") {
 *         --> A_h host variable
 *         and verifying A_h with Phi
 * */
-TEST_CASE("Unit_hipMemcpy2DFromArray_multiDeviceContextChange") {
+TEST_CASE("Unit_hipMemcpy2DFromArray_multiDeviceContextChange", "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  int numDevices = 0;
@@ -33,8 +33,7 @@ unsuccessful execution of hipMemcpy2DToArray api when parameters are invalid
 #include <resource_guards.hh>
 #include <utils.hh>

-
-TEST_CASE("Unit_hipMemcpy2DToArray_Positive_Default") {
+TEST_CASE("Unit_hipMemcpy2DToArray_Positive_Default", "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  using namespace std::placeholders;
@@ -34,8 +34,7 @@ of hipMemcpy2DToArrayAsync api when parameters are invalid
 #include <resource_guards.hh>
 #include <utils.hh>

-
-TEST_CASE("Unit_hipMemcpy2DToArrayAsync_Positive_Default") {
+TEST_CASE("Unit_hipMemcpy2DToArrayAsync_Positive_Default", "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  using namespace std::placeholders;
@@ -194,7 +194,8 @@ TEST_CASE("Unit_hipMemcpy2DToArrayAsync_PinnedHostMemSameGpu") {
 *         --> A_h host variable
 *         and verifying A_h with E_h[0]+i(i.e., 10+i)
 */
-TEST_CASE("Unit_hipMemcpy2DToArrayAsync_multiDevicePinnedHostMem") {
+TEST_CASE("Unit_hipMemcpy2DToArrayAsync_multiDevicePinnedHostMem",
+          "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  int numDevices = 0;
@@ -254,7 +255,8 @@ TEST_CASE("Unit_hipMemcpy2DToArrayAsync_multiDevicePinnedHostMem") {
 *         --> A_h host variable
 *         and verifying A_h with Phi
 * */
-TEST_CASE("Unit_hipMemcpy2DToArrayAsync_multiDeviceDeviceContextChange") {
+TEST_CASE("Unit_hipMemcpy2DToArrayAsync_multiDeviceDeviceContextChange",
+          "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  int numDevices = 0;
@@ -164,7 +164,7 @@ TEST_CASE("Unit_hipMemcpy2DToArray_PinnedMemSameGPU") {
 *         --> A_h host variable
 *         and verifying A_h with E_h[0]+i(i.e., 10+i)
 */
-TEST_CASE("Unit_hipMemcpy2DToArray_multiDevicePinnedMemPeerGpu") {
+TEST_CASE("Unit_hipMemcpy2DToArray_multiDevicePinnedMemPeerGpu", "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  int numDevices = 0;
@@ -218,7 +218,8 @@ TEST_CASE("Unit_hipMemcpy2DToArray_multiDevicePinnedMemPeerGpu") {
 *         --> A_h host variable
 *         and verifying A_h with Phi
 * */
-TEST_CASE("Unit_hipMemcpy2DToArray_multiDeviceDeviceContextChange") {
+TEST_CASE("Unit_hipMemcpy2DToArray_multiDeviceDeviceContextChange",
+          "[multigpu]") {
  CHECK_IMAGE_SUPPORT

  int numDevices = 0;
@@ -309,7 +309,8 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_H2D-D2D-D2H_Managed_WithOffset", "", int, f
 *  - HIP_VERSION >= 6.0
 */

-TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_multiDevice-D2D", "", int, float, double) {
+TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_multiDevice-D2D", "[multigpu]", int, float,
+                   double) {
  CHECK_IMAGE_SUPPORT
  auto mem_type = GENERATE(0, 1);
  int numDevices = 0;
@@ -524,7 +525,7 @@ static void hipMemcpy2D_Basic_Size_Test(size_t inc) {
 *  - HIP_VERSION >= 6.0
 */

-TEST_CASE("Unit_hipMemcpy2D_multiDevice_Basic_Size_Test") {
+TEST_CASE("Unit_hipMemcpy2D_multiDevice_Basic_Size_Test", "[multigpu]") {
  CHECK_IMAGE_SUPPORT
  size_t input = 1 << 20;
  int numDevices = 0;
@@ -715,7 +715,7 @@ TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-Negative") {
 *  - HIP_VERSION >= 6.0
 */

-TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-D2D") {
+TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-D2D", "[multigpu]") {
  CHECK_IMAGE_SUPPORT
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -747,7 +747,7 @@ TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-D2D") {
 *  - HIP_VERSION >= 6.0
 */

-TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-DiffStream") {
+TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-DiffStream", "[multigpu]") {
  CHECK_IMAGE_SUPPORT
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -609,7 +609,7 @@ TEST_CASE("Unit_hipMemcpy3D_multiDevice-Negative") {
 *  - HIP_VERSION >= 5.2
 */

-TEST_CASE("Unit_hipMemcpy3D_multiDevice-OnPeerDevice") {
+TEST_CASE("Unit_hipMemcpy3D_multiDevice-OnPeerDevice", "[multigpu]") {
  CHECK_IMAGE_SUPPORT
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -644,7 +644,7 @@ TEST_CASE("Unit_hipMemcpy3D_multiDevice-OnPeerDevice") {
 *  - HIP_VERSION >= 6.0
 */

-TEST_CASE("Unit_hipMemcpy3D_multiDevice_Basic_Size_Test") {
+TEST_CASE("Unit_hipMemcpy3D_multiDevice_Basic_Size_Test", "[multigpu]") {
  CHECK_IMAGE_SUPPORT
  constexpr int size_128b = 128, size_256b = 256;
  int numDevices = 0;
@@ -128,7 +128,8 @@ This testcase verifies the following scenarios
 4. Device context change
 5. H2D-D2D-D2H peer GPU
 */
-TEMPLATE_TEST_CASE("Unit_hipMemcpyAsync_H2H-H2D-D2H-H2PinMem", "", char, int, float, double) {
+TEMPLATE_TEST_CASE("Unit_hipMemcpyAsync_H2H-H2D-D2H-H2PinMem", "[multigpu]",
+                   char, int, float, double) {
  TestType *A_d{nullptr}, *B_d{nullptr};
  TestType *A_h{nullptr}, *B_h{nullptr};
  TestType *A_Ph{nullptr}, *B_Ph{nullptr};
@@ -288,7 +289,8 @@ This testcase verifies hipMemcpy API with pinnedMemory and hostRegister
 along with kernel launches
 */

-TEMPLATE_TEST_CASE("Unit_hipMemcpyAsync_PinnedRegMemWithKernelLaunch", "", int, float, double) {
+TEMPLATE_TEST_CASE("Unit_hipMemcpyAsync_PinnedRegMemWithKernelLaunch",
+                   "[multigpu]", int, float, double) {
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
  if (numDevices < 2) {
@@ -90,8 +90,8 @@ Output:"B_h" host variable output of hipMemcpyAtoH API
        is then validated with "hData"
 */
 #if HT_AMD
-TEMPLATE_TEST_CASE("Unit_hipMemcpyAtoH_multiDevice-PeerDeviceContext", "[hipMemcpyAtoH]", char, int,
-                   float) {
+TEMPLATE_TEST_CASE("Unit_hipMemcpyAtoH_multiDevice-PeerDeviceContext",
+                   "[hipMemcpyAtoH][multigpu]", char, int, float) {
  CHECK_IMAGE_SUPPORT

  int numDevices = 0;
@@ -38,7 +38,8 @@ This testcase verifies hipMemcpyDtoD API
 6.Kernel Launch
 7.DtoH copy and validating the result
 */
-TEMPLATE_TEST_CASE("Unit_hipMemcpyDtoD_Basic", "", int, float, double) {
+TEMPLATE_TEST_CASE("Unit_hipMemcpyDtoD_Basic", "[multigpu]", int, float,
+                   double) {
  size_t Nbytes = NUM_ELM * sizeof(TestType);
  int numDevices = 0;
  TestType *A_d{nullptr}, *B_d{nullptr}, *C_d{nullptr}, *X_d{nullptr}, *Y_d{nullptr}, *Z_d{nullptr};
--- a/نمایش بیشتر
+++ b/نمایش بیشتر