EXSWHTEC-281 - Implement Unit Tests for assert functions #210

Change-Id: I6c05915c957d9b67951b3e97cc35cb1ca72a945f [ROCm/hip-tests commit: 044a59496c]
2023-12-28 17:41:54 +00:00
commit dcade635c6
@@ -309,6 +309,8 @@
        "Performance_hipMemsetD32",
        "Performance_hipMemsetD32Async",
        "Unit_hipGraphKernelNodeGetAttribute_Negative_Parameters",
+        "=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===",
+        "Unit_Assert_Positive_Basic_KernelFail",
    #endif
    #if defined VEGA20
        "=== SWDEV-419112 Below tests fail in stress test on 29/08/23 ===",
@@ -440,6 +440,11 @@
        "Unit_Thread_Block_Tile_Shfl_Positive_Basic - float",
        "Unit_Thread_Block_Tile_Shfl_Positive_Basic - double",
        "Unit_Thread_Block_Tile_Getters_Positive_Basic",
+        "=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===",
+        "Unit_StaticAssert_Positive_Basic_RTC",
+        "Unit_Assert_Positive_Basic_KernelFail",
+        "Unit_StaticAssert_Positive_Basic",
+        "Unit_StaticAssert_Negative_Basic",
    #endif
        "End of json"
    ]
@@ -89,6 +89,9 @@
        "Performance_hipMemsetD32",
        "Performance_hipMemsetD32Async",
        "Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior",
-        "Unit_hipMemcpy_Positive_Synchronization_Behavior"
+        "Unit_hipMemcpy_Positive_Synchronization_Behavior",
+        "=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===",
+        "Unit_StaticAssert_Positive_Basic",
+        "Unit_StaticAssert_Negative_Basic"
    ]
 }
@@ -129,6 +129,19 @@ THE SOFTWARE.
    }                                                                                              \
  }

+// Check that an expression, errorExpr, evaluates to the expected error_t, expectedError.
+#define HIPRTC_CHECK_ERROR(errorExpr, expectedError)                                               \
+  {                                                                                                \
+    auto localError = errorExpr;                                                                   \
+    INFO("Matching Errors: "                                                                       \
+         << "\n    Expected Error: " << hiprtcGetErrorString(expectedError)                        \
+         << "\n    Expected Code: " << expectedError << '\n'                                       \
+         << "                  Actual Error:   " << hiprtcGetErrorString(localError)               \
+         << "\n    Actual Code:   " << localError << "\nStr: " << #errorExpr                       \
+         << "\n    In File: " << __FILE__ << "\n    At line: " << __LINE__);                       \
+    REQUIRE(localError == expectedError);                                                          \
+  }
+
 #define HIPASSERT(condition)                                                                       \
  if (!(condition)) {                                                                              \
    printf("assertion %s at %s:%d \n", #condition, __FILE__, __LINE__);                            \
@@ -59,3 +59,4 @@ add_subdirectory(gl_interop) # Disabled on NVIDIA due to defect - EXSWHTEC-246
 endif()
 add_subdirectory(synchronization)
 add_subdirectory(launchBounds)
+add_subdirectory(assertion)
@@ -0,0 +1,48 @@
+# Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights Reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+if(HIP_PLATFORM MATCHES "nvidia")
+    set(TEST_SRC
+        assert.cc
+    )
+    hip_add_exe_to_target(NAME AssertionTest
+                          TEST_SRC ${TEST_SRC}
+                          TEST_TARGET_NAME build_tests
+                          LINKER_LIBS nvrtc)
+elseif(HIP_PLATFORM MATCHES "amd")
+    set(TEST_SRC
+        static_assert.cc
+        assert.cc
+    )
+    hip_add_exe_to_target(NAME AssertionTest
+                          TEST_SRC ${TEST_SRC}
+                          TEST_TARGET_NAME build_tests
+                          LINKER_LIBS hiprtc)
+endif()
+
+add_test(NAME Unit_StaticAssert_Positive_Basic
+         COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
+         ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
+         static_assert_kernels_positive.cc 2)
+
+add_test(NAME Unit_StaticAssert_Negative_Basic
+         COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
+         ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
+         static_assert_kernels_negative.cc 2)
@@ -0,0 +1,118 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_common.hh>
+#include <csetjmp>
+#include <csignal>
+
+/**
+ * @addtogroup assert assert
+ * @{
+ * @ingroup DeviceLanguageTest
+ * `void assert(int expression)` -
+ * Stops the kernel execution if expression is equal to zero.
+ */
+
+jmp_buf env_ignore_abort;
+volatile int abort_raised_flag = 0;
+
+void on_sigabrt(int signum) {
+  signal(signum, SIG_DFL);
+  abort_raised_flag = 1;
+  longjmp(env_ignore_abort, 1);
+}
+
+void try_and_catch_abort(void (*func)()) {
+  if (!setjmp(env_ignore_abort)) {
+    signal(SIGABRT, &on_sigabrt);
+    (*func)();
+    signal(SIGABRT, SIG_DFL);
+  }
+}
+
+__global__ void AssertPassKernel() {
+  const int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  // expected always to be true
+  assert(tid >= 0);
+}
+
+__global__ void AssertFailKernel() {
+  const int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  // expected to fail for the even thread indices
+  assert(tid % 2 == 1);
+}
+
+template <bool should_abort> void LaunchAssertKernel() {
+  const int num_blocks = 2;
+  const int num_threads = 16;
+
+  if constexpr (should_abort) {
+    AssertFailKernel<<<num_blocks, num_threads, 0, 0>>>();
+#if HT_AMD
+    HIP_CHECK(hipDeviceSynchronize());
+#else
+    HIP_CHECK_ERROR(hipDeviceSynchronize(), hipErrorAssert);
+#endif
+  } else {
+    AssertPassKernel<<<num_blocks, num_threads, 0, 0>>>();
+    HIP_CHECK(hipDeviceSynchronize());
+  }
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Launches kernels with asserts that have an expression equal to 1.
+ *  - Expects that SIGABRT is not raised and kernels have executed successfully.
+ * Test source
+ * ------------------------
+ *  - unit/assertion/assert.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Assert_Positive_Basic_KernelPass") {
+  try_and_catch_abort(&LaunchAssertKernel<false>);
+  REQUIRE(abort_raised_flag == 0);
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Launches kernels with asserts that have an expression equal to 0.
+ *  - Expects that SIGABRT is raised and kernels have been stopped on AMD.
+ *    - The HIP runtime also aborts the host code, so this test case uses signal handlers
+ *      to avoid host code abortion.
+ *  - Expects that `hipErrorAssert` is returned from `hipDeviceSynchronize` on NVIDIA.
+ *    - The host code is not aborted.
+ * Test source
+ * ------------------------
+ *  - unit/assertion/assert.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Assert_Positive_Basic_KernelFail") {
+  try_and_catch_abort(&LaunchAssertKernel<true>);
+#if HT_AMD
+  REQUIRE(abort_raised_flag == 1);
+#else
+  REQUIRE(abort_raised_flag == 0);
+#endif
+}
@@ -0,0 +1,88 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_common.hh>
+#include "static_assert_kernels_rtc.hh"
+
+/**
+ * @addtogroup static_assert static_assert
+ * @{
+ * @ingroup DeviceLanguageTest
+ * `void static_assert(constexpr expression, const char* message)` -
+ * Stops the compilation if expression is equal to zero, and displays the specified message.
+ */
+
+void StaticAssertWrapper(const char* program_source) {
+  hiprtcProgram program{};
+
+  HIPRTC_CHECK(
+      hiprtcCreateProgram(&program, program_source, "static_assert_rtc.cc", 0, nullptr, nullptr));
+  hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)};
+
+  // Get the compile log and count compiler error messages
+  size_t log_size{};
+  HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
+  std::string log(log_size, ' ');
+  HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));
+  int error_count{0};
+
+  int expected_error_count{2};
+  std::string error_message{"error:"};
+
+  size_t n_pos = log.find(error_message, 0);
+  while (n_pos != std::string::npos) {
+    ++error_count;
+    n_pos = log.find(error_message, n_pos + 1);
+  }
+
+  HIPRTC_CHECK(hiprtcDestroyProgram(&program));
+  HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
+  REQUIRE(error_count == expected_error_count);
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Compiles kernels with static_assert calls:
+ *    -# Expected that static_assert passes and compilation is successful.
+ *    -# Expected that static_assert fails and compilation has errors.
+ *  - Uses RTC to perform compilation.
+ * Test source
+ * ------------------------
+ *  - unit/assertion/static_assert.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_StaticAssert_Positive_Basic_RTC") { StaticAssertWrapper(kStaticAssert_Positive); }
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Passes invalidly formed expressions to static_assert calls.
+ *  - Uses expressions that are not constexpr and values that are not known during compilation.
+ *  - Uses RTC to perform compilation.
+ * Test source
+ * ------------------------
+ *  - unit/assertion/static_assert.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_StaticAssert_Negative_Basic_RTC") { StaticAssertWrapper(kStaticAssert_Negative); }
@@ -0,0 +1,30 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_common.hh>
+
+__global__ void StaticAssertErrorKernel1() {
+  const int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  static_assert(tid % 2 == 1, "[StaticAssertErrorKernel1]");
+}
+
+__global__ void StaticAssertErrorKernel2() {
+  int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  static_assert(++tid > 2, "[StaticAssertErrorKernel2]");
+}
@@ -0,0 +1,32 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_common.hh>
+
+__global__ void StaticAssertPassKernel1() {
+  static_assert(sizeof(int) < sizeof(long), "[StaticAssertPassKernel1]");
+}
+
+__global__ void StaticAssertPassKernel2() { static_assert(10 > 5, "[StaticAssertPassKernel2]"); }
+
+__global__ void StaticAssertFailKernel1() {
+  static_assert(sizeof(int) > sizeof(long), "[StaticAssertFailKernel1]");
+}
+
+__global__ void StaticAssertFailKernel2() { static_assert(10 < 5, "[StaticAssertFailKernel2]"); }
@@ -0,0 +1,56 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#pragma once
+
+/*
+Positive and negative kernels used for the static_assert Test Cases that are using RTC.
+*/
+
+static constexpr auto kStaticAssert_Positive{
+    R"(
+    __global__ void StaticAssertPassKernel1() {
+      static_assert(sizeof(int) < sizeof(long), "[StaticAssertPassKernel1]");
+    }
+
+    __global__ void StaticAssertPassKernel2() {
+      static_assert(10 > 5, "[StaticAssertPassKernel2]");
+    }
+
+    __global__ void StaticAssertFailKernel1() {
+      static_assert(sizeof(int) > sizeof(long), "[StaticAssertFailKernel1]");
+    }
+
+    __global__ void StaticAssertFailKernel2() {
+      static_assert(10 < 5, "[StaticAssertFailKernel2]");
+    }
+  )"};
+
+static constexpr auto kStaticAssert_Negative{
+    R"(
+    __global__ void StaticAssertErrorKernel1() {
+      const int tid = threadIdx.x + blockIdx.x * blockDim.x;
+      static_assert(tid % 2 == 1, "[StaticAssertErrorKernel1]");
+    }
+
+    __global__ void StaticAssertErrorKernel2() {
+      int tid = threadIdx.x + blockIdx.x * blockDim.x;
+      static_assert(++tid > 2, "[StaticAssertErrorKernel2]");
+    }
+  )"};
@@ -22,7 +22,6 @@ THE SOFTWARE.
 #include <functional>
 #include <vector>

-#include <hip_test_defgroups.hh>
 #include <hip_test_common.hh>
 #include <hip_test_checkers.hh>

@@ -75,7 +74,7 @@ void GraphMemcpyFromSymbolShell(void* symbol, size_t offset, const std::vector<T
 * ------------------------
 *    - Verify that data is correctly copied from a symbol. A graph is constructed to which a
 * MemcpyFromSymbol node is added. After graph execution, values in destination memory are compared
- * against values known to be in symbol memory.  
+ * against values known to be in symbol memory.
 * The test is run for scalar, const scalar, array, and const array symbols of types char, int,
 * float and double. For array symbols, the test is repeated for zero and non-zero offset values.
 * Verification is performed for destination memory allocated on host and device.
@@ -106,7 +105,7 @@ TEST_CASE("Unit_hipGraphAddMemcpyNodeFromSymbol_Positive_Basic") {

 /**
 * Test Description
- * ------------------------ 
+ * ------------------------
 *    - Verify API behavior with invalid arguments:
 *      -# pGraphNodes is nullptr
 *      -# graph is nullptr
@@ -122,12 +121,12 @@ TEST_CASE("Unit_hipGraphAddMemcpyNodeFromSymbol_Positive_Basic") {
 *      -# kind is illogical (hipMemcpyHostToDevice)
 *      -# kind is an invalid enum value
 * Test source
- * ------------------------ 
+ * ------------------------
 *    - unit/graph/hipGraphAddMemcpyNodeFromSymbol.cc
 * Test requirements
- * ------------------------ 
+ * ------------------------
 *    - HIP_VERSION >= 5.2
- */ 
+ */
 TEST_CASE("Unit_hipGraphAddMemcpyNodeFromSymbol_Negative_Parameters") {
  using namespace std::placeholders;
  hipGraph_t graph = nullptr;
@@ -23,7 +23,6 @@ THE SOFTWARE.
 #include <functional>
 #include <vector>

-#include <hip_test_defgroups.hh>
 #include <hip_test_common.hh>
 #include <hip_test_checkers.hh>

@@ -78,7 +77,7 @@ void GraphMemcpyToSymbolShell(const void* symbol, size_t offset, const std::vect
 *    - Verify that data is correctly copied to a symbol. A graph is constructed to which a
 * MemcpyToSymbol node is added. After graph execution, a MemcpyFromSymbol is performed  and
 * the copied values are compared against values known to have been copied to symbol memory
- * previously.  
+ * previously.
 * The test is run for scalar, const scalar, array, and const array symbols of types char, int,
 * float and double. For array symbols, the test is repeated for zero and non-zero offset values.
 * Verification is performed for source memory allocated on host and device.
@@ -22,7 +22,6 @@ THE SOFTWARE.
 #include <functional>
 #include <vector>

-#include <hip_test_defgroups.hh>
 #include <hip_test_common.hh>
 #include <resource_guards.hh>
 #include <utils.hh>
@@ -129,7 +128,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_Negative_Parameters") {
 * Allocate a 2D array using hipMallocPitch. Initialize the allocated memory
 * using hipGraphAddMemsetNode. Copy the values in device memory to host using
 * hipGraphAddMemcpyNode. Verify the results.
-*/
+ */
 TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") {
  CHECK_IMAGE_SUPPORT

@@ -147,22 +146,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") {
    }
  }
  // 2D Memory allocation hipMallocPitch
-  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width,
-                          numH));
+  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width, numH));
  // Create Graph
  HIP_CHECK(hipGraphCreate(&graph, 0));
  hipGraphNode_t memsetNode, memcpyNode;
  // Add MemSet Node
  hipMemsetParams memsetParams{};
  memset(&memsetParams, 0, sizeof(memsetParams));
-  memsetParams.dst = reinterpret_cast<void *>(A_d);
+  memsetParams.dst = reinterpret_cast<void*>(A_d);
  memsetParams.value = memSetVal;
  memsetParams.pitch = pitch_A;
  memsetParams.elementSize = sizeof(char);
  memsetParams.width = numW;
  memsetParams.height = numH;
-  HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0,
-          &memsetParams));
+  HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams));
  nodeDependencies.push_back(memsetNode);
  // Add MemCpy Node
  hipMemcpy3DParms myparms{};
@@ -173,21 +170,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") {
  myparms.extent = make_hipExtent(width, numH, 1);
  myparms.kind = hipMemcpyDeviceToHost;
  HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(),
-                                            nodeDependencies.size(), &myparms));
+                                  nodeDependencies.size(), &myparms));
  nodeDependencies.clear();
  // Create executable graph
  hipStream_t streamForGraph;
  hipGraphExec_t graphExec;
  HIP_CHECK(hipStreamCreate(&streamForGraph));
-  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr,
-                                nullptr, 0));
+  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
  HIP_CHECK(hipStreamSynchronize(streamForGraph));

  // Verfication
  for (size_t i = 0; i < numW; i++) {
    for (size_t j = 0; j < numH; j++) {
-      REQUIRE(*(A_h + i*numH + j) == memSetVal);
+      REQUIRE(*(A_h + i * numH + j) == memSetVal);
    }
  }
  HIP_CHECK(hipGraphExecDestroy(graphExec));
@@ -200,12 +196,12 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") {
 * Allocate a 1D array using hipMallocPitch. Initialize the allocated memory using
 * hipGraphAddMemsetNode. Copy the values in device memory to host using
 * hipGraphAddMemcpyNode. Verify the results.
-*/
+ */
 TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") {
  CHECK_IMAGE_SUPPORT

  size_t width = SIZE * sizeof(char), numW{SIZE}, pitch_A;
-  char *A_d;
+  char* A_d;

  // Initialize the host memory
  std::vector<char> A_h(numW, ' ');
@@ -213,22 +209,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") {
  hipGraph_t graph;
  std::vector<hipGraphNode_t> nodeDependencies;
  // 1D Memory allocation hipMallocPitch
-  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width,
-                          1));
+  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width, 1));
  // Create Graph
  HIP_CHECK(hipGraphCreate(&graph, 0));
  hipGraphNode_t memsetNode, memcpyNode;
  // Add MemSet Node
  hipMemsetParams memsetParams{};
  memset(&memsetParams, 0, sizeof(memsetParams));
-  memsetParams.dst = reinterpret_cast<void *>(A_d);
+  memsetParams.dst = reinterpret_cast<void*>(A_d);
  memsetParams.value = memSetVal;
  memsetParams.pitch = pitch_A;
  memsetParams.elementSize = sizeof(char);
  memsetParams.width = numW;
  memsetParams.height = 1;
-  HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0,
-          &memsetParams));
+  HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams));
  nodeDependencies.push_back(memsetNode);
  // Add MemCpy Node
  hipMemcpy3DParms myparms{};
@@ -239,15 +233,14 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") {
  myparms.extent = make_hipExtent(width, 1, 1);
  myparms.kind = hipMemcpyDeviceToHost;
  HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(),
-                                            nodeDependencies.size(), &myparms));
+                                  nodeDependencies.size(), &myparms));
  nodeDependencies.clear();

  // Create executable graph
  hipStream_t streamForGraph;
  hipGraphExec_t graphExec;
  HIP_CHECK(hipStreamCreate(&streamForGraph));
-  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr,
-                                nullptr, 0));
+  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
  HIP_CHECK(hipStreamSynchronize(streamForGraph));

@@ -264,7 +257,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") {
 * Allocate a 2D array using hipMalloc3D. Initialize the allocated memory using
 * hipGraphAddMemsetNode. Copy the values in device memory to host using
 * hipGraphAddMemcpyNode. Verify the results.
-*/
+ */
 TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") {
  CHECK_IMAGE_SUPPORT

@@ -300,8 +293,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") {
  memsetParams.elementSize = sizeof(char);
  memsetParams.width = numW;
  memsetParams.height = numH;
-  HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0,
-          &memsetParams));
+  HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams));
  nodeDependencies.push_back(memsetNode);

  // MemCpy params
@@ -315,22 +307,21 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") {

  // Add MemCpy Node
  HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(),
-                                            nodeDependencies.size(), &myparms));
+                                  nodeDependencies.size(), &myparms));
  nodeDependencies.clear();

  // Create executable graph
  hipStream_t streamForGraph;
  hipGraphExec_t graphExec;
  HIP_CHECK(hipStreamCreate(&streamForGraph));
-  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr,
-                                nullptr, 0));
+  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
  HIP_CHECK(hipStreamSynchronize(streamForGraph));

  // Verfication
  for (size_t i = 0; i < numW; i++) {
    for (size_t j = 0; j < numH; j++) {
-      REQUIRE(*(A_h + i*numH + j) == memSetVal);
+      REQUIRE(*(A_h + i * numH + j) == memSetVal);
    }
  }
  HIP_CHECK(hipGraphExecDestroy(graphExec));
@@ -343,7 +334,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") {
 * Allocate a 1D array using hipMalloc3D. Initialize the allocated
 * memory using hipGraphAddMemsetNode. Copy the values in device
 * memory to host using hipGraphAddMemcpyNode. Verify the results.
-*/
+ */
 TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") {
  CHECK_IMAGE_SUPPORT

@@ -375,8 +366,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") {
  memsetParams.elementSize = sizeof(char);
  memsetParams.width = numW;
  memsetParams.height = 1;
-  HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0,
-          &memsetParams));
+  HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams));
  nodeDependencies.push_back(memsetNode);

  // MemCpy params
@@ -390,21 +380,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") {

  // Add MemCpy Node
  HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(),
-                                            nodeDependencies.size(), &myparms));
+                                  nodeDependencies.size(), &myparms));
  nodeDependencies.clear();

  // Create executable graph
  hipStream_t streamForGraph;
  hipGraphExec_t graphExec;
  HIP_CHECK(hipStreamCreate(&streamForGraph));
-  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr,
-                                nullptr, 0));
+  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
  HIP_CHECK(hipStreamSynchronize(streamForGraph));

  // Verfication
  for (size_t i = 0; i < numW; i++) {
-     REQUIRE(A_h[i] == memSetVal);
+    REQUIRE(A_h[i] == memSetVal);
  }
  HIP_CHECK(hipGraphExecDestroy(graphExec));
  HIP_CHECK(hipGraphDestroy(graph));
@@ -415,9 +404,9 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") {
 * Allocate a 1D array using hipMalloc. Initialize the allocated memory using
 * hipGraphAddMemsetNode. Copy the values in device memory to host using
 * hipGraphAddMemcpyNode. Verify the results.
-*/
+ */
 TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") {
-  char *A_d;
+  char* A_d;
  size_t NumW = SIZE;
  size_t Nbytes1D = SIZE * sizeof(char);

@@ -436,14 +425,13 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") {
  // Add Memset node
  hipMemsetParams memsetParams{};
  memset(&memsetParams, 0, sizeof(memsetParams));
-  memsetParams.dst = reinterpret_cast<void *>(A_d);
+  memsetParams.dst = reinterpret_cast<void*>(A_d);
  memsetParams.value = memSetVal;
  memsetParams.pitch = Nbytes1D;
  memsetParams.elementSize = sizeof(char);
  memsetParams.width = NumW;
  memsetParams.height = 1;
-  HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0,
-            &memsetParams));
+  HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams));
  nodeDependencies.push_back(memsetNode);
  // Add MemCpy Node
  hipPitchedPtr devPitchedPtr{A_d, Nbytes1D, NumW, 0};
@@ -456,20 +444,19 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") {
  myparms.extent = make_hipExtent(Nbytes1D, 1, 1);
  myparms.kind = hipMemcpyDeviceToHost;
  HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(),
-                                            nodeDependencies.size(), &myparms));
+                                  nodeDependencies.size(), &myparms));
  nodeDependencies.clear();
  // Create executable graph
  hipStream_t streamForGraph;
  hipGraphExec_t graphExec;
  HIP_CHECK(hipStreamCreate(&streamForGraph));
-  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr,
-                                nullptr, 0));
+  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
  HIP_CHECK(hipStreamSynchronize(streamForGraph));

  // Verfication
  for (size_t i = 0; i < NumW; i++) {
-     REQUIRE(A_h[i] == memSetVal);
+    REQUIRE(A_h[i] == memSetVal);
  }
  HIP_CHECK(hipGraphExecDestroy(graphExec));
  HIP_CHECK(hipGraphDestroy(graph));
@@ -479,16 +466,15 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") {

 TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocManaged") {
  int managed = 0;
-  HIP_CHECK(hipDeviceGetAttribute(&managed,
-                                  hipDeviceAttributeManagedMemory, 0));
+  HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory, 0));
  INFO("hipDeviceAttributeManagedMemory: " << managed);
  if (managed != 1) {
    WARN(
-      "GPU 0 doesn't support hipDeviceAttributeManagedMemory attribute"
-       "so defaulting to system memory.");
+        "GPU 0 doesn't support hipDeviceAttributeManagedMemory attribute"
+        "so defaulting to system memory.");
  }
  size_t Nbytes1D = SIZE * sizeof(char);
-  char *A_d;
+  char* A_d;
  // Initialize the host memory
  std::vector<char> A_h(SIZE, ' ');
  // Device Memory
@@ -502,14 +488,13 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocManaged") {
  // Add Memset node
  hipMemsetParams memsetParams{};
  memset(&memsetParams, 0, sizeof(memsetParams));
-  memsetParams.dst = reinterpret_cast<void *>(A_d);
+  memsetParams.dst = reinterpret_cast<void*>(A_d);
  memsetParams.value = memSetVal;
  memsetParams.pitch = Nbytes1D;
  memsetParams.elementSize = sizeof(char);
  memsetParams.width = SIZE;
  memsetParams.height = 1;
-  HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0,
-            &memsetParams));
+  HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams));
  nodeDependencies.push_back(memsetNode);

  // Add MemCpy Node
@@ -524,21 +509,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocManaged") {
  myparms.extent = make_hipExtent(Nbytes1D, 1, 1);
  myparms.kind = hipMemcpyDeviceToHost;
  HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(),
-                                            nodeDependencies.size(), &myparms));
+                                  nodeDependencies.size(), &myparms));
  nodeDependencies.clear();

  // Create executable graph
  hipStream_t streamForGraph;
  hipGraphExec_t graphExec;
  HIP_CHECK(hipStreamCreate(&streamForGraph));
-  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr,
-                                nullptr, 0));
+  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
  HIP_CHECK(hipStreamSynchronize(streamForGraph));

  // Verfication
  for (size_t i = 0; i < SIZE; i++) {
-     REQUIRE(A_h[i] == memSetVal);
+    REQUIRE(A_h[i] == memSetVal);
  }

  HIP_CHECK(hipGraphExecDestroy(graphExec));
@@ -22,7 +22,6 @@ THE SOFTWARE.
 #include <functional>
 #include <vector>

-#include <hip_test_defgroups.hh>
 #include <hip_test_common.hh>
 #include <hip_test_checkers.hh>

@@ -86,7 +85,7 @@ void GraphExecMemcpyToSymbolSetParamsShell(const void* symbol, const void* alt_s
 * node addition. A graph is constructed to which a MemcpyToSymbol node is added with valid but
 * incorrect parameters. After the graph is instantiated the parameters are updated to correct
 * values and the graph executed. After graph execution, a MemcpyFromSymbol is performed and the
- * copied values are compared against values known to have been copied to symbol memory previously.  
+ * copied values are compared against values known to have been copied to symbol memory previously.
 * The test is run for scalar, const scalar, array, and const array symbols of types char, int,
 * float and double. For array symbols, the test is repeated for zero and non-zero offset values.
 * Verification is performed for destination memory allocated on host and device.
@@ -21,7 +21,6 @@ THE SOFTWARE.

 #include <functional>

-#include <hip_test_defgroups.hh>
 #include <hip_test_common.hh>

 #include "graph_memset_node_test_common.hh"
@@ -46,7 +45,7 @@ THE SOFTWARE.
 * which also constitutes a test for said API.
 * The test is repeated for all valid element sizes(1,
 * 2, 4), and several allocations of different width(height is always 1 because only 1D memset nodes
- * can be updated), both on host and device 
+ * can be updated), both on host and device
 * Test source
 * ------------------------
 *    - unit/graph/hipGraphExecMemsetNodeSetParams.cc
@@ -22,7 +22,6 @@ THE SOFTWARE.
 #include <functional>
 #include <vector>

-#include <hip_test_defgroups.hh>
 #include <hip_test_common.hh>
 #include <hip_test_checkers.hh>

@@ -85,7 +84,7 @@ void GraphMemcpyFromSymbolSetParamsShell(const void* symbol, const void* alt_sym
 *    - Verify that data is correctly copied from a symbol after node parameters are set following
 * node addition. A graph is constructed to which a MemcpyFromSymbol node is added with valid but
 * incorrect parameters. The parameters are then updated to correct values and the graph executed.
- * Values in destination memory are compared against values known to be in symbol memory.  
+ * Values in destination memory are compared against values known to be in symbol memory.
 * The test is run for scalar, const scalar, array, and const array symbols of types char, int,
 * float and double. For array symbols, the test is repeated for zero and non-zero offset values.
 * Verification is performed for destination memory allocated on host and device.
@@ -22,7 +22,6 @@ THE SOFTWARE.
 #include <functional>
 #include <vector>

-#include <hip_test_defgroups.hh>
 #include <hip_test_common.hh>
 #include <hip_test_checkers.hh>

@@ -86,7 +85,7 @@ void GraphMemcpyToSymbolSetParamsShell(const void* symbol, const void* alt_symbo
 * node addition. A graph is constructed to which a MemcpyToSymbol node is added with valid but
 * incorrect parameters. The parameters are then updated to correct values and the graph executed.
 * After graph execution, a MemcpyFromSymbol is performed and the copied values are compared against
- * values known to have been copied to symbol memory previously.  
+ * values known to have been copied to symbol memory previously.
 * The test is run for scalar, const scalar, array, and const array symbols of types char, int,
 * float and double. For array symbols, the test is repeated for zero and non-zero offset values.
 * Verification is performed for destination memory allocated on host and device.
@@ -19,7 +19,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 */

-#include <hip_test_defgroups.hh>
 #include <hip_test_common.hh>
 #include <resource_guards.hh>

@@ -21,7 +21,6 @@ THE SOFTWARE.

 #include <functional>

-#include <hip_test_defgroups.hh>
 #include <hip_test_common.hh>

 #include "graph_memset_node_test_common.hh"
@@ -44,7 +43,7 @@ THE SOFTWARE.
 * The parameters are also verified via hipGraphMemsetNodeGetParams, which also constitutes a test
 * for said API.
 * The test is repeated for all valid element sizes(1, 2, 4), and several allocations of different
- * height and width both on host and device 
+ * height and width both on host and device
 * Test source
 * ------------------------
 *    - unit/graph/hipGraphMemsetNodeSetParams.cc
@@ -18,7 +18,6 @@ THE SOFTWARE.
 */
 #include <hip_test_common.hh>
 #include <hip_test_kernels.hh>
-#include <hip_test_defgroups.hh>

 #include "stream_capture_common.hh"

@@ -19,8 +19,7 @@ THE SOFTWARE.

 #include <hip_test_common.hh>
 #include <hip_test_kernels.hh>
-#include <hip_test_defgroups.hh>
-#include "stream_capture_common.hh" // NOLINT
+#include "stream_capture_common.hh"  // NOLINT

 #pragma clang diagnostic ignored "-Wunused-variable"
 /**
@@ -56,8 +55,7 @@ static void hostNodeCallback(void* data) {
 }

 template <typename T, typename F>
-void captureStreamAndLaunchGraph(F graphFunc, hipStreamCaptureMode mode,
-                                hipStream_t stream) {
+void captureStreamAndLaunchGraph(F graphFunc, hipStreamCaptureMode mode, hipStream_t stream) {
  constexpr size_t N = 1000000;
  size_t Nbytes = N * sizeof(T);

@@ -89,8 +87,7 @@ void captureStreamAndLaunchGraph(F graphFunc, hipStreamCaptureMode mode,
    std::fill_n(A_h.host_ptr(), N, static_cast<float>(i));
    HIP_CHECK(hipGraphLaunch(graphExec, stream));
    HIP_CHECK(hipStreamSynchronize(stream));
-    ArrayFindIfNot(B_h.host_ptr(),
-    static_cast<float>(i) * static_cast<float>(i), N);
+    ArrayFindIfNot(B_h.host_ptr(), static_cast<float>(i) * static_cast<float>(i), N);
  }

  HIP_CHECK(hipGraphExecDestroy(graphExec))
@@ -117,16 +114,15 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Functional") {
  StreamGuard stream_guard(stream_type);
  hipStream_t stream = stream_guard.stream();

-  const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal,
-        hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
+  const hipStreamCaptureMode captureMode = GENERATE(
+      hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);

  EventsGuard events_guard(3);
  StreamsGuard streams_guard(2);

  SECTION("Linear graph capture") {
    captureStreamAndLaunchGraph<float>(
-        [](float* A_h, float* A_d, float* B_h, float* B_d, size_t N,
-        hipStream_t stream) {
+        [](float* A_h, float* A_d, float* B_h, float* B_d, size_t N, hipStream_t stream) {
          return captureSequenceLinear(A_h, A_d, B_h, B_d, N, stream);
        },
        captureMode, stream);
@@ -134,10 +130,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Functional") {

  SECTION("Branched graph capture") {
    captureStreamAndLaunchGraph<float>(
-        [&streams_guard, &events_guard](float* A_h, float* A_d, float* B_h,
-                  float* B_d, size_t N, hipStream_t stream) {
-          captureSequenceBranched(A_h, A_d, B_h, B_d, N, stream,
-          streams_guard.stream_list(), events_guard.event_list());
+        [&streams_guard, &events_guard](float* A_h, float* A_d, float* B_h, float* B_d, size_t N,
+                                        hipStream_t stream) {
+          captureSequenceBranched(A_h, A_d, B_h, B_d, N, stream, streams_guard.stream_list(),
+                                  events_guard.event_list());
        },
        captureMode, stream);
  }
@@ -173,8 +169,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_Parameters") {
                    hipErrorIllegalState);
  }
  SECTION("Creating hipStream with invalid mode") {
-    HIP_CHECK_ERROR(hipStreamBeginCapture(stream, hipStreamCaptureMode(-1)),
-                    hipErrorInvalidValue);
+    HIP_CHECK_ERROR(hipStreamBeginCapture(stream, hipStreamCaptureMode(-1)), hipErrorInvalidValue);
  }
 #if HT_NVIDIA  // EXSWHTEC-216
  SECTION("Stream capture on uninitialized stream returns error code.") {
@@ -182,8 +177,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_Parameters") {
      StreamGuard sg(Streams::created);
      return sg.stream();
    };
-    HIP_CHECK_ERROR(hipStreamBeginCapture(InvalidStream(),
-                                          hipStreamCaptureModeGlobal),
+    HIP_CHECK_ERROR(hipStreamBeginCapture(InvalidStream(), hipStreamCaptureModeGlobal),
                    hipErrorContextIsDestroyed);
  }
 #endif
@@ -207,8 +201,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Basic") {
  StreamGuard stream_guard(stream_type);
  hipStream_t s = stream_guard.stream();

-  const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal,
-        hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
+  const hipStreamCaptureMode captureMode = GENERATE(
+      hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);

  HIP_CHECK(hipStreamBeginCapture(s, captureMode));

@@ -218,8 +212,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Basic") {

 /* Local function for inter stream event synchronization
 */
-static void interStrmEventSyncCapture(const hipStream_t& stream1,
-                                      const hipStream_t& stream2) {
+static void interStrmEventSyncCapture(const hipStream_t& stream1, const hipStream_t& stream2) {
  hipGraph_t graph1{nullptr}, graph2{nullptr};
  hipGraphExec_t graphExec1{nullptr}, graphExec2{nullptr};

@@ -266,8 +259,7 @@ static void interStrmEventSyncCapture(const hipStream_t& stream1,

 /* Local function for colligated stream capture
 */
-static void colligatedStrmCapture(const hipStream_t& stream1,
-                                  const hipStream_t& stream2) {
+static void colligatedStrmCapture(const hipStream_t& stream1, const hipStream_t& stream2) {
  hipGraph_t graph1{nullptr}, graph2{nullptr};
  hipGraphExec_t graphExec1{nullptr}, graphExec2{nullptr};

@@ -310,8 +302,7 @@ static void colligatedStrmCapture(const hipStream_t& stream1,

 /* Local function for colligated stream capture functionality
 */
-static void colligatedStrmCaptureFunc(const hipStream_t& stream1,
-                                      const hipStream_t& stream2) {
+static void colligatedStrmCaptureFunc(const hipStream_t& stream1, const hipStream_t& stream2) {
  constexpr size_t N = 1000000;
  size_t Nbytes = N * sizeof(int);

@@ -331,10 +322,8 @@ static void colligatedStrmCaptureFunc(const hipStream_t& stream1,
  // Capture 2 streams
  HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal));
  HIP_CHECK(hipStreamBeginCapture(stream2, hipStreamCaptureModeGlobal));
-  captureSequenceLinear(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(),
-                        N, stream1);
-  captureSequenceLinear(C_h.host_ptr(), C_d.ptr(), D_h.host_ptr(), D_d.ptr(),
-                        N, stream2);
+  captureSequenceLinear(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream1);
+  captureSequenceLinear(C_h.host_ptr(), C_d.ptr(), D_h.host_ptr(), D_d.ptr(), N, stream2);
  captureSequenceCompute(A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream1);
  captureSequenceCompute(C_d.ptr(), D_h.host_ptr(), D_d.ptr(), N, stream2);
  HIP_CHECK(hipStreamEndCapture(stream1, &graph1));
@@ -370,9 +359,8 @@ static void colligatedStrmCaptureFunc(const hipStream_t& stream1,

 /* Stream Capture thread function
 */
-static void threadStrmCaptureFunc(hipStream_t stream, int* A_h, int* A_d,
-                                  int* B_h, int* B_d, hipGraph_t* graph,
-                                  size_t N, hipStreamCaptureMode mode) {
+static void threadStrmCaptureFunc(hipStream_t stream, int* A_h, int* A_d, int* B_h, int* B_d,
+                                  hipGraph_t* graph, size_t N, hipStreamCaptureMode mode) {
  // Capture stream
  HIP_CHECK(hipStreamBeginCapture(stream, mode));
  captureSequenceLinear(A_h, A_d, B_h, B_d, N, stream);
@@ -404,10 +392,10 @@ static void multithreadedTest(hipStreamCaptureMode mode) {
  LinearAllocGuard<int> D_d(LinearAllocs::hipMalloc, Nbytes);

  // Launch 2 threads to capture the 2 streams into graphs
-  std::thread t1(threadStrmCaptureFunc, stream1, A_h.host_ptr(), A_d.ptr(),
-                B_h.host_ptr(), B_d.ptr(), &graph1, N, mode);
-  std::thread t2(threadStrmCaptureFunc, stream2, C_h.host_ptr(), C_d.ptr(),
-                D_h.host_ptr(), D_d.ptr(), &graph2, N, mode);
+  std::thread t1(threadStrmCaptureFunc, stream1, A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(),
+                 B_d.ptr(), &graph1, N, mode);
+  std::thread t2(threadStrmCaptureFunc, stream2, C_h.host_ptr(), C_d.ptr(), D_h.host_ptr(),
+                 D_d.ptr(), &graph2, N, mode);
  t1.join();
  t2.join();

@@ -480,11 +468,9 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_InterStrmEventSync_Flags") {
 TEST_CASE("Unit_hipStreamBeginCapture_Positive_InterStrmEventSync_Priority") {
  int minPriority = 0, maxPriority = 0;
  HIP_CHECK(hipDeviceGetStreamPriorityRange(&minPriority, &maxPriority));
-  StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault,
-                            minPriority);
+  StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault, minPriority);
  hipStream_t stream1 = stream_guard1.stream();
-  StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault,
-                            maxPriority);
+  StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault, maxPriority);
  hipStream_t stream2 = stream_guard2.stream();
  interStrmEventSyncCapture(stream1, stream2);
 }
@@ -533,11 +519,9 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_ColligatedStrmCapture_Flags") {
 TEST_CASE("Unit_hipStreamBeginCapture_Positive_ColligatedStrmCapture_Prio") {
  int minPriority = 0, maxPriority = 0;
  HIP_CHECK(hipDeviceGetStreamPriorityRange(&minPriority, &maxPriority));
-  StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault,
-                            minPriority);
+  StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault, minPriority);
  hipStream_t stream1 = stream_guard1.stream();
-  StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault,
-                            maxPriority);
+  StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault, maxPriority);
  hipStream_t stream2 = stream_guard2.stream();
  colligatedStrmCapture(stream1, stream2);
 }
@@ -578,8 +562,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_ColligatedStrmCaptureFunc") {
 *    - HIP_VERSION >= 5.2
 */
 TEST_CASE("Unit_hipStreamBeginCapture_Positive_Multithreaded") {
-  const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal,
-        hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
+  const hipStreamCaptureMode captureMode = GENERATE(
+      hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
  multithreadedTest(captureMode);
 }

@@ -708,8 +692,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_CapturingFromWithinStrms") {
  HIP_CHECK(hipEventRecord(events[2], streams[2]));
  HIP_CHECK(hipStreamWaitEvent(streams[0], events[1], 0));
  HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0));
-  HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault,
-                           streams[0]));
+  HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault, streams[0]));
  HIP_CHECK(hipStreamEndCapture(streams[0], &graph));  // End Capture
  // Reset device memory
  HIP_CHECK(hipMemset(devMem, 0, sizeof(int)));
@@ -751,8 +734,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_DetectingInvalidCapture") {
  dummyKernel<<<1, 1, 0, streams[0]>>>();
  // Since stream[1] is already in capture mode due to event wait
  // hipStreamBeginCapture on stream[1] is expected to return error.
-  HIP_CHECK_ERROR(hipStreamBeginCapture(streams[1],
-                                        hipStreamCaptureModeGlobal),
+  HIP_CHECK_ERROR(hipStreamBeginCapture(streams[1], hipStreamCaptureModeGlobal),
                  hipErrorIllegalState);
 }

@@ -785,8 +767,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_CapturingMultGraphsFrom1Strm") {
  for (int i = 0; i < 3; i++) {
    HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal));
    for (int j = 0; j <= i; j++) incrementKernel<<<1, 1, 0, stream1>>>(devMem);
-    HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault,
-                             stream1));
+    HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault, stream1));
    HIP_CHECK(hipStreamEndCapture(stream1, &graphs[i]));
  }
  // Instantiate and execute all graphs
@@ -825,22 +806,19 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_CheckingSyncDuringCapture") {
  EventsGuard events_guard(1);
  hipEvent_t e = events_guard[0];

-  const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal,
-        hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
+  const hipStreamCaptureMode captureMode = GENERATE(
+      hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);

  HIP_CHECK(hipStreamBeginCapture(stream, captureMode));
  SECTION("Synchronize stream during capture") {
-    HIP_CHECK_ERROR(hipStreamSynchronize(stream),
-                    hipErrorStreamCaptureUnsupported);
+    HIP_CHECK_ERROR(hipStreamSynchronize(stream), hipErrorStreamCaptureUnsupported);
  }
  SECTION("Query stream during capture") {
-    HIP_CHECK_ERROR(hipStreamQuery(stream),
-                    hipErrorStreamCaptureUnsupported);
+    HIP_CHECK_ERROR(hipStreamQuery(stream), hipErrorStreamCaptureUnsupported);
  }
 #if HT_NVIDIA
  SECTION("Synchronize device during capture") {
-    HIP_CHECK_ERROR(hipDeviceSynchronize(),
-                    hipErrorStreamCaptureUnsupported);
+    HIP_CHECK_ERROR(hipDeviceSynchronize(), hipErrorStreamCaptureUnsupported);
  }
  SECTION("Synchronize event during capture") {
    HIP_CHECK(hipEventRecord(e, stream));
@@ -884,17 +862,14 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_UnsafeCallsDuringCapture") {

  HIP_CHECK(hipStreamBeginCapture(stream, captureMode));
  SECTION("hipMalloc during capture") {
-    HIP_CHECK_ERROR(hipMalloc(&devMem2, sizeof(int)),
-                    hipErrorStreamCaptureUnsupported);
+    HIP_CHECK_ERROR(hipMalloc(&devMem2, sizeof(int)), hipErrorStreamCaptureUnsupported);
  }
  SECTION("hipMemcpy during capture") {
-    HIP_CHECK_ERROR(hipMemcpy(devMem.ptr(), hostMem.host_ptr(), sizeof(int),
-                    hipMemcpyHostToDevice),
+    HIP_CHECK_ERROR(hipMemcpy(devMem.ptr(), hostMem.host_ptr(), sizeof(int), hipMemcpyHostToDevice),
                    hipErrorStreamCaptureImplicit);
  }
  SECTION("hipMemset during capture") {
-    HIP_CHECK_ERROR(hipMemset(devMem.ptr(), 0, sizeof(int)),
-                    hipErrorStreamCaptureImplicit);
+    HIP_CHECK_ERROR(hipMemset(devMem.ptr(), 0, sizeof(int)), hipErrorStreamCaptureImplicit);
  }
 }
 #endif
@@ -931,8 +906,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_EndingCapwhenCapInProg") {
    HIP_CHECK(hipEventRecord(e, stream1));
    HIP_CHECK(hipStreamWaitEvent(stream2, e, 0));
    dummyKernel<<<1, 1, 0, stream2>>>();
-    HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph),
-                    hipErrorStreamCaptureUnjoined);
+    HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph), hipErrorStreamCaptureUnjoined);
  }
  SECTION("End strm capture when forked strm still has operations") {
    EventsGuard events_guard(2);
@@ -946,8 +920,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_EndingCapwhenCapInProg") {
    HIP_CHECK(hipEventRecord(e2, stream2));
    HIP_CHECK(hipStreamWaitEvent(stream1, e2, 0));
    dummyKernel<<<1, 1, 0, stream2>>>();
-    HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph),
-                    hipErrorStreamCaptureUnjoined);
+    HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph), hipErrorStreamCaptureUnjoined);
  }
 }
 /**
@@ -970,19 +943,17 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_MultiGPU") {
    SUCCEED("skipping the testcases as numDevices < 2");
    return;
  }
-  hipStream_t* stream = reinterpret_cast<hipStream_t*>
-                       (malloc(devcount * sizeof(hipStream_t)));
+  hipStream_t* stream = reinterpret_cast<hipStream_t*>(malloc(devcount * sizeof(hipStream_t)));
  REQUIRE(stream != nullptr);
-  hipGraph_t* graph = reinterpret_cast<hipGraph_t*>
-                       (malloc(devcount * sizeof(hipGraph_t)));
+  hipGraph_t* graph = reinterpret_cast<hipGraph_t*>(malloc(devcount * sizeof(hipGraph_t)));
  REQUIRE(graph != nullptr);
  int **devMem{nullptr}, **hostMem{nullptr};
  hostMem = reinterpret_cast<int**>(malloc(sizeof(int*) * devcount));
  REQUIRE(hostMem != nullptr);
  devMem = reinterpret_cast<int**>(malloc(sizeof(int*) * devcount));
  REQUIRE(devMem != nullptr);
-  hipGraphExec_t* graphExec = reinterpret_cast<hipGraphExec_t*>
-                              (malloc(devcount * sizeof(hipGraphExec_t)));
+  hipGraphExec_t* graphExec =
+      reinterpret_cast<hipGraphExec_t*>(malloc(devcount * sizeof(hipGraphExec_t)));
  // Capture stream in each device
  for (int dev = 0; dev < devcount; dev++) {
    HIP_CHECK(hipSetDevice(dev));
@@ -994,15 +965,14 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_MultiGPU") {
    for (int i = 0; i < (dev + 1); i++) {
      incrementKernel<<<1, 1, 0, stream[dev]>>>(devMem[dev]);
    }
-    HIP_CHECK(hipMemcpyAsync(hostMem[dev], devMem[dev], sizeof(int),
-            hipMemcpyDefault, stream[dev]));
+    HIP_CHECK(
+        hipMemcpyAsync(hostMem[dev], devMem[dev], sizeof(int), hipMemcpyDefault, stream[dev]));
    HIP_CHECK(hipStreamEndCapture(stream[dev], &graph[dev]));
  }
  // Launch the captured graphs in the respective device
  for (int dev = 0; dev < devcount; dev++) {
    HIP_CHECK(hipSetDevice(dev));
-    HIP_CHECK(hipGraphInstantiate(&graphExec[dev], graph[dev], nullptr,
-                                  nullptr, 0));
+    HIP_CHECK(hipGraphInstantiate(&graphExec[dev], graph[dev], nullptr, nullptr, 0));
    HIP_CHECK(hipGraphLaunch(graphExec[dev], stream[dev]));
  }
  // Validate output
@@ -1069,8 +1039,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_nestedStreamCapture") {
  HIP_CHECK(hipEventRecord(events[3], streams[2]));
  HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0));
  HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0));
-  HIP_CHECK(hipMemcpyAsync(hostMem_g.host_ptr(), devMem_g.ptr(), sizeof(int),
-                           hipMemcpyDefault, streams[0]));
+  HIP_CHECK(hipMemcpyAsync(hostMem_g.host_ptr(), devMem_g.ptr(), sizeof(int), hipMemcpyDefault,
+                           streams[0]));
  HIP_CHECK(hipStreamEndCapture(streams[0], &graph));  // End Capture
  // Reset device memory
  HIP_CHECK(hipMemset(devMem_g.ptr(), 0, sizeof(int)));
@@ -1108,23 +1078,15 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_streamReuse") {
  hipGraph_t graphs[3];
  StreamsGuard streams(3);
  EventsGuard events(4);
-  LinearAllocGuard<int> hostMem_g1 = LinearAllocGuard<int>
-                                    (LinearAllocs::malloc, sizeof(int));
-  LinearAllocGuard<int> hostMem_g2 = LinearAllocGuard<int>
-                                    (LinearAllocs::malloc, sizeof(int));
-  LinearAllocGuard<int> hostMem_g3 = LinearAllocGuard<int>
-                                    (LinearAllocs::malloc, sizeof(int));
-  LinearAllocGuard<int> devMem_g1 = LinearAllocGuard<int>
-                                    (LinearAllocs::hipMalloc, sizeof(int));
-  LinearAllocGuard<int> devMem_g2 = LinearAllocGuard<int>
-                                    (LinearAllocs::hipMalloc, sizeof(int));
-  LinearAllocGuard<int> devMem_g3 = LinearAllocGuard<int>
-                                    (LinearAllocs::hipMalloc, sizeof(int));
+  LinearAllocGuard<int> hostMem_g1 = LinearAllocGuard<int>(LinearAllocs::malloc, sizeof(int));
+  LinearAllocGuard<int> hostMem_g2 = LinearAllocGuard<int>(LinearAllocs::malloc, sizeof(int));
+  LinearAllocGuard<int> hostMem_g3 = LinearAllocGuard<int>(LinearAllocs::malloc, sizeof(int));
+  LinearAllocGuard<int> devMem_g1 = LinearAllocGuard<int>(LinearAllocs::hipMalloc, sizeof(int));
+  LinearAllocGuard<int> devMem_g2 = LinearAllocGuard<int>(LinearAllocs::hipMalloc, sizeof(int));
+  LinearAllocGuard<int> devMem_g3 = LinearAllocGuard<int>(LinearAllocs::hipMalloc, sizeof(int));

-  std::vector<int*> hostMem = {hostMem_g1.host_ptr(), hostMem_g2.host_ptr(),
-                               hostMem_g3.host_ptr()};
-  std::vector<int*> devMem = {devMem_g1.ptr(), devMem_g2.ptr(),
-                              devMem_g3.ptr()};
+  std::vector<int*> hostMem = {hostMem_g1.host_ptr(), hostMem_g2.host_ptr(), hostMem_g3.host_ptr()};
+  std::vector<int*> devMem = {devMem_g1.ptr(), devMem_g2.ptr(), devMem_g3.ptr()};
  // Create a device memory of size int and initialize it to 0
  for (int i = 0; i < 3; i++) {
    memset(hostMem[i], 0, sizeof(int));
@@ -1148,16 +1110,14 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_streamReuse") {
  HIP_CHECK(hipEventRecord(events[3], streams[2]));
  HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0));
  HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0));
-  HIP_CHECK(hipMemcpyAsync(hostMem[0], devMem[0], sizeof(int),
-            hipMemcpyDefault, streams[0]));
+  HIP_CHECK(hipMemcpyAsync(hostMem[0], devMem[0], sizeof(int), hipMemcpyDefault, streams[0]));
  HIP_CHECK(hipStreamEndCapture(streams[0], &graphs[0]));  // End Capture
  // Start capturing graph2 from stream 2
  HIP_CHECK(hipStreamBeginCapture(streams[1], hipStreamCaptureModeGlobal));
  incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]);
  incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]);
  incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]);
-  HIP_CHECK(hipMemcpyAsync(hostMem[1], devMem[1], sizeof(int),
-            hipMemcpyDefault, streams[1]));
+  HIP_CHECK(hipMemcpyAsync(hostMem[1], devMem[1], sizeof(int), hipMemcpyDefault, streams[1]));
  HIP_CHECK(hipStreamEndCapture(streams[1], &graphs[1]));  // End Capture
  // Start capturing graph3 from stream 3
  HIP_CHECK(hipStreamBeginCapture(streams[2], hipStreamCaptureModeGlobal));
@@ -1166,8 +1126,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_streamReuse") {
  incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]);
  incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]);
  incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]);
-  HIP_CHECK(hipMemcpyAsync(hostMem[2], devMem[2], sizeof(int),
-            hipMemcpyDefault, streams[2]));
+  HIP_CHECK(hipMemcpyAsync(hostMem[2], devMem[2], sizeof(int), hipMemcpyDefault, streams[2]));
  HIP_CHECK(hipStreamEndCapture(streams[2], &graphs[2]));  // End Capture
  // Reset device memory
  HIP_CHECK(hipMemset(devMem[0], 0, sizeof(int)));
@@ -1211,40 +1170,32 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_captureComplexGraph") {
  EventsGuard events(7);
  // Allocate Device memory and Host memory
  size_t N = GRIDSIZE * BLOCKSIZE;
-  LinearAllocGuard<int> Ah = LinearAllocGuard<int>
-                            (LinearAllocs::malloc, N * sizeof(int));
-  LinearAllocGuard<int> Bh = LinearAllocGuard<int>
-                            (LinearAllocs::malloc, N * sizeof(int));
-  LinearAllocGuard<int> Ch = LinearAllocGuard<int>
-                            (LinearAllocs::malloc, N * sizeof(int));
-  LinearAllocGuard<int> Ad = LinearAllocGuard<int>
-                            (LinearAllocs::hipMalloc, N * sizeof(int));
-  LinearAllocGuard<int> Bd = LinearAllocGuard<int>
-                            (LinearAllocs::hipMalloc, N * sizeof(int));
+  LinearAllocGuard<int> Ah = LinearAllocGuard<int>(LinearAllocs::malloc, N * sizeof(int));
+  LinearAllocGuard<int> Bh = LinearAllocGuard<int>(LinearAllocs::malloc, N * sizeof(int));
+  LinearAllocGuard<int> Ch = LinearAllocGuard<int>(LinearAllocs::malloc, N * sizeof(int));
+  LinearAllocGuard<int> Ad = LinearAllocGuard<int>(LinearAllocs::hipMalloc, N * sizeof(int));
+  LinearAllocGuard<int> Bd = LinearAllocGuard<int>(LinearAllocs::hipMalloc, N * sizeof(int));

  // Capture streams into graph
  HIP_CHECK(hipStreamBeginCapture(streams[0], hipStreamCaptureModeGlobal));
  HIP_CHECK(hipEventRecord(events[0], streams[0]));
  HIP_CHECK(hipStreamWaitEvent(streams[3], events[0], 0));
  HIP_CHECK(hipStreamWaitEvent(streams[4], events[0], 0));
-  HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), (N * sizeof(int)),
-            hipMemcpyDefault, streams[0]));
-  HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), (N * sizeof(int)),
-            hipMemcpyDefault, streams[4]));
+  HIP_CHECK(
+      hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[0]));
+  HIP_CHECK(
+      hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[4]));
  hipHostFn_t fn = hostNodeCallback;
  HIPCHECK(hipLaunchHostFunc(streams[3], fn, nullptr));
  HIP_CHECK(hipEventRecord(events[1], streams[0]));
  HIP_CHECK(hipStreamWaitEvent(streams[1], events[1], 0));
  int* Ad_2nd_half = Ad.ptr() + N / 2;
  int* Ad_1st_half = Ad.ptr();
-  mymul<<<GRIDSIZE / 2, BLOCKSIZE, 0, streams[0]>>>(Ad_2nd_half,
-                                                    CONST_KER2_VAL);
-  mymul<<<GRIDSIZE / 2, BLOCKSIZE, 0, streams[1]>>>(Ad_1st_half,
-                                                    CONST_KER1_VAL);
+  mymul<<<GRIDSIZE / 2, BLOCKSIZE, 0, streams[0]>>>(Ad_2nd_half, CONST_KER2_VAL);
+  mymul<<<GRIDSIZE / 2, BLOCKSIZE, 0, streams[1]>>>(Ad_1st_half, CONST_KER1_VAL);
  HIP_CHECK(hipEventRecord(events[2], streams[1]));
  HIP_CHECK(hipStreamWaitEvent(streams[2], events[2], 0));
-  mymul<<<GRIDSIZE / 2, BLOCKSIZE, 0, streams[1]>>>(Ad_1st_half,
-                                                    CONST_KER3_VAL);
+  mymul<<<GRIDSIZE / 2, BLOCKSIZE, 0, streams[1]>>>(Ad_1st_half, CONST_KER3_VAL);
  HIPCHECK(hipLaunchHostFunc(streams[2], fn, nullptr));
  HIP_CHECK(hipEventRecord(events[6], streams[1]));
  HIP_CHECK(hipStreamWaitEvent(streams[0], events[6], 0));
@@ -1255,8 +1206,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_captureComplexGraph") {
  HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0));
  HIP_CHECK(hipEventRecord(events[4], streams[3]));
  HIP_CHECK(hipStreamWaitEvent(streams[0], events[4], 0));
-  HIP_CHECK(hipMemcpyAsync(Ch.host_ptr(), Ad.ptr(), (N * sizeof(int)),
-                           hipMemcpyDefault, streams[0]));
+  HIP_CHECK(
+      hipMemcpyAsync(Ch.host_ptr(), Ad.ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[0]));
  HIP_CHECK(hipStreamEndCapture(streams[0], &graph));  // End Capture
  // Execute and test the graph
  hipGraphExec_t graphExec{nullptr};
@@ -1269,11 +1220,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_captureComplexGraph") {
    HIP_CHECK(hipStreamSynchronize(streams[0]));
    for (size_t i = 0; i < N; i++) {
      if (i > (N / 2 - 1)) {
-        REQUIRE(Ch.host_ptr()[i] == (Bh.host_ptr()[i] +
-                                     Ah.host_ptr()[i] * CONST_KER2_VAL));
+        REQUIRE(Ch.host_ptr()[i] == (Bh.host_ptr()[i] + Ah.host_ptr()[i] * CONST_KER2_VAL));
      } else {
-        REQUIRE(Ch.host_ptr()[i] == (Bh.host_ptr()[i] +
-                     Ah.host_ptr()[i] * CONST_KER1_VAL * CONST_KER3_VAL));
+        REQUIRE(Ch.host_ptr()[i] ==
+                (Bh.host_ptr()[i] + Ah.host_ptr()[i] * CONST_KER1_VAL * CONST_KER3_VAL));
      }
    }
  }
@@ -1340,14 +1290,12 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") {
  hipGraph_t graph{nullptr};
  hipGraphExec_t graphExec{nullptr};
  // Allocate device memory
-  LinearAllocGuard<int> Ah = LinearAllocGuard<int>(LinearAllocs::malloc,
-                            BLOCKSIZE * sizeof(int));
-  LinearAllocGuard<int> Ad = LinearAllocGuard<int>(LinearAllocs::hipMalloc,
-                            BLOCKSIZE * sizeof(int));
-  LinearAllocGuard<int> Bh = LinearAllocGuard<int>(LinearAllocs::malloc,
-                            BLOCKSIZE * sizeof(int));
-  LinearAllocGuard<int> Bd = LinearAllocGuard<int>(LinearAllocs::hipMalloc,
-                            BLOCKSIZE * sizeof(int));
+  LinearAllocGuard<int> Ah = LinearAllocGuard<int>(LinearAllocs::malloc, BLOCKSIZE * sizeof(int));
+  LinearAllocGuard<int> Ad =
+      LinearAllocGuard<int>(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int));
+  LinearAllocGuard<int> Bh = LinearAllocGuard<int>(LinearAllocs::malloc, BLOCKSIZE * sizeof(int));
+  LinearAllocGuard<int> Bd =
+      LinearAllocGuard<int>(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int));
  // Fill input data
  std::fill_n(Ah.host_ptr(), BLOCKSIZE, VALUE1);
  std::fill_n(Bh.host_ptr(), BLOCKSIZE, VALUE2);
@@ -1357,10 +1305,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") {
  SECTION("Stream Creation Before Capture") {
    StreamsGuard stream1(1);
    HIP_CHECK(hipStreamBeginCapture(stream0[0], flag));
-    HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int),
-            hipMemcpyDefault, stream1[0]));
-    HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int),
-            hipMemcpyDefault, stream1[0]));
+    HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
+                             stream1[0]));
+    HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
+                             stream1[0]));
    HIP_CHECK(hipStreamSynchronize(stream1[0]));
    myadd<<<GRIDSIZE, BLOCKSIZE, 0, stream0[0]>>>(Ad.ptr(), Bd.ptr());
    HIP_CHECK(hipStreamEndCapture(stream0[0], &graph));  // End Capture
@@ -1368,10 +1316,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") {
  SECTION("Synchronizing multiple streams during Capture") {
    StreamsGuard stream1(1), stream2(1);
    HIP_CHECK(hipStreamBeginCapture(stream0[0], flag));
-    HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int),
-            hipMemcpyDefault, stream1[0]));
-    HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int),
-            hipMemcpyDefault, stream2[0]));
+    HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
+                             stream1[0]));
+    HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
+                             stream2[0]));
    HIP_CHECK(hipStreamSynchronize(stream1[0]));
    HIP_CHECK(hipStreamSynchronize(stream2[0]));
    myadd<<<GRIDSIZE, BLOCKSIZE, 0, stream0[0]>>>(Ad.ptr(), Bd.ptr());
@@ -1380,20 +1328,20 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") {
  SECTION("Stream Creation After Capture") {
    HIP_CHECK(hipStreamBeginCapture(stream0[0], flag));
    StreamsGuard stream1(1);
-    HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int),
-            hipMemcpyDefault, stream1[0]));
-    HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int),
-            hipMemcpyDefault, stream1[0]));
+    HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
+                             stream1[0]));
+    HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
+                             stream1[0]));
    HIP_CHECK(hipStreamSynchronize(stream1[0]));
    myadd<<<GRIDSIZE, BLOCKSIZE, 0, stream0[0]>>>(Ad.ptr(), Bd.ptr());
    HIP_CHECK(hipStreamEndCapture(stream0[0], &graph));  // End Capture
  }
  SECTION("Stream Synchronize Before Capture") {
    StreamsGuard stream1(1);
-    HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int),
-            hipMemcpyDefault, stream1[0]));
-    HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int),
-            hipMemcpyDefault, stream1[0]));
+    HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
+                             stream1[0]));
+    HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
+                             stream1[0]));
    HIP_CHECK(hipStreamSynchronize(stream1[0]));
    HIP_CHECK(hipStreamBeginCapture(stream0[0], flag));
    myadd<<<GRIDSIZE, BLOCKSIZE, 0, stream0[0]>>>(Ad.ptr(), Bd.ptr());
@@ -1404,10 +1352,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") {
    myadd<<<GRIDSIZE, BLOCKSIZE, 0, stream0[0]>>>(Ad.ptr(), Bd.ptr());
    HIP_CHECK(hipStreamEndCapture(stream0[0], &graph));  // End Capture
    StreamsGuard stream1(1);
-    HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int),
-            hipMemcpyDefault, stream1[0]));
-    HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int),
-            hipMemcpyDefault, stream1[0]));
+    HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
+                             stream1[0]));
+    HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
+                             stream1[0]));
    HIP_CHECK(hipStreamSynchronize(stream1[0]));
  }
  // Execute and test the graph
@@ -1415,8 +1363,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") {
  HIP_CHECK(hipGraphLaunch(graphExec, stream0[0]));
  HIP_CHECK(hipStreamSynchronize(stream0[0]));
  // Check output
-  HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int),
-            hipMemcpyDeviceToHost));
+  HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDeviceToHost));
  for (int idx = 0; idx < BLOCKSIZE; idx++) {
    REQUIRE(Ah.host_ptr()[idx] == (VALUE1 + VALUE2));
  }
@@ -1437,20 +1384,16 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") {
 *    - HIP_VERSION >= 5.6
 */
 // Local function executed as thread
-static void strmSyncThread(int *Ah, int *Ad, int *Bh, int *Bd,
-            int BLOCKSIZE, hipError_t *error) {
+static void strmSyncThread(int* Ah, int* Ad, int* Bh, int* Bd, int BLOCKSIZE, hipError_t* error) {
  StreamsGuard stream(1);
-  HIP_CHECK(hipMemcpyAsync(Ad, Ah, BLOCKSIZE * sizeof(int),
-            hipMemcpyDefault, stream[0]));
-  HIP_CHECK(hipMemcpyAsync(Bd, Bh, BLOCKSIZE * sizeof(int),
-            hipMemcpyDefault, stream[0]));
+  HIP_CHECK(hipMemcpyAsync(Ad, Ah, BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[0]));
+  HIP_CHECK(hipMemcpyAsync(Bd, Bh, BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[0]));
  *error = hipStreamSynchronize(stream[0]);
 }

 // Local function executed as thread
-static void captureStrmThread(hipGraph_t *graph, int *Ah, int *Ad,
-            int *Bh, int *Bd, int BLOCKSIZE, int GRIDSIZE,
-            hipStreamCaptureMode flag, hipError_t *error) {
+static void captureStrmThread(hipGraph_t* graph, int* Ah, int* Ad, int* Bh, int* Bd, int BLOCKSIZE,
+                              int GRIDSIZE, hipStreamCaptureMode flag, hipError_t* error) {
  StreamsGuard stream(1);
  // Capture streams into graph
  HIP_CHECK(hipStreamBeginCapture(stream[0], flag));
@@ -1466,14 +1409,12 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") {
  constexpr int VALUE1 = 7, VALUE2 = 11;
  hipGraph_t graph{nullptr};
  // Allocate device memory
-  LinearAllocGuard<int> Ah = LinearAllocGuard<int>(LinearAllocs::malloc,
-                            BLOCKSIZE * sizeof(int));
-  LinearAllocGuard<int> Ad = LinearAllocGuard<int>(LinearAllocs::hipMalloc,
-                            BLOCKSIZE * sizeof(int));
-  LinearAllocGuard<int> Bh = LinearAllocGuard<int>(LinearAllocs::malloc,
-                            BLOCKSIZE * sizeof(int));
-  LinearAllocGuard<int> Bd = LinearAllocGuard<int>(LinearAllocs::hipMalloc,
-                            BLOCKSIZE * sizeof(int));
+  LinearAllocGuard<int> Ah = LinearAllocGuard<int>(LinearAllocs::malloc, BLOCKSIZE * sizeof(int));
+  LinearAllocGuard<int> Ad =
+      LinearAllocGuard<int>(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int));
+  LinearAllocGuard<int> Bh = LinearAllocGuard<int>(LinearAllocs::malloc, BLOCKSIZE * sizeof(int));
+  LinearAllocGuard<int> Bd =
+      LinearAllocGuard<int>(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int));
  // Fill input data
  std::fill_n(Ah.host_ptr(), BLOCKSIZE, VALUE1);
  std::fill_n(Bh.host_ptr(), BLOCKSIZE, VALUE2);
@@ -1483,10 +1424,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") {
    StreamsGuard stream(2);
    // Capture streams into graph
    HIP_CHECK(hipStreamBeginCapture(stream[0], hipStreamCaptureModeGlobal));
-    HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(),
-            BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1]));
-    HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(),
-            BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1]));
+    HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
+                             stream[1]));
+    HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
+                             stream[1]));
    error = hipStreamSynchronize(stream[1]);
    REQUIRE(error == hipErrorStreamCaptureUnsupported);
  }
@@ -1494,34 +1435,30 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") {
  SECTION("Capture Flag = hipStreamCaptureModeThreadLocal Single Threaded") {
    StreamsGuard stream(2);
    // Capture streams into graph
-    HIP_CHECK(hipStreamBeginCapture(stream[0],
-            hipStreamCaptureModeThreadLocal));
-    HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(),
-            BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1]));
-    HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(),
-            BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1]));
+    HIP_CHECK(hipStreamBeginCapture(stream[0], hipStreamCaptureModeThreadLocal));
+    HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
+                             stream[1]));
+    HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
+                             stream[1]));
    error = hipStreamSynchronize(stream[1]);
    REQUIRE(error == hipErrorStreamCaptureUnsupported);
  }
 #endif
 #if HT_AMD
  SECTION("Capture Flag = hipStreamCaptureModeGlobal Multithreaded") {
-    captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(),
-                Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE,
-                hipStreamCaptureModeGlobal, &error);
+    captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE,
+                      hipStreamCaptureModeGlobal, &error);
    REQUIRE(error == hipErrorStreamCaptureUnsupported);
  }
 #endif
  SECTION("Capture Flag = hipStreamCaptureModeThreadLocal Multithreaded") {
-    captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(),
-                Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE,
-                hipStreamCaptureModeThreadLocal, &error);
+    captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE,
+                      hipStreamCaptureModeThreadLocal, &error);
    REQUIRE(error == hipSuccess);
  }
  SECTION("Capture Flag = hipStreamCaptureModeRelaxed Multithreaded") {
-    captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(),
-                Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE,
-                hipStreamCaptureModeRelaxed, &error);
+    captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE,
+                      hipStreamCaptureModeRelaxed, &error);
    REQUIRE(error == hipSuccess);
  }
  if (graph != nullptr) {
@@ -1532,8 +1469,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") {
    HIP_CHECK(hipGraphLaunch(graphExec, stream[0]));
    HIP_CHECK(hipStreamSynchronize(stream[0]));
    // Check output
-    HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int),
-              hipMemcpyDeviceToHost));
+    HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDeviceToHost));
    for (int idx = 0; idx < BLOCKSIZE; idx++) {
      REQUIRE(Ah.host_ptr()[idx] == (VALUE1 + VALUE2));
    }
@@ -19,7 +19,6 @@ THE SOFTWARE.

 #include <hip_test_common.hh>
 #include <hip_test_kernels.hh>
-#include <hip_test_defgroups.hh>

 #include "stream_capture_common.hh"

@@ -18,7 +18,6 @@ THE SOFTWARE.
 */

 #include <hip_test_checkers.hh>
-#include <hip_test_defgroups.hh>
 #include <hip_test_kernels.hh>

 #include "stream_capture_common.hh"
@@ -19,7 +19,6 @@ THE SOFTWARE.

 #include <hip_test_checkers.hh>
 #include <hip_test_kernels.hh>
-#include <hip_test_defgroups.hh>

 #include "stream_capture_common.hh"

@@ -18,7 +18,6 @@ THE SOFTWARE.
 */

 #include <hip_test_common.hh>
-#include <hip_test_defgroups.hh>
 #include <hip_test_kernels.hh>

 #include "stream_capture_common.hh"
@@ -20,7 +20,6 @@ THE SOFTWARE.
 #include <hip_test_checkers.hh>
 #include <hip_test_common.hh>
 #include <hip_test_kernels.hh>
-#include <hip_test_defgroups.hh>

 #include "stream_capture_common.hh"

@@ -367,7 +366,7 @@ TEST_CASE("Unit_hipStreamUpdateCaptureDependencies_Positive_Parameters") {
  const hipStreamUpdateCaptureDependenciesFlags flag =
      GENERATE(hipStreamAddCaptureDependencies, hipStreamSetCaptureDependencies);

-  HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); //hipStreamCaptureModeGlobal));
+  HIP_CHECK(hipStreamBeginCapture(stream, captureMode));  // hipStreamCaptureModeGlobal));

  HIP_CHECK(hipStreamUpdateCaptureDependencies(stream, nullptr, 0, flag));

@@ -20,7 +20,6 @@ THE SOFTWARE.
 #include <hip_test_checkers.hh>
 #include <hip_test_common.hh>
 #include <hip_test_kernels.hh>
-#include <hip_test_defgroups.hh>

 #include "stream_capture_common.hh"

@@ -21,7 +21,6 @@ THE SOFTWARE.
 #include <hip_test_checkers.hh>
 #include <hip_test_common.hh>
 #include <hip/hip_fp16.h>
-#include <hip_test_defgroups.hh>

 #define WIDTH 4

@@ -32,20 +31,17 @@ THE SOFTWARE.
 #define THREADS_PER_BLOCK_Z 1

 // Device (Kernel) function, it must be void
-template <typename T>
-__global__ void matrixTranspose(T* out, T* in, const int width) {
+template <typename T> __global__ void matrixTranspose(T* out, T* in, const int width) {
  int x = blockDim.x * blockIdx.x + threadIdx.x;
  T val = in[x];
  for (int i = 0; i < width; i++) {
-    for (int j = 0; j < width; j++)
-      out[i * width + j] = __shfl(val, j * width + i);
+    for (int j = 0; j < width; j++) out[i * width + j] = __shfl(val, j * width + i);
  }
 }

 // CPU implementation of matrix transpose
 template <typename T>
-void matrixTransposeCPUReference(T* output,
-                                 T* input, const unsigned int width) {
+void matrixTransposeCPUReference(T* output, T* input, const unsigned int width) {
  for (unsigned int j = 0; j < width; j++) {
    for (unsigned int i = 0; i < width; i++) {
      output[i * width + j] = input[j * width + i];
@@ -54,61 +50,52 @@ void matrixTransposeCPUReference(T* output,
 }

 static void getFactor(int* fact) { *fact = 101; }
-static void getFactor(unsigned int* fact) {
-  *fact = static_cast<unsigned int>(INT32_MAX)+1;
-}
+static void getFactor(unsigned int* fact) { *fact = static_cast<unsigned int>(INT32_MAX) + 1; }
 static void getFactor(float* fact) { *fact = 2.5; }
 static void getFactor(__half* fact) { *fact = 2.5; }
 static void getFactor(double* fact) { *fact = 2.5; }
 static void getFactor(int64_t* fact) { *fact = 303; }
-static void getFactor(uint64_t* fact) {
-  *fact = static_cast<uint64_t>(__LONG_LONG_MAX__)+1;
-}
+static void getFactor(uint64_t* fact) { *fact = static_cast<uint64_t>(__LONG_LONG_MAX__) + 1; }

-template <typename T>
-int compare(T* TransposeMatrix, T* cpuTransposeMatrix) {
+template <typename T> int compare(T* TransposeMatrix, T* cpuTransposeMatrix) {
  int errors = 0;
  for (int i = 0; i < NUM; i++) {
-      if (TransposeMatrix[i] != cpuTransposeMatrix[i]) {
-          errors++;
-      }
-  }
-  return errors;
-}
-
-template <>
-int compare<__half>(__half* TransposeMatrix, __half* cpuTransposeMatrix) {
-  int errors = 0;
-  for (int i = 0; i < NUM; i++) {
-    if (__half2float(TransposeMatrix[i]) != __half2float(cpuTransposeMatrix[i])) { // NOLINT
+    if (TransposeMatrix[i] != cpuTransposeMatrix[i]) {
      errors++;
    }
  }
  return errors;
 }

-template <typename T>
-void init(T* Matrix) {
+template <> int compare<__half>(__half* TransposeMatrix, __half* cpuTransposeMatrix) {
+  int errors = 0;
+  for (int i = 0; i < NUM; i++) {
+    if (__half2float(TransposeMatrix[i]) != __half2float(cpuTransposeMatrix[i])) {  // NOLINT
+      errors++;
+    }
+  }
+  return errors;
+}
+
+template <typename T> void init(T* Matrix) {
  // initialize the input data
  T factor;
  getFactor(&factor);
  for (int i = 0; i < NUM; i++) {
-      Matrix[i] = (T)i + factor;
+    Matrix[i] = (T)i + factor;
  }
 }

-template <>
-void init(__half* Matrix) {
+template <> void init(__half* Matrix) {
  // initialize the input data
  __half factor;
  getFactor(&factor);
  for (int i = 0; i < NUM; i++) {
-      Matrix[i] = i + __half2float(factor);
+    Matrix[i] = i + __half2float(factor);
  }
 }

-template<typename T>
-static void runTest() {
+template <typename T> static void runTest() {
  T* Matrix;
  T* TransposeMatrix;
  T* cpuTransposeMatrix;
@@ -129,21 +116,17 @@ static void runTest() {

  // allocate the memory on the device side
  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuMatrix), NUM * sizeof(T)));
-  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuTransposeMatrix),
-                       NUM * sizeof(T)));
+  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuTransposeMatrix), NUM * sizeof(T)));

  // Memory transfer from host to device
-  HIP_CHECK(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(T),
-                      hipMemcpyHostToDevice));
+  HIP_CHECK(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(T), hipMemcpyHostToDevice));

  // Lauching kernel from host
-  hipLaunchKernelGGL(matrixTranspose<T>, dim3(1),
-                    dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y), 0, 0,
-                    gpuTransposeMatrix, gpuMatrix, WIDTH);
+  hipLaunchKernelGGL(matrixTranspose<T>, dim3(1), dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y),
+                     0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH);

  // Memory transfer from device to host
-  HIP_CHECK(hipMemcpy(TransposeMatrix, gpuTransposeMatrix,
-                       NUM * sizeof(T), hipMemcpyDeviceToHost));
+  HIP_CHECK(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(T), hipMemcpyDeviceToHost));

  // CPU MatrixTranspose computation
  matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
@@ -183,26 +166,12 @@ static void runTest() {
 */

 TEST_CASE("Unit_hipShflTests") {
-  SECTION("run test for int") {
-    runTest<int>();
-  }
-  SECTION("run test for float") {
-    runTest<float>();
-  }
-  SECTION("run test for double") {
-    runTest<double>();
-  }
+  SECTION("run test for int") { runTest<int>(); }
+  SECTION("run test for float") { runTest<float>(); }
+  SECTION("run test for double") { runTest<double>(); }
  // Test added to support half datatype.
-  SECTION("run test for __half") {
-    runTest<__half>();
-  }
-  SECTION("run test for int64_t") {
-    runTest<int64_t>();
-  }
-  SECTION("run test for unsigned int") {
-    runTest<unsigned int>();
-  }
-  SECTION("run test for uint64_t") {
-    runTest<uint64_t>();
-  }
+  SECTION("run test for __half") { runTest<__half>(); }
+  SECTION("run test for int64_t") { runTest<int64_t>(); }
+  SECTION("run test for unsigned int") { runTest<unsigned int>(); }
+  SECTION("run test for uint64_t") { runTest<uint64_t>(); }
 }
@@ -21,12 +21,10 @@ THE SOFTWARE.
 #include <hip_test_checkers.hh>
 #include <hip_test_common.hh>
 #include <hip/hip_fp16.h>
-#include <hip_test_defgroups.hh>

 const int size = 32;

-template <typename T>
-__global__ void shflDownSum(T* a, int size) {
+template <typename T> __global__ void shflDownSum(T* a, int size) {
  T val = a[threadIdx.x];
  for (int i = size / 2; i > 0; i /= 2) {
    val += __shfl_down(val, i, size);
@@ -34,8 +32,7 @@ __global__ void shflDownSum(T* a, int size) {
  a[threadIdx.x] = val;
 }

-template <typename T>
-__global__ void shflUpSum(T* a, int size) {
+template <typename T> __global__ void shflUpSum(T* a, int size) {
  T val = a[threadIdx.x];
  for (int i = size / 2; i > 0; i /= 2) {
    val += __shfl_up(val, i, size);
@@ -43,34 +40,29 @@ __global__ void shflUpSum(T* a, int size) {
  a[threadIdx.x] = val;
 }

-template <typename T>
-__global__ void shflXorSum(T* a, int size) {
+template <typename T> __global__ void shflXorSum(T* a, int size) {
  T val = a[threadIdx.x];
-  for (int i = size/2; i > 0; i /= 2) {
+  for (int i = size / 2; i > 0; i /= 2) {
    val += __shfl_xor(val, i, size);
  }
  a[threadIdx.x] = val;
 }

 static void getFactor(int* fact) { *fact = 101; }
-static void getFactor(unsigned int* fact) {
-  *fact = static_cast<unsigned int>(INT32_MAX)+1;
-}
+static void getFactor(unsigned int* fact) { *fact = static_cast<unsigned int>(INT32_MAX) + 1; }
 static void getFactor(float* fact) { *fact = 2.5; }
 static void getFactor(double* fact) { *fact = 2.5; }
 static void getFactor(__half* fact) { *fact = 2.5; }
 static void getFactor(int64_t* fact) { *fact = 303; }
-static void getFactor(uint64_t* fact) {
-  *fact = static_cast<uint64_t>(__LONG_LONG_MAX__)+1;
-}
+static void getFactor(uint64_t* fact) { *fact = static_cast<uint64_t>(__LONG_LONG_MAX__) + 1; }

 template <typename T> T sum(T* a) {
  T cpuSum = 0;
  T factor;
  getFactor(&factor);
  for (int i = 0; i < size; i++) {
-      a[i] = i + factor;
-      cpuSum += a[i];
+    a[i] = i + factor;
+    cpuSum += a[i];
  }
  return cpuSum;
 }
@@ -80,8 +72,8 @@ template <> __half sum(__half* a) {
  __half factor;
  getFactor(&factor);
  for (int i = 0; i < size; i++) {
-      a[i] = i + __half2float(factor);
-      cpuSum = __half2float(cpuSum) + __half2float(a[i]);
+    a[i] = i + __half2float(factor);
+    cpuSum = __half2float(cpuSum) + __half2float(a[i]);
  }
  return cpuSum;
 }
@@ -100,8 +92,7 @@ template <> bool compare(__half gpuSum, __half cpuSum) {
  return false;
 }

-template <typename T>
-static void runTestShflUp() {
+template <typename T> static void runTestShflUp() {
  const int size = 32;
  T a[size];
  T cpuSum = sum(a);
@@ -114,8 +105,7 @@ static void runTestShflUp() {
  HIP_CHECK(hipFree(d_a));
 }

-template <typename T>
-static void runTestShflDown() {
+template <typename T> static void runTestShflDown() {
  T a[size];
  T cpuSum = sum(a);
  T* d_a;
@@ -127,8 +117,7 @@ static void runTestShflDown() {
  HIP_CHECK(hipFree(d_a));
 }

-template <typename T>
-static void runTestShflXor() {
+template <typename T> static void runTestShflXor() {
  T a[size];
  T cpuSum = sum(a);
  T* d_a;
@@ -141,12 +130,12 @@ static void runTestShflXor() {
 }

 /**
-* @addtogroup __shfl __shfl
-* @{
-* @ingroup ShflTest
-* `T __shfl_up(T var, unsigned int lane_delta, int width = warpSize)` -
-* Contains warp __shfl_up function
-*/
+ * @addtogroup __shfl __shfl
+ * @{
+ * @ingroup ShflTest
+ * `T __shfl_up(T var, unsigned int lane_delta, int width = warpSize)` -
+ * Contains warp __shfl_up function
+ */

 /**
 * Test Description
@@ -164,27 +153,13 @@ static void runTestShflXor() {
 */

 TEST_CASE("Unit_runTestShfl_up") {
-  SECTION("runTestShflUp for int") {
-    runTestShflUp<int>();
-  }
-  SECTION("runTestShflUp for float") {
-    runTestShflUp<float>();
-  }
-  SECTION("runTestShflUp for double") {
-    runTestShflUp<double>();
-  }
-  SECTION("runTestShflUp for __half") {
-    runTestShflUp<__half>();
-  }
-  SECTION("runTestShflUp for int64_t") {
-    runTestShflUp<int64_t>();
-  }
-  SECTION("runTestShflUp for unsigned int") {
-    runTestShflUp<unsigned int>();
-  }
-  SECTION("runTestShflUp for uint64_t") {
-    runTestShflUp<uint64_t>();
-  }
+  SECTION("runTestShflUp for int") { runTestShflUp<int>(); }
+  SECTION("runTestShflUp for float") { runTestShflUp<float>(); }
+  SECTION("runTestShflUp for double") { runTestShflUp<double>(); }
+  SECTION("runTestShflUp for __half") { runTestShflUp<__half>(); }
+  SECTION("runTestShflUp for int64_t") { runTestShflUp<int64_t>(); }
+  SECTION("runTestShflUp for unsigned int") { runTestShflUp<unsigned int>(); }
+  SECTION("runTestShflUp for uint64_t") { runTestShflUp<uint64_t>(); }
 }
 /**
 * End doxygen group __shfl.
@@ -192,12 +167,12 @@ TEST_CASE("Unit_runTestShfl_up") {
 */

 /**
-* @addtogroup __shfl __shfl
-* @{
-* @ingroup ShflTest
-* `T __shfl_down(T var, unsigned int lane_delta, int width = warpSize)` -
-* Contains warp __shfl_down function
-*/
+ * @addtogroup __shfl __shfl
+ * @{
+ * @ingroup ShflTest
+ * `T __shfl_down(T var, unsigned int lane_delta, int width = warpSize)` -
+ * Contains warp __shfl_down function
+ */

 /**
 * Test Description
@@ -215,27 +190,13 @@ TEST_CASE("Unit_runTestShfl_up") {
 */

 TEST_CASE("Unit_runTestShfl_Down") {
-  SECTION("runTestShflDown for int") {
-    runTestShflDown<int>();
-  }
-  SECTION("runTestShflDown for float") {
-    runTestShflDown<float>();
-  }
-  SECTION("runTestShflDown for double") {
-    runTestShflDown<double>();
-  }
-  SECTION("runTestShflDown for __half") {
-    runTestShflDown<__half>();
-  }
-  SECTION("runTestShflDown for int64_t") {
-    runTestShflDown<int64_t>();
-  }
-  SECTION("runTestShflDown for unsigned int") {
-    runTestShflDown<unsigned int>();
-  }
-  SECTION("runTestShflDown for uint64_t") {
-    runTestShflDown<uint64_t>();
-  }
+  SECTION("runTestShflDown for int") { runTestShflDown<int>(); }
+  SECTION("runTestShflDown for float") { runTestShflDown<float>(); }
+  SECTION("runTestShflDown for double") { runTestShflDown<double>(); }
+  SECTION("runTestShflDown for __half") { runTestShflDown<__half>(); }
+  SECTION("runTestShflDown for int64_t") { runTestShflDown<int64_t>(); }
+  SECTION("runTestShflDown for unsigned int") { runTestShflDown<unsigned int>(); }
+  SECTION("runTestShflDown for uint64_t") { runTestShflDown<uint64_t>(); }
 }
 /**
 * End doxygen group __shfl.
@@ -243,12 +204,12 @@ TEST_CASE("Unit_runTestShfl_Down") {
 */

 /**
-* @addtogroup __shfl __shfl
-* @{
-* @ingroup ShflTest
-* `T __shfl_xor(T var, int laneMask, int width=warpSize)` -
-* Contains warp __shfl_xor function
-*/
+ * @addtogroup __shfl __shfl
+ * @{
+ * @ingroup ShflTest
+ * `T __shfl_xor(T var, int laneMask, int width=warpSize)` -
+ * Contains warp __shfl_xor function
+ */

 /**
 * Test Description
@@ -266,27 +227,13 @@ TEST_CASE("Unit_runTestShfl_Down") {
 */

 TEST_CASE("Unit_runTestShfl_Xor") {
-  SECTION("runTestShflXor for int") {
-    runTestShflXor<int>();
-  }
-  SECTION("runTestShflXor for float") {
-    runTestShflXor<float>();
-  }
-  SECTION("runTestShflXor for double") {
-    runTestShflXor<double>();
-  }
-  SECTION("runTestShflXor for __half") {
-    runTestShflXor<__half>();
-  }
-  SECTION("runTestShflXor for int64_t") {
-    runTestShflXor<int64_t>();
-  }
-  SECTION("runTestShflXor for unsigned int") {
-    runTestShflXor<unsigned int>();
-  }
-  SECTION("runTestShflXor for uint64_t") {
-    runTestShflXor<uint64_t>();
-  }
+  SECTION("runTestShflXor for int") { runTestShflXor<int>(); }
+  SECTION("runTestShflXor for float") { runTestShflXor<float>(); }
+  SECTION("runTestShflXor for double") { runTestShflXor<double>(); }
+  SECTION("runTestShflXor for __half") { runTestShflXor<__half>(); }
+  SECTION("runTestShflXor for int64_t") { runTestShflXor<int64_t>(); }
+  SECTION("runTestShflXor for unsigned int") { runTestShflXor<unsigned int>(); }
+  SECTION("runTestShflXor for uint64_t") { runTestShflXor<uint64_t>(); }
 }
 /**
 * End doxygen group __shfl.
@@ -20,7 +20,6 @@ THE SOFTWARE.
 #include <hip_test_kernels.hh>
 #include <hip_test_common.hh>
 #include <hip_test_checkers.hh>
-#include <hip_test_defgroups.hh>
 #define NUMBER_OF_THREADS 10
 static bool thread_results[NUMBER_OF_THREADS];

@@ -54,8 +53,7 @@ TEST_CASE("Unit_hipStreamGetDevice_Negative") {

  HIP_CHECK(hipStreamCreate(&stream));
  HIP_CHECK_ERROR(hipStreamGetDevice(nullptr, nullptr), hipErrorInvalidValue);
-  HIP_CHECK_ERROR(hipStreamGetDevice(hipStreamPerThread, nullptr),
-                  hipErrorInvalidValue);
+  HIP_CHECK_ERROR(hipStreamGetDevice(hipStreamPerThread, nullptr), hipErrorInvalidValue);
  HIP_CHECK_ERROR(hipStreamGetDevice(stream, nullptr), hipErrorInvalidValue);
  HIP_CHECK(hipStreamDestroy(stream));
 }
@@ -145,9 +143,7 @@ static bool validateStreamGetDevice() {
  return true;
 }

-static void thread_Test(int threadNum) {
-  thread_results[threadNum] = validateStreamGetDevice();
-}
+static void thread_Test(int threadNum) { thread_results[threadNum] = validateStreamGetDevice(); }

 static bool test_hipStreamGetDevice_MThread() {
  std::vector<std::thread> tests;
@@ -158,7 +154,7 @@ static bool test_hipStreamGetDevice_MThread() {
    tests.push_back(std::thread(thread_Test, idx));
  }
  // Wait for all threads to complete
-  for (std::thread &t : tests) {
+  for (std::thread& t : tests) {
    t.join();
  }
  // Wait for thread
@@ -169,9 +165,7 @@ static bool test_hipStreamGetDevice_MThread() {
  return status;
 }

-TEST_CASE("Unit_hipStreamGetDevice_MThread") {
-  REQUIRE(true == test_hipStreamGetDevice_MThread());
-}
+TEST_CASE("Unit_hipStreamGetDevice_MThread") { REQUIRE(true == test_hipStreamGetDevice_MThread()); }

 /**
 * Test Description